MySQL 分組函數全面詳解與最佳實踐
MySQL 分組函數(聚合函數)的核心知識、注意事項和高級應用技巧:
📊 分組函數核心列表
函數 | 描述 | 示例 |
---|---|---|
COUNT() | 計算行數 | COUNT(*) |
SUM() | 計算數值總和 | SUM(salary) |
AVG() | 計算平均值 | AVG(score) |
MAX() | 獲取最大值 | MAX(price) |
MIN() | 獲取最小值 | MIN(price) |
GROUP_CONCAT() | 連接分組字符串 | GROUP_CONCAT(name) |
STDDEV() | 計算標準差 | STDDEV(price) |
VAR_POP() | 計算總體方差 | VAR_POP(sales) |
?? 分組函數十大注意事項
1. NULL 值處理
SELECT COUNT(*), -- 所有行數(包含NULL)COUNT(bonus), -- 非NULL行數AVG(COALESCE(bonus, 0)) -- NULL轉為0計算
FROM employees;
2. 分組字段選擇
-- 錯誤:非分組字段出現在SELECT
SELECT department, name, AVG(salary)
FROM employees; -- 報錯或未定義行為-- 正確:所有非聚合字段必須出現在GROUP BY
SELECT department, name, AVG(salary)
FROM employees
GROUP BY department, name;
3. WHERE vs HAVING
-- WHERE:分組前過濾行
SELECT department, AVG(salary)
FROM employees
WHERE hire_date > '2020-01-01' -- 先過濾
GROUP BY department;-- HAVING:分組后過濾組
SELECT department, AVG(salary) avg_sal
FROM employees
GROUP BY department
HAVING avg_sal > 5000; -- 后過濾
4. 性能優化策略
-- 低效:全表掃描
SELECT department, AVG(salary)
FROM employees
GROUP BY department;-- 高效:添加索引
ALTER TABLE employees ADD INDEX idx_dept (department);
5. 隱式排序問題
-- 結果順序不保證
SELECT department, COUNT(*)
FROM employees
GROUP BY department;-- 顯式排序
SELECT department, COUNT(*) AS emp_count
FROM employees
GROUP BY department
ORDER BY emp_count DESC;
6. 分組函數嵌套限制
-- 允許:單層分組函數
SELECT AVG(MAX(salary)) -- ? 錯誤嵌套-- 正確:使用子查詢
SELECT AVG(max_sal)
FROM (SELECT department, MAX(salary) AS max_salFROM employeesGROUP BY department
) dept_max;
7. DISTINCT 用法
-- 統計不重復值
SELECT COUNT(DISTINCT department), -- 不同部門數量COUNT(DISTINCT CASE WHEN salary > 5000 THEN 1 END) -- 高薪人數
FROM employees;
8. 空分組處理
-- 使用 COALESCE 處理空分組
SELECT COALESCE(department, '未分配') AS dept,COUNT(*)
FROM employees
GROUP BY department;
9. 多列分組順序
-- 分組順序影響結果
SELECT YEAR(hire_date) AS hire_year,department,COUNT(*)
FROM employees
GROUP BY hire_year, department; -- 先按年再按部門
10. GROUP_CONCAT 限制
-- 默認截斷長度1024字符
SET SESSION group_concat_max_len = 10000;SELECT department,GROUP_CONCAT(name ORDER BY salary DESC SEPARATOR '|')
FROM employees
GROUP BY department;
🚀 高級分組技巧
1. 多級分組分析
SELECT YEAR(order_date) AS order_year,QUARTER(order_date) AS quarter,product_category,SUM(amount) AS total_sales,COUNT(DISTINCT customer_id) AS customers
FROM orders
GROUP BY order_year, quarter, product_category
WITH ROLLUP; -- 添加小計和總計行
2. 分組百分比計算
SELECT department,COUNT(*) AS emp_count,ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS pct
FROM employees
GROUP BY department;
3. 分組排名
SELECT department,name,salary,RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank
FROM employees;
4. 分組比較分析
SELECT department,AVG(salary) AS avg_salary,AVG(salary) - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees
GROUP BY department;
5. 時間序列分組
SELECT DATE_FORMAT(order_date, '%Y-%m') AS month,SUM(amount) AS monthly_sales,LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) AS prev_month
FROM orders
GROUP BY month;
🔧 性能優化指南
1. 索引策略
-- 復合索引優化分組
ALTER TABLE orders ADD INDEX idx_category_date (product_category, order_date);-- 覆蓋索引
EXPLAIN SELECT product_category, COUNT(*)
FROM orders
GROUP BY product_category; -- 使用索引
2. 臨時表優化
-- 增大臨時表內存
SET tmp_table_size = 256*1024*1024; -- 256MB
SET max_heap_table_size = 256*1024*1024;-- 監控臨時表使用
SHOW STATUS LIKE 'Created_tmp%';
3. 分區表優化
-- 按日期分區
CREATE TABLE sales (sale_id INT,sale_date DATE,amount DECIMAL(10,2)
) PARTITION BY RANGE (YEAR(sale_date)) (PARTITION p2020 VALUES LESS THAN (2021),PARTITION p2021 VALUES LESS THAN (2022),PARTITION p2022 VALUES LESS THAN (2023)
);-- 分區分組查詢
SELECT YEAR(sale_date), SUM(amount)
FROM sales
GROUP BY YEAR(sale_date); -- 僅掃描相關分區
4. 物化視圖(MySQL 8.0+)
-- 創建分組結果緩存
CREATE TABLE sales_summary AS
SELECT product_id, YEAR(order_date) AS year, SUM(amount) AS total
FROM orders
GROUP BY product_id, year;-- 定期刷新
REPLACE INTO sales_summary
SELECT product_id, YEAR(order_date), SUM(amount)
FROM orders
WHERE order_date > (SELECT MAX(order_date) FROM sales_summary)
GROUP BY product_id, YEAR(order_date);
💡 最佳實踐總結
1. 分組設計原則
-- 明確分組粒度
SELECT DATE(order_date) AS day, -- 按天HOUR(order_time) AS hour, -- 按小時COUNT(*)
FROM orders
GROUP BY day, hour;
2. 安全處理大數據集
-- 分頁處理大結果集
SELECT department, AVG(salary)
FROM employees
GROUP BY department
LIMIT 10 OFFSET 20; -- 第三頁
3. 結果驗證技巧
-- 驗證分組總數
SELECT COUNT(DISTINCT department)
FROM employees; -- 應與分組行數一致-- 交叉驗證
SELECT (SELECT COUNT(*) FROM employees) AS total,SUM(emp_count) AS group_total
FROM (SELECT department, COUNT(*) AS emp_countFROM employeesGROUP BY department
) dept_groups;
4. 執行計劃分析
-- 檢查分組性能
EXPLAIN
SELECT department, AVG(salary)
FROM employees
GROUP BY department;-- 關注以下指標:
-- 1. Using temporary (是否使用臨時表)
-- 2. Using filesort (是否文件排序)
-- 3. key (使用的索引)
5. 避免常見陷阱
-- 陷阱1:錯誤處理NULL
SELECT department, AVG(bonus) -- 忽略NULL
FROM employees;-- 陷阱2:混淆WHERE和HAVING
SELECT department, AVG(salary)
FROM employees
WHERE AVG(salary) > 5000; -- 錯誤!WHERE不能使用聚合函數-- 陷阱3:未排序的分頁
SELECT department, COUNT(*)
FROM employees
GROUP BY department
LIMIT 10; -- 結果隨機
🚀 綜合應用案例
銷售分析報告
SELECT c.country,p.category,YEAR(o.order_date) AS order_year,COUNT(DISTINCT o.customer_id) AS customers,COUNT(*) AS orders,SUM(o.amount) AS revenue,AVG(o.amount) AS avg_order_value,GROUP_CONCAT(DISTINCT p.product_name ORDER BY p.product_name SEPARATOR ', ') AS products
FROM orders o
JOIN products p ON o.product_id = p.id
JOIN customers c ON o.customer_id = c.id
WHERE o.order_date BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY c.country, p.category, order_year WITH ROLLUP
HAVING revenue > 10000
ORDER BY country, category, order_year DESC;