一、窗口函數核心概念
??本質??:對一組與當前行相關聯的行執行計算,??不改變原表行數??
??與聚合函數的區別??:
SELECT department, AVG(salary) -- 普通聚合:每個部門一行
FROM employees
GROUP BY department;SELECT name, salary, AVG(salary) OVER(PARTITION BY department) -- 窗口函數:保留所有行
FROM employees;
二、窗口函數完整語法結構
函數名([參數]) OVER ([PARTITION BY 分組字段][ORDER BY 排序字段 [ASC|DESC]][frame_clause]
)
1. 核心子句詳解
三、常用窗口函數分類
1. 排序函數
SELECT product_id,sales,ROW_NUMBER() OVER(ORDER BY sales DESC) AS rank1, -- 唯一連續序號RANK() OVER(ORDER BY sales DESC) AS rank2, -- 允許并列跳號DENSE_RANK() OVER(ORDER BY sales DESC) AS rank3 -- 允許并列不跳號
FROM sales_data;
2. 聚合窗口函數
SELECT date,revenue,SUM(revenue) OVER(ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS 3d_avg,AVG(revenue) OVER(PARTITION BY YEAR(date)) AS year_avg
FROM daily_sales;
3. 分布分析函數
SELECT student_id,score,PERCENT_RANK() OVER(ORDER BY score) AS percentile, -- 相對百分比排名CUME_DIST() OVER(ORDER BY score) AS cumulative_dist -- 累計分布
FROM exam_results;
4. 偏移函數
SELECT date,temperature,LAG(temperature, 1) OVER(ORDER BY date) AS prev_day_temp, -- 前一行LEAD(temperature, 1) OVER(ORDER BY date) AS next_day_temp, -- 后一行FIRST_VALUE(temperature) OVER(PARTITION BY WEEK(date)) AS week_first_temp
FROM weather;
四、典型應用場景
1. 動態分組TopN
-- 獲取每個部門薪資前三名
WITH ranked AS (SELECT name, department, salary,DENSE_RANK() OVER(PARTITION BY department ORDER BY salary DESC) AS rkFROM employees
)
SELECT * FROM ranked WHERE rk <= 3;
2. 累計計算
-- 計算累計銷售額與移動平均
SELECT order_date,daily_sales,SUM(daily_sales) OVER(ORDER BY order_date) AS cum_sum,AVG(daily_sales) OVER(ORDER BY order_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS 7d_ma
FROM orders;
3. 數據差異分析
-- 對比每月銷售額與上月差異
SELECT month,revenue,LAG(revenue, 1) OVER(ORDER BY month) AS prev_month,revenue - LAG(revenue, 1) OVER(ORDER BY month) AS mom_diff
FROM monthly_sales;
五、性能優化策略
1. 索引設計
-- 為窗口函數涉及的字段建立組合索引
ALTER TABLE sales ADD INDEX idx_dept_time (department, order_date);-- 執行計劃檢查
EXPLAIN
SELECT product_id,SUM(quantity) OVER(PARTITION BY product_id ORDER BY sale_date)
FROM sales;
2. 分區剪枝優化
-- 結合WHERE條件減少處理分區
SELECT *
FROM (SELECT user_id,ROW_NUMBER() OVER(PARTITION BY city ORDER BY reg_date) AS rnFROM usersWHERE city IN ('北京','上海') -- 提前過濾
) t
WHERE rn <= 100;
3. 避免重復計算
-- 使用WINDOW子句復用定義
SELECT AVG(sales) OVER w AS avg_sales,MAX(sales) OVER w AS max_sales
FROM sales_data
WINDOW w AS (PARTITION BY region ORDER BY month);
六、與臨時表結合的高級用法
1. 分階段計算
-- 第一階段:計算基礎窗口
CREATE TEMPORARY TABLE stage1 AS
SELECT user_id,SUM(amount) OVER(PARTITION BY user_id) AS total_amt
FROM transactions;-- 第二階段:二次聚合
SELECT AVG(total_amt) AS avg_amt_per_user
FROM stage1;
2. 遞歸窗口計算
-- 計算員工管理鏈層級
WITH RECURSIVE emp_tree AS (SELECT emp_id, manager_id, 1 AS level,CAST(emp_id AS CHAR(100)) AS pathFROM employeesWHERE manager_id IS NULLUNION ALLSELECT e.emp_id,e.manager_id,et.level + 1,CONCAT(et.path, '->', e.emp_id)FROM employees eJOIN emp_tree et ON e.manager_id = et.emp_id
)
SELECT emp_id,level,path,RANK() OVER(ORDER BY level) AS hierarchy_rank
FROM emp_tree;
七、常見錯誤排查
??錯誤現象?? | ??原因分析?? | ??解決方案?? |
---|
結果排序不符合預期 | 未正確使用ORDER BY | 明確指定排序字段和方向 |
窗口范圍計算錯誤 | ROWS與RANGE混淆使用 | 確認需要物理行偏移還是邏輯值范圍 |
性能急劇下降 | 未加分區條件導致全表掃描 | 添加WHERE條件或分區過濾 |
出現重復排名 | 使用了ROW_NUMBER而非RANK | 根據業務需求選擇正確的排名函數 |
八、版本特性差異
??MySQL版本?? | ??窗口函數支持?? |
---|
5.x | ? 不支持 |
8.0.2+ | ?? 基礎窗口函數 |
8.0.28+ | ?? 新增NTH_VALUE() 、NTILE() 等擴展函數 |
結語:最佳實踐原則
- ??明確窗口范圍??:始終指定
ROWS/RANGE
避免意外結果 - ??慎用無界窗口??:
UNBOUNDED PRECEDING
可能導致性能問題 - ??結合索引優化??:為
PARTITION BY
和ORDER BY
字段建立索引 - ??分階段處理??:對復雜計算使用臨時表拆分步驟
- ??版本驗證??:生產環境確認MySQL版本支持情況
-- 查看窗口函數執行計劃
EXPLAIN FORMAT=JSON
SELECT ... [包含窗口函數的查詢];-- 性能模式監控
SELECT * FROM performance_schema.events_stages_history_long
WHERE EVENT_NAME LIKE '%window%';