DAY 55 Introduction to Sequence Prediction Tasks
Knowledge Points Review
- Introduction to sequence prediction
- Single-step prediction
- The two strategies for multi-step prediction (a sketch follows the homework note below)
- Preparing sequence data: the sliding window
- How to approach multi-input, multi-output tasks
- Why classical machine learning struggles with sequence tasks, using random forest as the example
Homework: manually build a similar dataset (e.g., cos(x) data) and observe how different machine learning models differ on it.
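The script below sticks to single-step prediction, but for reference, here is a minimal sketch of the two multi-step strategies from the review above (the fitted `model`, the name `recursive_forecast`, and all parameters are illustrative assumptions, not from the original post): the recursive strategy feeds each one-step prediction back into the window, while the direct strategy trains one model per horizon step.

import numpy as np

def recursive_forecast(model, last_window, horizon):
    """Strategy 1 (recursive): predict one step, then slide the window
    forward using the prediction itself as the newest input."""
    window = list(last_window)
    preds = []
    for _ in range(horizon):
        p = model.predict(np.array(window).reshape(1, -1))[0]
        preds.append(p)
        window = window[1:] + [p]
    return preds

# Strategy 2 (direct): train a separate model for each step h = 1..horizon,
# each with target y[t+h]; at inference, every model predicts its own step.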
Using LightGBM, the results are equally poor:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import lightgbm as lgb

# =============================================================
# ===== Step 1: Data preparation (identical to earlier lessons) =====
# =============================================================

# Generate a synthetic time series
x = np.linspace(0, 100, 1000)
y = np.cos(x) + 0.1 * x + np.random.normal(0, 0.5, 1000)

# Define parameters
train_size = int(len(y) * 0.8)
seq_length = 30

# Scale the data correctly: fit the scaler on the training portion only,
# so that no test-set statistics leak into preprocessing
train_data_raw = y[:train_size]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data_raw.reshape(-1, 1))
scaled_y = scaler.transform(y.reshape(-1, 1)).flatten()

# Function to build a supervised dataset with a sliding window
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])   # window of the last seq_length values
        y.append(data[i+seq_length])     # the next value is the target
    return np.array(X), np.array(y)
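# Quick sanity check (illustrative addition, not in the original post):
# with data=[0,1,2,3,4] and seq_length=3, create_sequences returns
# X=[[0,1,2],[1,2,3]] and y=[3,4] -- each window predicts the next value.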
# Apply the sliding window to the full scaled series
all_X, all_y = create_sequences(scaled_y, seq_length)

# Split the windowed dataset into train and test
split_idx = train_size - seq_length  # a window starting at i < split_idx has its target inside the training range
X_train_np = all_X[:split_idx]
y_train_np = all_y[:split_idx]
X_test_np = all_X[split_idx:]
y_test_np = all_y[split_idx:]

# =========================================================================
# ===== Step 2: Prepare the data for the LightGBM model =====
# =========================================================================

# Reshape X to 2-D [n_samples, n_features]: each of the seq_length lagged
# values becomes one feature column
n_samples_train = X_train_np.shape[0]
n_samples_test = X_test_np.shape[0]
X_train_lgb = X_train_np.reshape(n_samples_train, -1)
X_test_lgb = X_test_np.reshape(n_samples_test, -1)

print("X_train shape for LightGBM:", X_train_lgb.shape)  # (770, 30)
print("X_test shape for LightGBM:", X_test_lgb.shape)    # (200, 30)

# =============================================================
# ===== Step 3: Create, train, and evaluate the LightGBM model =====
# =============================================================

# The native lgb.Dataset objects are not needed when using the sklearn
# wrapper below, so these lines stay commented out:
# train_data = lgb.Dataset(X_train_lgb, label=y_train_np)
# test_data = lgb.Dataset(X_test_lgb, label=y_test_np, reference=train_data)
lgb_model = lgb.LGBMRegressor(
    n_estimators=100,      # number of boosting rounds
    num_leaves=31,         # maximum leaves per tree
    learning_rate=0.05,
    colsample_bytree=0.9,  # sklearn-API name for feature_fraction
    random_state=42,
    n_jobs=-1
)

# Train the model
print("\n開始訓練LightGBM模型...")
lgb_model.fit(X_train_lgb, y_train_np)
print("模型訓練完成!")# 做出預測
train_predict = lgb_model.predict(X_train_lgb)
test_predict = lgb_model.predict(X_test_lgb)

# Inverse-transform the predictions back to the original scale
train_predict = scaler.inverse_transform(train_predict.reshape(-1, 1))
test_predict = scaler.inverse_transform(test_predict.reshape(-1, 1))

# The true labels also need to be inverse-transformed
y_train_orig = scaler.inverse_transform(y_train_np.reshape(-1, 1))
y_test_orig = scaler.inverse_transform(y_test_np.reshape(-1, 1))

# Compute the root mean squared error (RMSE)
train_rmse = np.sqrt(mean_squared_error(y_train_orig, train_predict))
test_rmse = np.sqrt(mean_squared_error(y_test_orig, test_predict))

print(f"\nTrain RMSE: {train_rmse:.4f}")
print(f"Test RMSE: {test_rmse:.4f}")
# =============================================================
# ===== Step 4: Visualize the results =====
# =============================================================

plt.figure(figsize=(15, 7))
plt.plot(y, label='Original data', color='gray', alpha=0.5)

# Plot training-set predictions (offset by seq_length: the first window
# consumes the first seq_length points, so predictions start there)
train_predict_plot = np.empty_like(y)
train_predict_plot[:] = np.nan
train_predict_plot[seq_length:seq_length+len(train_predict)] = train_predict.flatten()
plt.plot(train_predict_plot, label='Train predictions (LightGBM)', color='blue')

# Plot test-set predictions
test_predict_plot = np.empty_like(y)
test_predict_plot[:] = np.nan
test_predict_plot[len(train_predict) + seq_length : len(y)] = test_predict.flatten()
plt.plot(test_predict_plot, label='Test predictions (LightGBM)', color='red')

plt.title('Time-series prediction comparison (LightGBM)')
plt.xlabel('Time step')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()

# Visualize feature importance
lgb.plot_importance(lgb_model, height=0.8, title='Feature Importance', importance_type='gain')
plt.show()
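Why is the test error so much worse than the training error? Tree models partition the feature space and output constant values per leaf, so they can never predict a target outside the range seen during training; the upward 0.1*x drift in the test segment is therefore systematically underestimated. A minimal sketch of this failure mode (the data and parameters here are our own illustrative choices, not from the original post):

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
x_demo = rng.uniform(0, 10, 500).reshape(-1, 1)
y_demo = x_demo.ravel()  # a pure linear trend, no noise

demo_model = lgb.LGBMRegressor(n_estimators=200, random_state=0)
demo_model.fit(x_demo, y_demo)

# Inside the training range the fit is accurate; beyond it, predictions
# saturate near the training maximum (~10) -- the trees cannot extrapolate.
print(demo_model.predict(np.array([[5.0], [15.0], [50.0]])))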
@浙大疏錦行