[Python · PyTorch] Recurrent Neural Networks: RNN (Simple Applications)
- 1. Introduction
- 2. Simulated Passenger-Flow Prediction (converting the dataset to Tensors)
  - 2.1 Dataset
  - 2.2 Training Procedure
- 3. Simulated Stock Prediction (loading the dataset with DataLoader)
  - 3.1 IBM Dataset
    - 3.1.1 Dataset
    - 3.1.2 Training Procedure
      - ① Plotting with Matplotlib
      - ② Plotting with Seaborn
  - 3.2 Amazon Dataset
    - 3.2.1 Dataset
    - 3.2.2 Training Results
1. Introduction
Earlier chapters covered the structure of the RNN and its variants LSTM and GRU. This chapter presents the corresponding network code and demonstrates simple application scenarios on simulated data.
Core RNN constructor:
nn.RNN(input_size, hidden_size, num_layers, nonlinearity, bias, batch_first, dropout, bidirectional)
Parameters:
- input_size: number of input-layer neurons, i.e. the dimensionality of each input feature vector
- hidden_size: number of hidden-layer neurons
- num_layers: number of stacked RNN layers, default 1
- nonlinearity: activation function, 'tanh' (default) or 'relu'
- bias: whether to use bias terms, default True
- batch_first: whether the batch dimension comes first, default False
  - True: input is (batch, seq_len, input_size)
  - False: input is (seq_len, batch, input_size)
- dropout: dropout probability, between 0 and 1
- bidirectional: whether to use a bidirectional RNN, default False
Arguments when calling:
- input: the input from the preceding stage
- h[n-1]: the state passed in from the preceding stage
Return values:
- output: this layer's output
- h[n]: the state this layer passes on to the next stage
GRU takes the same arguments as RNN, but LSTM differs slightly:
- input / output: this layer's input / output
- (h[n-1], c[n-1]) / (h[n], c[n]): the incoming / outgoing (hidden, cell) state pair
On input, seq_len is the sequence length (the number of propagation steps); that is, the input tensor has shape (seq_len, batch, input_size).
- Take natural-language training as an example: suppose each sentence is 30 words long, each word is a 50-dimensional vector, and 10 sentences are trained at a time.
- Then seq_len=30, input_size=50, and batch_size=10, and the LSTM steps forward 30 times over the incoming data to produce the final output.
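To make these shapes concrete, here is a minimal sketch (the hidden size of 64 is illustrative, not from the text above) that feeds random data through nn.RNN and nn.LSTM and prints the shapes involved:
import torch
import torch.nn as nn

seq_len, batch_size, input_size, hidden_size = 30, 10, 50, 64  # hidden_size is an assumed value

# Default layout (batch_first=False): (seq_len, batch, input_size)
x = torch.randn(seq_len, batch_size, input_size)

rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=1)
output, hn = rnn(x)
print(output.shape)  # torch.Size([30, 10, 64]) -- one output per time step
print(hn.shape)      # torch.Size([1, 10, 64])  -- final hidden state, per layer

# LSTM passes a (hidden, cell) state pair instead of a single hidden state
lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
output, (hn, cn) = lstm(x)
print(output.shape)        # torch.Size([30, 10, 64])
print(hn.shape, cn.shape)  # torch.Size([1, 10, 64]) each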
2. Simulated Passenger-Flow Prediction (converting the dataset to Tensors)
2.1 Dataset
Simulated airline passenger data, commonly used when learning machine-learning algorithms.
2.2 Training Procedure
① Import third-party libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
Select the compute device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
② Read the dataset
Read the data from a CSV/Excel file
# Simulated airline data
pf = pd.read_csv('./data/flight.csv')
month, passengers = pf['Month'], pf['Passengers']
scaler = MinMaxScaler(feature_range=(-1, 1))
passengers = scaler.fit_transform(passengers.values.reshape(-1,1))
Define the dataset-splitting helper
def split_data(passengers, lookback):
    # 1. Segment the series
    passengers = np.array(passengers)
    segments = []
    # Create every possible subsequence of length `lookback`
    for index in range(len(passengers) - lookback):
        segments.append(passengers[index: index + lookback])
    segments = np.array(segments)
    # 2. Determine the train/test split sizes
    test_set_size = int(np.round(0.2 * segments.shape[0]))
    train_set_size = segments.shape[0] - test_set_size
    # 3. Split into train/test, x and y
    x_train = segments[:train_set_size, :-1]
    y_train = segments[:train_set_size, -1]  # the last element of each sequence is y
    x_test = segments[train_set_size:, :-1]
    y_test = segments[train_set_size:, -1]
    return x_train, y_train, x_test, y_test
Split the dataset
lookback = 20  # sequence length
x_train, y_train, x_test, y_test = split_data(passengers, lookback)
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ',y_train.shape)
print('x_test.shape = ',x_test.shape)
print('y_test.shape = ',y_test.shape)
Convert the data to Tensors
x_train = torch.from_numpy(x_train).type(torch.Tensor).to(device)
x_test = torch.from_numpy(x_test).type(torch.Tensor).to(device)
y_train = torch.from_numpy(y_train).type(torch.Tensor).to(device)
y_test = torch.from_numpy(y_test).type(torch.Tensor).to(device)
③ Build the neural networks
Define the input / hidden / output dimensions
input_dim = 1
hidden_dim = 32
num_layers = 2
output_dim = 1
Define the three networks
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # output shape: [batch_size, seq_len, hidden_size] (batch_first=True)
        # hn shape    : [num_layers, batch_size, hidden_size]
        output, hn = self.rnn(x)
        # Keep only the output of the last time step
        output = output[:, -1, :]
        # Feed the last step into the fully connected layer
        output = self.fc(output)
        return output
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initialize the hidden state and the cell state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
        output, (hn, cn) = self.lstm(x, (h0, c0))
        output = output[:, -1, :]
        output = self.fc(output)
        return output
class GRU(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(GRU, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.gru = nn.GRU(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initialize the hidden state (a GRU has no cell state)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
        output, hn = self.gru(x, h0)
        output = output[:, -1, :]
        output = self.fc(output)
        return output
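As a quick sanity check (an addition, not part of the original walkthrough), a random batch can be pushed through one of the models to confirm the output shape is (batch, output_dim); note the training sequences built above have length lookback - 1:
model = RNN(input_dim, hidden_dim, num_layers, output_dim).to(device)
dummy = torch.randn(8, lookback - 1, input_dim).to(device)  # (batch, seq_len, input_size), batch_first=True
print(model(dummy).shape)  # torch.Size([8, 1])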
④ Train the networks
Preliminary definitions
# Random seed
torch.manual_seed(20)

# Instantiate the networks
rnn = RNN(input_dim, hidden_dim, num_layers, output_dim)
lstm = LSTM(input_dim, hidden_dim, num_layers, output_dim)
gru = GRU(input_dim, hidden_dim, num_layers, output_dim)

# Move the networks to the compute device
rnn.to(device)
lstm.to(device)
gru.to(device)

# Loss functions
rnn_loss_function = nn.MSELoss()
lstm_loss_function = nn.MSELoss()
gru_loss_function = nn.MSELoss()

# Optimizers
rnn_optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)
lstm_optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)
gru_optimizer = torch.optim.Adam(gru.parameters(), lr=0.001)

# Number of training epochs
epochs = 200

# Training-loss history
rnn_final_losses = []
lstm_final_losses = []
gru_final_losses = []
Define the training functions
def train_rnn():
    rnn.train()
    for epoch in range(epochs):
        # 1. Forward pass
        y_train_pred_rnn = rnn(x_train)
        # 2. Compute the loss
        rnn_loss = rnn_loss_function(y_train_pred_rnn, y_train)
        rnn_final_losses.append(rnn_loss.item())
        # 3. Backward pass
        rnn_optimizer.zero_grad()
        rnn_loss.backward()
        # 4. Update the parameters
        rnn_optimizer.step()
        if epoch % 10 == 0:
            print("RNN:: Epoch: {}, Loss: {} ".format(epoch, rnn_loss.item()))
    return y_train_pred_rnn
def train_lstm():
    lstm.train()
    for epoch in range(epochs):
        # 1. Forward pass
        y_train_pred_lstm = lstm(x_train)
        # 2. Compute the loss
        lstm_loss = lstm_loss_function(y_train_pred_lstm, y_train)
        lstm_final_losses.append(lstm_loss.item())
        # 3. Backward pass
        lstm_optimizer.zero_grad()
        lstm_loss.backward()
        # 4. Update the parameters
        lstm_optimizer.step()
        if epoch % 10 == 0:
            print("LSTM:: Epoch: {}, Loss: {} ".format(epoch, lstm_loss.item()))
    return y_train_pred_lstm
def train_gru():
    gru.train()
    for epoch in range(epochs):
        # 1. Forward pass
        y_train_pred_gru = gru(x_train)
        # 2. Compute the loss
        gru_loss = gru_loss_function(y_train_pred_gru, y_train)
        gru_final_losses.append(gru_loss.item())
        # 3. Backward pass
        gru_optimizer.zero_grad()
        gru_loss.backward()
        # 4. Update the parameters
        gru_optimizer.step()
        if epoch % 10 == 0:
            print("GRU:: Epoch: {}, Loss: {} ".format(epoch, gru_loss.item()))
    return y_train_pred_gru
Run the training functions
y_train_pred_rnn = train_rnn()
torch.save(rnn.state_dict(), "rnn_test.pth")
print("Saved PyTorch Model State to rnn_test.pth")

y_train_pred_lstm = train_lstm()
torch.save(lstm.state_dict(), "lstm_test.pth")
print("Saved PyTorch Model State to lstm_test.pth")

y_train_pred_gru = train_gru()
torch.save(gru.state_dict(), "gru_test.pth")
print("Saved PyTorch Model State to gru_test.pth")
Plot the training results (final epoch)
Invert the normalization and convert to DataFrames
original = pd.DataFrame(scaler.inverse_transform(y_train.detach().cpu().numpy()))
rnn_predict = pd.DataFrame(scaler.inverse_transform(y_train_pred_rnn.detach().cpu().numpy()))
lstm_predict = pd.DataFrame(scaler.inverse_transform(y_train_pred_lstm.detach().cpu().numpy()))
gru_predict = pd.DataFrame(scaler.inverse_transform(y_train_pred_gru.detach().cpu().numpy()))
Run the plotting code
import seaborn as sns

sns.set_style("darkgrid")
fig = plt.figure(figsize=(16, 6))

# Left panel: the fitted curves
plt.subplot(1, 2, 1)
ax = sns.lineplot(x=original.index, y=original[0], label="Data", color='blue')
ax = sns.lineplot(x=rnn_predict.index, y=rnn_predict[0], label="RNN Prediction", color='red')
ax = sns.lineplot(x=lstm_predict.index, y=lstm_predict[0], label="LSTM Prediction", color='darkred')
ax = sns.lineplot(x=gru_predict.index, y=gru_predict[0], label="GRU Prediction", color='black')
ax.set_title('Passengers', size=14, fontweight='bold')
ax.set_xlabel("Days", size=14)
ax.set_ylabel("Members", size=14)
ax.set_xticklabels('', size=10)

# Right panel: the training-loss curves
plt.subplot(1, 2, 2)
ax = sns.lineplot(data=rnn_final_losses, label="RNN Loss", color='red')
ax = sns.lineplot(data=lstm_final_losses, label="LSTM Loss", color='darkblue')
ax = sns.lineplot(data=gru_final_losses, label="GRU Loss", color='black')
ax.set_xlabel("Epoch", size=14)
ax.set_ylabel("Loss", size=14)
ax.set_title("Training Loss", size=14, fontweight='bold')
plt.show()
⑤ Test the networks
Define the test functions
# Test RNN
def test_rnn():
    rnn.eval()
    with torch.no_grad():
        y_test_pred_rnn = rnn(x_test)
    return y_test_pred_rnn

# Test LSTM
def test_lstm():
    lstm.eval()
    with torch.no_grad():
        y_test_pred_lstm = lstm(x_test)
    return y_test_pred_lstm

# Test GRU
def test_gru():
    gru.eval()
    with torch.no_grad():
        y_test_pred_gru = gru(x_test)
    return y_test_pred_gru
Run the test functions
y_test_pred_rnn = test_rnn()
y_test_pred_lstm = test_lstm()
y_test_pred_gru = test_gru()
Invert the normalization and convert to DataFrames
test_original = pd.DataFrame(scaler.inverse_transform(y_test.detach().cpu().numpy()))
test_rnn_predict = pd.DataFrame(scaler.inverse_transform(y_test_pred_rnn.detach().cpu().numpy()))
test_lstm_predict = pd.DataFrame(scaler.inverse_transform(y_test_pred_lstm.detach().cpu().numpy()))
test_gru_predict = pd.DataFrame(scaler.inverse_transform(y_test_pred_gru.detach().cpu().numpy()))
Run the plotting code
import seaborn as sns

sns.set_style("darkgrid")
ax = sns.lineplot(x=test_original.index, y=test_original[0], label="Data", color='blue')
ax = sns.lineplot(x=test_rnn_predict.index, y=test_rnn_predict[0], label="RNN Prediction", color='red')
ax = sns.lineplot(x=test_lstm_predict.index, y=test_lstm_predict[0], label="LSTM Prediction", color='darkred')
ax = sns.lineplot(x=test_gru_predict.index, y=test_gru_predict[0], label="GRU Prediction", color='black')
ax.set_title('Passengers', size=14, fontweight='bold')
ax.set_xlabel("Days", size=14)
ax.set_ylabel("Members", size=14)
ax.set_xticklabels('', size=10)
plt.show()
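To put a number on the visual comparison (an addition not in the original post), one option is a quick RMSE per model over the de-normalized test predictions:
import numpy as np

def rmse(truth_df, pred_df):
    # Root-mean-squared error between two single-column DataFrames
    return float(np.sqrt(np.mean((truth_df[0].values - pred_df[0].values) ** 2)))

print("RNN  RMSE:", rmse(test_original, test_rnn_predict))
print("LSTM RMSE:", rmse(test_original, test_lstm_predict))
print("GRU  RMSE:", rmse(test_original, test_gru_predict))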
3. Simulated Stock Prediction (loading the dataset with DataLoader)
3.1 IBM Dataset
3.1.1 Dataset
IBM stock-price data, commonly used when learning machine-learning algorithms.
3.1.2 Training Procedure
① Import third-party libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
Select the compute device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
② Read the dataset
PyTorch provides a standard way to read datasets:
- Define a custom XXDataset class that inherits from torch.utils.data.Dataset and overrides the __getitem__() and __len__() methods
- Use torch.utils.data.DataLoader to load the data exposed by the custom Dataset instance
For example, the official CIFAR10 dataset is read in exactly this way:
import torchvision.datasets
from torch.utils.data import DataLoader

train_data = torchvision.datasets.CIFAR10(root="datasets", train=False, transform=torchvision.transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_data, batch_size=4, shuffle=True)
DataLoader constructor parameters, the most commonly used of which are:
- dataset: the Dataset instance
- batch_size: the batch size
- shuffle: whether to reshuffle the data every epoch
- num_workers: number of worker processes for data loading (default 0: load in the main process only)
- drop_last: whether to drop the final incomplete batch when the dataset length is not divisible by batch_size (see the sketch below)
- sampler: the strategy for drawing samples from the dataset
- batch_sampler: like sampler, but returns a batch of indices at a time; mutually exclusive with batch_size, shuffle, sampler, and drop_last
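As a small illustration of drop_last (the dataset and sizes here are made up), a dataset of 10 samples with batch_size=4 either keeps or drops the trailing short batch:
import torch
from torch.utils.data import DataLoader, TensorDataset

ds = TensorDataset(torch.arange(10).float())          # 10 samples
keep = DataLoader(ds, batch_size=4, drop_last=False)  # yields batches of 4, 4, 2
drop = DataLoader(ds, batch_size=4, drop_last=True)   # yields batches of 4, 4
print([len(b[0]) for b in keep])  # [4, 4, 2]
print([len(b[0]) for b in drop])  # [4, 4]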
Notes on selected parameters:
How num_workers works
The DataLoader spawns num_workers worker processes, and its batch_sampler assigns batches to these workers, which load them into RAM. When iterated, the DataLoader retrieves each batch from RAM; if a retrieval fails, the num_workers workers keep loading batches into RAM and the DataLoader tries to fetch again.
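Because the workers are separate processes, on some platforms (notably Windows) code with num_workers > 0 must run under a main-module guard or the workers will re-execute the script; a minimal sketch (the dataset and sizes are illustrative):
import torch
from torch.utils.data import DataLoader, TensorDataset

if __name__ == "__main__":
    ds = TensorDataset(torch.randn(100, 8))  # illustrative dummy data
    loader = DataLoader(ds, batch_size=16, shuffle=True, num_workers=2)
    for (batch,) in loader:
        print(batch.shape)  # torch.Size([16, 8])
        break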
sampler / batch_sampler strategies (a sketch follows this list)
- Random Sampler: draws samples from the dataset at random; a random seed can be set to make the sampling reproducible
- Subset Random Sampler: draws samples at random from a specified subset of the dataset; useful for dataset splits (training set, validation set, etc.)
- Weighted Random Sampler: draws samples at random according to per-sample weights; useful for class-imbalance problems
- BatchSampler: groups sample indices into batches, each containing the specified number of indices
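For instance, a sketch of WeightedRandomSampler and SubsetRandomSampler (the dummy data and weights are made up for illustration):
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import WeightedRandomSampler, SubsetRandomSampler

labels = torch.randint(0, 2, (100,))            # illustrative binary labels
ds = TensorDataset(torch.randn(100, 8), labels)

# Oversample class 1: give its samples 4x the weight of class 0
weights = 1.0 + 3.0 * (labels == 1).float()
weighted_loader = DataLoader(ds, batch_size=16,
                             sampler=WeightedRandomSampler(weights, num_samples=100, replacement=True))

# Split the indices into train/validation subsets, each sampled at random
train_loader = DataLoader(ds, batch_size=16, sampler=SubsetRandomSampler(range(0, 80)))
val_loader = DataLoader(ds, batch_size=16, sampler=SubsetRandomSampler(range(80, 100)))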
Setting num_workers > 0 can occasionally desynchronize and stall the program, so I set num_workers=0 here to avoid the hang.
Define a custom stock-price Dataset class
# Define StockDataset, inheriting from Dataset and overriding __getitem__() and __len__()
class StockDataset(torch.utils.data.Dataset):
    # Store the data on construction
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    # idx indexes the sliding windows; returns the input window and its target
    def __getitem__(self, idx):
        x = self.data[idx:idx + self.seq_length]  # input sequence
        y = self.data[idx + self.seq_length]      # target value
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

    # Returns the number of samples; DataLoader calls this
    def __len__(self):
        return len(self.data) - self.seq_length
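The code below assumes origin_data is a normalized NumPy array and scaler a fitted MinMaxScaler, but the original post omits that step; here is one plausible sketch (the CSV path and column name are assumptions), which also restates the model dimensions reused from section 2:
# Assumed loading step -- './data/IBM.csv' and the 'Close' column are illustrative
pf = pd.read_csv('./data/IBM.csv')
scaler = MinMaxScaler(feature_range=(-1, 1))
origin_data = scaler.fit_transform(pf['Close'].values.reshape(-1, 1))

# Model dimensions, as in section 2
input_dim, hidden_dim, num_layers, output_dim = 1, 32, 2, 1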
Read the Dataset through a DataLoader
train_length = 2000  # training length
seq_length = 20      # sequence length
batch_size = 32      # batch size

# Load the Dataset instances through DataLoader
train_dataset = StockDataset(origin_data[:train_length], seq_length)
test_dataset = StockDataset(origin_data[train_length:], seq_length)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
③ Build the neural network
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        output, hn = self.rnn(x)
        output = output[:, -1, :]  # keep only the last time step
        output = self.fc(output)
        return output
④ Train the network
Preliminary definitions
# Random seed
torch.manual_seed(20)
# Instantiate the network
rnn = RNN(input_dim, hidden_dim, num_layers, output_dim)
# Move the network to the compute device
rnn.to(device)
# Loss function
rnn_loss_function = nn.MSELoss()
# Optimizer
rnn_optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)
# Number of training epochs
epochs = 50
# Training-loss history
rnn_final_losses = []
Define the training function
def train_rnn():
    min_loss = 1.0
    for epoch in range(epochs):
        rnn.train()
        rnn_loss = 0.0
        for x_train, y_train in train_dataloader:
            x_train = x_train.to(device=device)
            y_train = y_train.to(device=device)
            # 1. Forward pass
            y_train_pred_rnn = rnn(x_train)
            # 2. Compute the loss
            loss_func_result = rnn_loss_function(y_train_pred_rnn, y_train)
            rnn_loss += loss_func_result.item()
            # 3. Backward pass
            rnn_optimizer.zero_grad()
            loss_func_result.backward()
            # 4. Update the parameters
            rnn_optimizer.step()
        rnn_loss = rnn_loss / len(train_dataloader)
        rnn_final_losses.append(rnn_loss)
        # Checkpoint the best model so far
        if rnn_loss < min_loss:
            min_loss = rnn_loss
            torch.save(rnn.state_dict(), "rnn_test.pth")
            print("Saved PyTorch Model State to rnn_test.pth")
        if epoch % 10 == 0:
            print("RNN:: Epoch: {}, Loss: {} ".format(epoch + 1, rnn_loss))
Run the training function
train_rnn()
⑤ Test the network
# Predict with the trained model (best checkpoint)
rnn.load_state_dict(torch.load("rnn_test.pth"))
rnn.eval()

with torch.no_grad():
    # Stack all input sequences from the training set
    X_train = torch.stack([x for x, y in train_dataset])
    train_predictions = rnn(X_train.to(device)).squeeze().cpu().detach().numpy()

with torch.no_grad():
    # Stack all input sequences from the test set
    X_test = torch.stack([x for x, y in test_dataset])
    test_predictions = rnn(X_test.to(device)).squeeze().cpu().detach().numpy()

# Invert the normalization on the predictions
origin_data = scaler.inverse_transform(origin_data.reshape(-1, 1))
train_predictions = scaler.inverse_transform(train_predictions.reshape(-1, 1))
test_predictions = scaler.inverse_transform(test_predictions.reshape(-1, 1))
① Plotting with Matplotlib
Run the plotting code
# Plot the results
plt.figure(figsize=(12, 6))
plt.plot(origin_data, label='Original Data')
plt.plot(range(seq_length, seq_length + len(train_predictions)), train_predictions, label='RNN Train Predictions', linestyle='--')
plt.plot(range(seq_length + train_length, len(test_predictions) + seq_length + train_length), test_predictions, label='RNN Test Predictions', linestyle='--')

plt.legend()
plt.title("RNN Predictions")
plt.xlabel("Time Step")
plt.ylabel("Value")
plt.show()
② Plotting with Seaborn
Convert the data to DataFrames
original = pd.DataFrame(origin_data)
df_train_predictions = pd.DataFrame(train_predictions)
df_test_predictions = pd.DataFrame(test_predictions)
Run the plotting code
import seaborn as sns

sns.set_style("darkgrid")
fig = plt.figure(figsize=(16, 6))
fig.subplots_adjust(hspace=0.2, wspace=0.2)

# Left panel: the fitted curves
plt.subplot(1, 2, 1)
ax = sns.lineplot(x=original.index, y=original[0], label="Original Data", color='blue')
ax = sns.lineplot(x=df_train_predictions.index + seq_length, y=df_train_predictions[0], label="RNN Train Prediction", color='red')
ax = sns.lineplot(x=df_test_predictions.index + seq_length + train_length, y=df_test_predictions[0], label="RNN Test Prediction", color='darkred')
ax.set_title('Stock', size=14, fontweight='bold')
ax.set_xlabel("Days", size=14)
ax.set_ylabel("Value", size=14)
ax.set_xticklabels('', size=10)

# Right panel: the training-loss curve (only the RNN is trained in this section)
plt.subplot(1, 2, 2)
ax = sns.lineplot(data=rnn_final_losses, label="RNN Loss", color='red')
ax.set_xlabel("Epoch", size=14)
ax.set_ylabel("Loss", size=14)
ax.set_title("Training Loss", size=14, fontweight='bold')
plt.show()
3.2 Amazon Dataset
3.2.1 Dataset
Amazon stock-price data, commonly used when learning machine-learning algorithms.
3.2.2 Training Results
The code mirrors the IBM example, so only the run results are shown here.
Training process: (figure)
Matplotlib plot: (figure)
Seaborn plot: (figure)