PyTorch 實現門控循環單元（GRU）

人工智能例子匯總：AI常見的算法和例子-CSDN博客

特性	GRU	LSTM
計算效率	更快，參數更少	相對較慢，參數更多
結構復雜度	只有兩個門（更新門和重置門）	三個門（輸入門、遺忘門、輸出門）
處理長時依賴	一般適用于中等長度依賴	更適合處理超長時序依賴
訓練速度	訓練更快，梯度更穩定	訓練較慢，占用更多內存

例子：

import torch
import torch.nn as nn
import torch.optim as optim
import random
import matplotlib.pyplot as plt# 🏁 迷宮環境（5×5）
class MazeEnv:
    """Square grid maze with a fixed start cell and a fixed goal cell.

    The agent starts at ``(0, 0)`` and the goal is ``(size - 1, size - 1)``.
    States are ``(x, y)`` tuples. A move that would leave the grid is clamped
    to the border, so the agent stays where it is along that axis.
    """

    def __init__(self, size: int = 5) -> None:
        """Create a ``size`` x ``size`` maze with the agent on the start cell."""
        self.size = size
        self.state = (0, 0)  # start cell
        self.goal = (size - 1, size - 1)  # goal cell
        # Action index -> (dx, dy) offset added to the state.
        # Labelled right, left, down, up by the original author.
        self.actions = [(0, 1), (0, -1), (1, 0), (-1, 0)]

    def reset(self) -> tuple:
        """Move the agent back to the start cell and return that state."""
        self.state = (0, 0)
        return self.state

    def step(self, action: int) -> tuple:
        """Apply one action and return ``(next_state, reward, done)``.

        Args:
            action: Index into ``self.actions``.

        Returns:
            The new ``(x, y)`` state, the reward (``1`` on reaching the goal,
            ``-0.1`` otherwise) and a flag that is true once the goal is
            reached.
        """
        dx, dy = self.actions[action]
        x, y = self.state
        upper = self.size - 1
        # Clamp each coordinate so the agent can never leave the grid.
        nx = max(0, min(upper, x + dx))
        ny = max(0, min(upper, y + dy))
        done = (nx, ny) == self.goal
        reward = 1 if done else -0.1
        self.state = (nx, ny)
        return (nx, ny), reward, done


# GRU policy network
class GRUPolicy(nn.Module):
    """Recurrent policy: a single GRU layer followed by a linear head."""

    def __init__(self, input_size, hidden_size, output_size):
        """Build the network.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the GRU hidden state.
            output_size: Number of scores produced per sequence.
        """
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, feature).
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Run the GRU over ``x`` and score the last time step.

        Args:
            x: Input of shape ``(batch, time, input_size)``.
            hidden: Initial GRU state of shape ``(1, batch, hidden_size)``.

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            ``(batch, output_size)`` and ``hidden`` is the updated GRU state.
        """
        out, hidden = self.gru(x, hidden)
        # Only the final time step feeds the linear head.
        out = self.fc(out[:, -1, :])
        return out, hidden


# Training parameters
env = MazeEnv(size=5)
policy = GRUPolicy(input_size=2, hidden_size=16, output_size=4)
optimizer = optim.Adam(policy.parameters(), lr=0.01)
# reduction="none" keeps one loss value per step so that every step can be
# weighted by its own return below.
loss_fn = nn.CrossEntropyLoss(reduction="none")

# Training
num_episodes = 500
max_steps = 20  # step budget per episode
gamma = 0.99  # discount factor applied to future rewards
epsilon = 1.0  # initial exploration probability
epsilon_min = 0.01  # lower bound for epsilon
epsilon_decay = 0.995  # multiplicative decay applied after every episode
best_path = []  # shortest goal-reaching path seen so far

for episode in range(num_episodes):
    state = env.reset()
    # Initial GRU state, sized from the network rather than a repeated literal.
    hidden = torch.zeros(1, 1, policy.gru.hidden_size)
    states, actions, rewards = [], [], []
    logits_list = []

    for _ in range(max_steps):
        state_tensor = torch.tensor(
            [[state[0], state[1]]], dtype=torch.float32
        ).unsqueeze(0)
        logits, hidden = policy(state_tensor, hidden)
        logits_list.append(logits)

        # Epsilon-greedy: explore with probability epsilon, otherwise take the
        # highest-scoring action.
        if random.random() < epsilon:
            action = random.choice(range(4))
        else:
            action = torch.argmax(logits, dim=1).item()

        next_state, reward, done = env.step(action)
        states.append(state)
        actions.append(action)
        rewards.append(reward)

        if done:
            print(f"Episode {episode} - Reached Goal!")
            # Keep the path only when it beats the best one recorded so far;
            # otherwise a later, longer success would overwrite a shorter one.
            path = states + [next_state]
            if not best_path or len(path) < len(best_path):
                best_path = path
            break

        state = next_state

    # Discounted return-to-go for every step, accumulated from the last step
    # backwards.
    returns = []
    running = 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.append(running)
    returns.reverse()

    logits = torch.cat(logits_list, dim=0)  # (T, 4)
    action_tensor = torch.tensor(actions, dtype=torch.long)  # (T,)
    return_tensor = torch.tensor(returns, dtype=torch.float32)  # (T,)

    # loss_fn gives -log pi(a_t | s_t) per step; weighting it by the return
    # raises the probability of actions that led to high return and lowers it
    # for the others (REINFORCE-style update).
    # NOTE(review): actions come from epsilon-greedy exploration rather than
    # from sampling the policy, so this is a heuristic estimate of the policy
    # gradient - confirm this is acceptable for the tutorial.
    loss = (loss_fn(logits, action_tensor) * return_tensor).mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Decay epsilon.
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    if episode % 100 == 0:
        print(f"Episode {episode}, Loss: {loss.item():.4f}, Epsilon: {epsilon:.4f}")

# Make sure best_path has been recorded
# Report the path recorded during training, if there is one.
if best_path:
    print(f"Best path: {best_path}")
else:
    print("No path found during training.")

# Test path (draw only the best path)
fig, ax = plt.subplots(figsize=(6, 6))

# Blank maze image; the grid size follows the environment instead of a
# repeated literal.
size = env.size
maze = [[0] * size for _ in range(size)]
ax.imshow(maze, cmap="coolwarm", origin="upper")

# Label the cells on the major ticks and draw the grid on minor ticks placed
# on the cell borders. imshow centres cell (row, col) at the point (col, row),
# so the borders sit at the half-integers.
ax.set_xticks(range(size))
ax.set_yticks(range(size))
borders = [k - 0.5 for k in range(size + 1)]
ax.set_xticks(borders, minor=True)
ax.set_yticks(borders, minor=True)
ax.grid(True, which="minor", color="black", linewidth=0.5)
ax.tick_params(which="minor", length=0)

# Draw the best path in red. Each rectangle is anchored half a cell up and to
# the left of the cell centre so that it covers exactly one cell.
for (x, y) in best_path:
    ax.add_patch(
        plt.Rectangle((y - 0.5, x - 0.5), 1, 1, color="red", alpha=0.8)
    )

# Mark the start and goal cells.
ax.text(0, 0, "S", ha="center", va="center", fontsize=14, color="white", fontweight="bold")
ax.text(size - 1, size - 1, "G", ha="center", va="center", fontsize=14, color="white", fontweight="bold")

plt.title("GRU RL Agent - Best Path")
plt.show()

本文來自互聯網用戶投稿，該文觀點僅代表作者本人，不代表本站立場。本站僅提供信息存儲空間服務，不擁有所有權，不承擔相關法律責任。
如若轉載，請注明出處：http://www.pswp.cn/news/894641.shtml
繁體地址，請注明出處：http://hk.pswp.cn/news/894641.shtml
英文地址，請注明出處：http://en.pswp.cn/news/894641.shtml

如若內容造成侵權/違法違規/事實不符，請聯繫多彩編程網進行投訴反饋（email: 809451989@qq.com），一經查實，立即刪除！