- 🍨 本文為🔗365天深度學習訓練營 中的學習記錄博客
- 🍖 原作者:K同學啊
目標
- 實現CIFAR-10的彩色圖片識別
- 實現比P1周更復雜一點的CNN網絡
具體實現
(一)環境
語言環境:Python 3.10
編 譯 器: PyCharm
框 架: Pytorch 2.5.1
(二)具體步驟
1. 完整代碼實現
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision # 第一步:設置GPU
def USE_GPU():
    """Select the compute device: CUDA when available, CPU otherwise."""
    if not torch.cuda.is_available():
        print('CUDA is not available. Will use CPU')
        return torch.device("cpu")
    print('CUDA is available, will use GPU')
    return torch.device("cuda")


# Step 1: pick the device once, up front; everything below moves onto it.
device = USE_GPU()
輸出:CUDA is available, will use GPU
# 第二步:導入數據。同樣的CIFAR-10也是torch內置了,可以自動下載
# Step 2: load CIFAR-10 — bundled with torchvision, downloaded on first use.
to_tensor = torchvision.transforms.ToTensor()
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=to_tensor)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=to_tensor)

batch_size = 32

train_dataload = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=batch_size,
                                             shuffle=True)
test_dataload = torch.utils.data.DataLoader(test_dataset,
                                            batch_size=batch_size)

# Peek at one batch to confirm the layout.
# Tensor shape is [batch_size, channel, height, width]:
# batch_size is chosen above; channel/height/width describe each image.
imgs, labels = next(iter(train_dataload))
print(imgs.shape)
# Visualize the first 20 images of the batch.
import numpy as np

plt.figure(figsize=(20, 5))
for i, images in enumerate(imgs[:20]):
    # Bug fix: transpose the single image `images` (C, H, W) -> (H, W, C)
    # for matplotlib. The original transposed the whole 4-D batch `imgs`,
    # which raises a ValueError (transpose axes must match tensor rank).
    npimg = images.numpy().transpose((1, 2, 0))
    # Split the figure into a 2x10 grid and draw subplot i+1.
    plt.subplot(2, 10, i + 1)
    plt.imshow(npimg, cmap=plt.cm.binary)
    plt.axis('off')
plt.show()
輸出:
Files already downloaded and verified
Files already downloaded and verified
torch.Size([32, 3, 32, 32])
# 第三步,構建CNN網絡
import torch.nn.functional as F

num_classes = 10  # CIFAR-10 has ten categories


class Model(nn.Module):
    """Small CNN for 32x32 RGB images: three conv+pool stages, two FC layers."""

    def __init__(self):
        super(Model, self).__init__()
        # Feature extractor: each stage is a 3x3 conv followed by 2x2 max-pool.
        # Spatial size: 32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2.
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        # Classifier head: 128 channels * 2 * 2 spatial = 512 features.
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return raw class logits for a batch of (N, 3, 32, 32) images."""
        for conv, pool in ((self.conv1, self.pool1),
                           (self.conv2, self.pool2),
                           (self.conv3, self.pool3)):
            x = pool(F.relu(conv(x)))
        x = torch.flatten(x, 1)  # (N, 512)
        return self.fc2(F.relu(self.fc1(x)))


from torchinfo import summary
# Instantiate the network and move it onto the selected device.
model = Model()
model = model.to(device)
summary(model)  # print a per-layer parameter summary
# Training setup.
loss_fn = nn.CrossEntropyLoss()  # classification criterion (logits vs labels)
learn_rate = 1e-2                # SGD step size
# Plain SGD over every trainable parameter of the model.
opt = torch.optim.SGD(model.parameters(), lr=learn_rate)
def train(dataloader, model, loss_fn, optimizer, device=None):
    """Run one training epoch over `dataloader`.

    Args:
        dataloader: yields (images, labels) batches. For the CIFAR-10 train
            split this covers 50000 images (1563 batches at batch_size=32).
        model: the network being optimized; must already be on its target device.
        loss_fn: criterion comparing logits to integer class labels.
        optimizer: updates the model parameters after each backward pass.
        device: device to move batches to. Defaults to the device the model's
            parameters live on (replaces the previous reliance on a
            module-level `device` global; backward-compatible).

    Returns:
        (train_acc, train_loss): accuracy averaged over samples and loss
        averaged over batches for this epoch.
    """
    size = len(dataloader.dataset)   # number of training samples
    num_batches = len(dataloader)    # number of batches per epoch
    if device is None:
        device = next(model.parameters()).device
    train_acc, train_loss = 0, 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        # Forward pass: prediction and loss.
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backward pass and parameter update.
        optimizer.zero_grad()  # clear stale gradients
        loss.backward()
        optimizer.step()
        # Accumulate correct-prediction count and batch loss.
        train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
        train_loss += loss.item()
    train_acc /= size
    train_loss /= num_batches
    return train_acc, train_loss
def test(dataloader, model, loss_fn): size = len(dataloader.dataset) # 測試集大小,這里一共是10000張圖片 num_batches = len(dataloader) # 批次大小 ,這里312,即10000/32=312.5,向上取整 test_acc, test_loss = 0, 0 # 因為是測試,因此不用訓練,梯度也不用計算不用更新 with torch.no_grad(): for imgs, target in dataloader: imgs, target = imgs.to(device), target.to(device) # 計算loss target_pred = model(imgs) loss = loss_fn(target_pred, target) test_loss += loss.item() test_acc += (target_pred.argmax(1) == target).type(torch.float).sum().item() test_acc /= size test_loss /= num_batches return test_acc, test_loss # 正式訓練
epochs = 10

# Per-epoch history, used for the plots below.
train_acc, train_loss, test_acc, test_loss = [], [], [], []

for epoch in range(epochs):
    model.train()  # enable training-mode behavior (dropout/batch-norm)
    epoch_train_acc, epoch_train_loss = train(train_dataload, model, loss_fn, opt)

    model.eval()   # switch to inference-mode behavior for evaluation
    epoch_test_acc, epoch_test_loss = test(test_dataload, model, loss_fn)

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    template = 'Epoch:{:2d}, 訓練正確率:{:.1f}%, 訓練損失率:{:.3f}, 測試正確率:{:.1f}%, 測試損失率:{:.3f}'
    print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss,
                          epoch_test_acc * 100, epoch_test_loss))

print('Done')
# Plot the training curves and save the trained model.
import os
import warnings

warnings.filterwarnings('ignore')             # suppress warning noise in output
plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK labels correctly
plt.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly
plt.rcParams['figure.dpi'] = 100              # figure resolution

epochs_range = range(epochs)

plt.figure(figsize=(12, 3))

plt.subplot(1, 2, 1)  # left: accuracy curves
plt.plot(epochs_range, train_acc, label='訓練正確率')
plt.plot(epochs_range, test_acc, label='測試正確率')
plt.legend(loc='lower right')
plt.title('訓練和測試正確率比較')

plt.subplot(1, 2, 2)  # right: loss curves
plt.plot(epochs_range, train_loss, label='訓練損失率')
plt.plot(epochs_range, test_loss, label='測試損失率')
plt.legend(loc='upper right')
plt.title('訓練和測試損失率比較')

plt.show()

# Save the whole model. Bug fix: torch.save raises FileNotFoundError when
# the target directory is missing, so make sure it exists first.
os.makedirs('./models', exist_ok=True)
torch.save(model, './models/cnn-cifar10.pth')
再次設置epochs為50訓練結果:
epochs增加到100,訓練結果:
可以看到訓練集和測試集的差距有點大,不太理想。做一下數據增強試試:
# Augmentation pipelines. Bug fix: `transforms` was never imported — the
# file only imports `torchvision` — so this dict raised a NameError as
# written; import the submodule explicitly.
from torchvision import transforms

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),  # random left-right flip (augmentation)
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),  # no augmentation at evaluation time
    ]),
}
在dataset中:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transforms['train'])
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms['test'])
運行結果:
比較漂亮了,再調整batch_size=16和epochs=20,提高了近6個百分點。
batch_size=16,epochs=50:在第20輪左右的時候,驗證集的準確率基本就沒有再提高了。和上面基本一樣。
(三)總結
- epochs并不是越多越好。batch_size同樣的道理
- 數據增強確實可以提高模型訓練的準確性。