文章目錄
- 前言
- 0、數據集準備
- 1、數據集
- 2、dataset
- 3、model
- 4、訓練模型
前言
在pytorch中模型訓練一般分為以下幾個步驟:
0、數據集準備
1、數據集讀取(dataset模塊)
2、數據集轉換為tensor(dataloader模塊)
3、定義模型model(編寫模型代碼,主要是前向傳播)
4、定義損失函數loss
5、定義優化器optimizer
6、最后一步是模型訓練階段train:這一步會利用循環把dataset->dataloader->model->loss->optimizer合并起來。
相比于普通的函數,神經網絡并沒有特別神奇的地方,我們不妨把訓練過程看成普通函數參數求解的過程,也就是最優化求解參數。下面以AlexNet模型為例,進行分類訓練。
0、數據集準備
分類數據不需要進行標注,只需要給出類別就可以了;相應地,分割、檢測任務則需要借助labelme或者labelimg進行標注。將數據分為訓練集、驗證集、測試集。訓練集用于模型訓練,驗證集用于訓練過程中檢驗模型訓練參數的表現,測試集是模型訓練完成之后驗證模型的表現。
1、數據集
從這里下載數據集The TU Darmstadt Database (formerly the ETHZ Database),包含三種類型共327張照片(115 motorbikes + 50 x 2 cars + 112 cows = 327),把數據分為訓練集train和驗證集val
并對train和val文件夾形成對應的標簽文件,每一行為照片的名稱和對應的類別編號(從0開始):
2、dataset
現在寫一個名為dataset.py的文件,其中定義一個VOCDataset的類,用來讀取訓練集和驗證集。VOCDataset繼承了torch.utils.data.Dataset,并重寫父類的兩個函數:__getitem__返回每個圖像及其對應的標簽,__len__返回數據集的數量:
import torch
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from PIL import Image
import os


class VOCDataset(Dataset):
    """Image-classification dataset backed by a directory of PNGs and a label file.

    Each line of the label file has the format ``<img_name> <class_id>``,
    with class ids starting at 0.

    Args:
        img_dir: Directory containing the ``.png`` images.
        label_root: Path to the text label file described above.
        transform: Optional callable applied to each PIL image; when omitted,
            images are converted with ``transforms.ToTensor()``.
    """

    def __init__(self, img_dir, label_root, transform=None):
        self.img_root = img_dir
        self.label_root = label_root
        self.transform = transform
        # Collect all image paths. Sorted so the sample order is deterministic
        # (os.listdir's order is filesystem-dependent).
        self.img_paths = sorted(
            os.path.join(self.img_root, f)
            for f in os.listdir(self.img_root)
            if f.endswith('.png')
        )
        # Parse the label file: one "img_name class_id" pair per line.
        self.label_classes = {}
        with open(label_root, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Skip blank lines (e.g. a trailing newline) instead of
                    # crashing on the tuple unpacking below.
                    continue
                img_name, class_id = line.split()
                self.label_classes[img_name] = int(class_id)

    def __len__(self):
        """Return the number of images found in ``img_dir``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return the ``(image_tensor, class_id)`` pair for sample ``idx``.

        Raises:
            ValueError: If the image file has no entry in the label file.
        """
        img_path = self.img_paths[idx]
        img = Image.open(img_path).convert('RGB')
        # Labels are keyed by bare file name, not full path.
        img_name = os.path.basename(img_path)
        target = self.label_classes.get(img_name, -1)
        if target == -1:
            raise ValueError(f"Image {img_name} not found in label file.")
        if self.transform:
            img = self.transform(img)
        else:
            # No transform given: fall back to a plain tensor conversion.
            img = transforms.ToTensor()(img)
        return img, target
3、model
新建一個model.py的文件,寫一個AlexNet的類(參考動手學深度學習7.1),繼承torch.nn.Module,重寫forward函數:
from torch import nn
from torchvision import modelsclass AlexNet(nn.Module):def __init__(self,num_class=3):super(AlexNet, self).__init__()self.conv2d1=nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=1)self.pool1=nn.MaxPool2d(kernel_size=3,stride=2,padding=0)self.conv2d2=nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,stride=1,padding=2)self.pool2=nn.MaxPool2d(kernel_size=3,stride=2,padding=0)self.conv2d3=nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,stride=1,padding=1)self.conv2d4=nn.Conv2d(in_channels=384,out_channels=384,kernel_size=3,stride=1,padding=1)self.conv2d5=nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1)self.pool3=nn.MaxPool2d(kernel_size=3,stride=2,padding=0)# 全連接層4096self.fc1=nn.Linear(256*5*5,4096)self.fc2=nn.Linear(4096,4096)self.fc3=nn.Linear(4096,num_class)self.sequential = nn.Sequential(self.conv2d1,nn.ReLU(),self.pool1,self.conv2d2,nn.ReLU(),self.pool2,self.conv2d3,nn.ReLU(),self.conv2d4,nn.ReLU(),self.conv2d5,nn.ReLU(),self.pool3,nn.Flatten(),self.fc1,nn.ReLU(),nn.Dropout(0.5),self.fc2,nn.ReLU(),nn.Dropout(0.5),self.fc3)# 初始化權重for m in self.modules():if isinstance(m, nn.Conv2d):nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')elif isinstance(m, nn.Linear):nn.init.normal_(m.weight, 0, 0.01)nn.init.constant_(m.bias, 0)def forward(self,x):x = self.sequential(x)return x
4、訓練模型
首先定義損失函數和優化器:
# Cross-entropy loss for multi-class classification, and SGD with momentum
# plus L2 weight decay as the optimizer. ``model`` and ``learning_rate`` are
# defined by the surrounding training script.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4)
新建一個train.py的文件:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from dataset import VOCDataset
from model import AlexNet, ResnetPretrained
from torchvision import models
from torchvision.datasets import CIFAR10
from dataset import VOCDataset
import os
import tensorboard  # NOTE(review): imported but never used below


def train(model, train_dataset, val_dataset, num_epochs=20, batch_size=32, learning_rate=0.001):
    """Train ``model`` on ``train_dataset``, validating after every epoch.

    The checkpoint with the best validation accuracy is saved to
    'best_alexnet_cifar10.pth'.

    Args:
        model: An ``nn.Module`` already moved to its target device.
        train_dataset / val_dataset: ``Dataset`` objects yielding (image, label).
        num_epochs: Number of full passes over the training data.
        batch_size: Mini-batch size for both loaders.
        learning_rate: Initial SGD learning rate.
    """
    # 1. Data loaders: shuffle only the training set.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Run batches on whatever device the model already lives on. The original
    # code called .cuda() unconditionally, which crashed on CPU-only machines
    # even though the caller had selected the device with torch.device(...).
    device = next(model.parameters()).device

    # 2. Loss function and optimizer.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4)
    # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # 3. Learning-rate scheduler, created once outside the epoch loop.
    # mode='max' because we step it on validation *accuracy* (higher = better).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

    # 4. Training loop.
    best_acc = 0.0
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        total = 0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Accumulate the sample-weighted loss so avg_loss is per-sample.
            running_loss += loss.item() * inputs.size(0)
            total += inputs.size(0)
            if i % 100 == 0:
                avg_loss = running_loss / total
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {avg_loss:.4f}')

        # Validate at the end of every epoch (no gradients needed).
        model.eval()
        correct = 0
        total_val = 0
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct += (predicted == labels).sum().item()
                val_loss += loss.item() * inputs.size(0)
        epoch_acc = 100 * correct / total_val
        avg_val_loss = val_loss / total_val
        print(f'Epoch {epoch+1}/{num_epochs} | '
              f'Train Loss: {running_loss/total:.4f} | '
              f'Val Loss: {avg_val_loss:.4f} | '
              f'Val Acc: {epoch_acc:.2f}%')

        # Update the learning rate from validation accuracy (currently disabled).
        # scheduler.step(epoch_acc)

        # Keep the best checkpoint by validation accuracy.
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            torch.save(model.state_dict(), 'best_alexnet_cifar10.pth')

    print(f'Best Validation Accuracy: {best_acc:.2f}%')


if __name__ == "__main__":
    # 1. Dataset paths (TU Darmstadt database, pre-split into train/val).
    train_img_dir = r'F:\dataset\tud\TUDarmstadt\PNGImages\train'
    val_img_dir = r'F:\dataset\tud\TUDarmstadt\PNGImages\val'
    train_label_file = r'F:\dataset\tud\TUDarmstadt\PNGImages/train_set.txt'
    val_label_file = r'F:\dataset\tud\TUDarmstadt\PNGImages/val_set.txt'

    # 2. Datasets. The training split gets heavier augmentation; the
    # validation split only gets the deterministic resize + normalization.
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),   # upscale first...
        transforms.RandomCrop(224),      # ...then random-crop to the model's input size
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        # ImageNet statistics, the usual choice for natural images.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    transform_val = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_dataset = VOCDataset(train_img_dir, train_label_file, transform=transform_train)
    val_dataset = VOCDataset(val_img_dir, val_label_file, transform=transform_val)
    print(f'Train dataset size: {len(train_dataset)}')
    print(f'Validation dataset size: {len(val_dataset)}')

    # (A commented-out CIFAR-10 alternative was removed here: substitute
    # torchvision.datasets.CIFAR10 for the two datasets above and use
    # num_class=10 to reproduce it.)

    # 3. Model. The TU Darmstadt data has 3 classes (motorbike/car/cow);
    # the original passed num_class=10, left over from the CIFAR-10 variant.
    model = AlexNet(num_class=3)
    # Use the GPU when available, otherwise fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Print the model structure if needed:
    # print(model)

    # 4. Train.
    train(model, train_dataset, val_dataset, num_epochs=20, batch_size=32, learning_rate=0.001)
    print('Finished Training')

    # 5. Save the final weights. Create the output directory first — the
    # original torch.save raised FileNotFoundError when 'output/' was missing.
    os.makedirs('output', exist_ok=True)
    torch.save(model.state_dict(), 'output/alexnet.pth')
    print('Model saved as alexnet.pth')
運行main函數就可以進行訓練了,后面會講一些如何改進這個模型和一些訓練技巧。
參考:
1
2
3