- 預訓練的概念
- 常見的分類預訓練模型
- 圖像預訓練模型的發展史
- 預訓練的策略
- 預訓練代碼實戰:resnet18
作業:
- 嘗試在cifar10對比如下其他的預訓練模型,觀察差異,盡可能和他人選擇的不同
- 嘗試通過ctrl進入resnet的內部,觀察殘差究竟是什么
預訓練模型選擇EfficientNet-B0
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os# 設置中文字體支持
plt.rcParams["font.family"] = ["SimHei"]
plt.rcParams['axes.unicode_minus'] = False # 解決負號顯示問題# 檢查GPU是否可用
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用設備: {device}")# 1. 數據預處理(訓練集增強,測試集標準化)
train_transform = transforms.Compose([transforms.RandomCrop(32, padding=4),transforms.RandomHorizontalFlip(),transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),transforms.RandomRotation(15),transforms.ToTensor(),transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])test_transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])# 2. 加載CIFAR-10數據集
train_dataset = datasets.CIFAR10(root='./data',train=True,download=True,transform=train_transform
)test_dataset = datasets.CIFAR10(root='./data',train=False,transform=test_transform
)# 3. 創建數據加載器
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)# 4. 定義EfficientNet-B0模型(替換原ResNet18)
def create_efficientnet_b0(pretrained=True, num_classes=10):model = models.efficientnet_b0(pretrained=pretrained) # 加載ImageNet預訓練權重# 修改最后一層全連接層(EfficientNet的分類層在classifier[1])in_features = model.classifier[1].in_features # 獲取原全連接層輸入特征數model.classifier[1] = nn.Linear(in_features, num_classes) # 改為10類輸出return model.to(device)# 5. 凍結/解凍模型層的函數(無需修改)
def freeze_model(model, freeze=True):"""凍結或解凍模型的卷積層參數"""for name, param in model.named_parameters():if 'fc' not in name: # 注意:EfficientNet中沒有'fc'層,這里實際會凍結所有非分類層參數param.requires_grad = not freeze # 原ResNet的fc層名稱不影響,邏輯通用frozen_params = sum(p.numel() for p in model.parameters() if not p.requires_grad)total_params = sum(p.numel() for p in model.parameters())if freeze:print(f"已凍結模型卷積層參數 ({frozen_params}/{total_params} 參數)")else:print(f"已解凍模型所有參數 ({total_params}/{total_params} 參數可訓練)")return model# 6. 訓練函數(無需修改)
def train_with_freeze_schedule(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs, freeze_epochs=5):train_loss_history = []test_loss_history = []train_acc_history = []test_acc_history = []all_iter_losses = []iter_indices = []if freeze_epochs > 0:model = freeze_model(model, freeze=True)for epoch in range(epochs):if epoch == freeze_epochs:model = freeze_model(model, freeze=False)optimizer.param_groups[0]['lr'] = 1e-4 # 解凍后降低學習率model.train()running_loss = 0.0correct_train = 0total_train = 0for batch_idx, (data, target) in enumerate(train_loader):data, target = data.to(device), target.to(device)optimizer.zero_grad()output = model(data)loss = criterion(output, target)loss.backward()optimizer.step()iter_loss = loss.item()all_iter_losses.append(iter_loss)iter_indices.append(epoch * len(train_loader) + batch_idx + 1)running_loss += iter_loss_, predicted = output.max(1)total_train += target.size(0)correct_train += predicted.eq(target).sum().item()if (batch_idx + 1) % 100 == 0:print(f"Epoch {epoch+1}/{epochs} | Batch {batch_idx+1}/{len(train_loader)} "f"| 單Batch損失: {iter_loss:.4f}")epoch_train_loss = running_loss / len(train_loader)epoch_train_acc = 100. * correct_train / total_trainmodel.eval()correct_test = 0total_test = 0test_loss = 0.0with torch.no_grad():for data, target in test_loader:data, target = data.to(device), target.to(device)output = model(data)test_loss += criterion(output, target).item()_, predicted = output.max(1)total_test += target.size(0)correct_test += predicted.eq(target).sum().item()epoch_test_loss = test_loss / len(test_loader)epoch_test_acc = 100. * correct_test / total_testtrain_loss_history.append(epoch_train_loss)test_loss_history.append(epoch_test_loss)train_acc_history.append(epoch_train_acc)test_acc_history.append(epoch_test_acc)if scheduler is not None:scheduler.step(epoch_test_loss)print(f"Epoch {epoch+1} 完成 | 訓練損失: {epoch_train_loss:.4f} "f"| 訓練準確率: {epoch_train_acc:.2f}% | 測試準確率: {epoch_test_acc:.2f}%")plot_iter_losses(all_iter_losses, iter_indices)plot_epoch_metrics(train_acc_history, test_acc_history, train_loss_history, test_loss_history)return epoch_test_acc# 7. 繪制函數(無需修改)
def plot_iter_losses(losses, indices):plt.figure(figsize=(10, 4))plt.plot(indices, losses, 'b-', alpha=0.7)plt.xlabel('Iteration(Batch序號)')plt.ylabel('損失值')plt.title('訓練過程中的Iteration損失變化')plt.grid(True)plt.show()def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):epochs = range(1, len(train_acc) + 1)plt.figure(figsize=(12, 5))plt.subplot(1, 2, 1)plt.plot(epochs, train_acc, 'b-', label='訓練準確率')plt.plot(epochs, test_acc, 'r-', label='測試準確率')plt.xlabel('Epoch')plt.ylabel('準確率 (%)')plt.title('準確率隨Epoch變化')plt.legend()plt.grid(True)plt.subplot(1, 2, 2)plt.plot(epochs, train_loss, 'b-', label='訓練損失')plt.plot(epochs, test_loss, 'r-', label='測試損失')plt.xlabel('Epoch')plt.ylabel('損失值')plt.title('損失值隨Epoch變化')plt.legend()plt.grid(True)plt.tight_layout()plt.show()# 主函數(修改模型創建部分)
def main():epochs = 40freeze_epochs = 5learning_rate = 1e-3weight_decay = 1e-4# 替換為EfficientNet-B0模型model = create_efficientnet_b0(pretrained=True, num_classes=10)optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)criterion = nn.CrossEntropyLoss()scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)final_accuracy = train_with_freeze_schedule(model=model,train_loader=train_loader,test_loader=test_loader,criterion=criterion,optimizer=optimizer,scheduler=scheduler,device=device,epochs=epochs,freeze_epochs=freeze_epochs)print(f"訓練完成!最終測試準確率: {final_accuracy:.2f}%")if __name__ == "__main__":main()
已凍結模型卷積層參數 (3383818/4020358 參數)
Epoch 1/40 | Batch 100/782 | 單Batch損失: 2.3164
Epoch 1/40 | Batch 200/782 | 單Batch損失: 2.2713
Epoch 1/40 | Batch 300/782 | 單Batch損失: 2.2363
Epoch 1/40 | Batch 400/782 | 單Batch損失: 2.1228
Epoch 1/40 | Batch 500/782 | 單Batch損失: 2.0224
Epoch 1/40 | Batch 600/782 | 單Batch損失: 1.8438
Epoch 1/40 | Batch 700/782 | 單Batch損失: 2.1391
Epoch 1 完成 | 訓練損失: 2.1032 | 訓練準確率: 24.17% | 測試準確率: 37.09%
Epoch 2/40 | Batch 100/782 | 單Batch損失: 1.9168
Epoch 2/40 | Batch 200/782 | 單Batch損失: 1.6931
Epoch 2/40 | Batch 300/782 | 單Batch損失: 1.6612
Epoch 2/40 | Batch 400/782 | 單Batch損失: 1.8175
Epoch 2/40 | Batch 500/782 | 單Batch損失: 1.5269
Epoch 2/40 | Batch 600/782 | 單Batch損失: 1.4632
Epoch 2/40 | Batch 700/782 | 單Batch損失: 1.7014
Epoch 2 完成 | 訓練損失: 1.6859 | 訓練準確率: 42.29% | 測試準確率: 51.49%
Epoch 3/40 | Batch 100/782 | 單Batch損失: 1.4499
Epoch 3/40 | Batch 200/782 | 單Batch損失: 1.3999
Epoch 3/40 | Batch 300/782 | 單Batch損失: 1.5310
Epoch 3/40 | Batch 400/782 | 單Batch損失: 1.3701
Epoch 3/40 | Batch 500/782 | 單Batch損失: 1.3659
Epoch 3/40 | Batch 600/782 | 單Batch損失: 1.5181
Epoch 3/40 | Batch 700/782 | 單Batch損失: 1.2136
Epoch 3 完成 | 訓練損失: 1.4554 | 訓練準確率: 51.63% | 測試準確率: 58.29%
Epoch 4/40 | Batch 100/782 | 單Batch損失: 1.4563
Epoch 4/40 | Batch 200/782 | 單Batch損失: 1.3703
Epoch 4/40 | Batch 300/782 | 單Batch損失: 1.2104
Epoch 4/40 | Batch 400/782 | 單Batch損失: 1.4546
Epoch 4/40 | Batch 500/782 | 單Batch損失: 1.1735
Epoch 4/40 | Batch 600/782 | 單Batch損失: 1.2803
Epoch 4/40 | Batch 700/782 | 單Batch損失: 1.1265
Epoch 4 完成 | 訓練損失: 1.3068 | 訓練準確率: 57.07% | 測試準確率: 63.30%
Epoch 5/40 | Batch 100/782 | 單Batch損失: 1.1367
Epoch 5/40 | Batch 200/782 | 單Batch損失: 1.2661
Epoch 5/40 | Batch 300/782 | 單Batch損失: 1.2475
Epoch 5/40 | Batch 400/782 | 單Batch損失: 1.2228
Epoch 5/40 | Batch 500/782 | 單Batch損失: 1.0473
Epoch 5/40 | Batch 600/782 | 單Batch損失: 1.1418
Epoch 5/40 | Batch 700/782 | 單Batch損失: 1.0736
Epoch 5 完成 | 訓練損失: 1.2282 | 訓練準確率: 59.58% | 測試準確率: 64.66%
已解凍模型所有參數 (4020358/4020358 參數可訓練)
Epoch 6/40 | Batch 100/782 | 單Batch損失: 1.1183
Epoch 6/40 | Batch 200/782 | 單Batch損失: 1.2566
Epoch 6/40 | Batch 300/782 | 單Batch損失: 1.1152
Epoch 6/40 | Batch 400/782 | 單Batch損失: 0.8182
Epoch 6/40 | Batch 500/782 | 單Batch損失: 1.0990
Epoch 6/40 | Batch 600/782 | 單Batch損失: 1.2190
Epoch 6/40 | Batch 700/782 | 單Batch損失: 0.9595
Epoch 6 完成 | 訓練損失: 1.0763 | 訓練準確率: 62.93% | 測試準確率: 71.08%
Epoch 7/40 | Batch 100/782 | 單Batch損失: 0.9222
Epoch 7/40 | Batch 200/782 | 單Batch損失: 0.9484
Epoch 7/40 | Batch 300/782 | 單Batch損失: 0.8022
Epoch 7/40 | Batch 400/782 | 單Batch損失: 0.8303
Epoch 7/40 | Batch 500/782 | 單Batch損失: 0.8881
Epoch 7/40 | Batch 600/782 | 單Batch損失: 1.0032
Epoch 7/40 | Batch 700/782 | 單Batch損失: 0.9384
Epoch 7 完成 | 訓練損失: 0.9711 | 訓練準確率: 66.31% | 測試準確率: 74.27%
Epoch 8/40 | Batch 100/782 | 單Batch損失: 0.9462
Epoch 8/40 | Batch 200/782 | 單Batch損失: 0.9538
Epoch 8/40 | Batch 300/782 | 單Batch損失: 0.7513
Epoch 8/40 | Batch 400/782 | 單Batch損失: 0.9691
Epoch 8/40 | Batch 500/782 | 單Batch損失: 0.7982
Epoch 8/40 | Batch 600/782 | 單Batch損失: 1.0675
Epoch 8/40 | Batch 700/782 | 單Batch損失: 1.0165
Epoch 8 完成 | 訓練損失: 0.9008 | 訓練準確率: 68.85% | 測試準確率: 75.67%
Epoch 9/40 | Batch 100/782 | 單Batch損失: 0.8900
Epoch 9/40 | Batch 200/782 | 單Batch損失: 0.7397
Epoch 9/40 | Batch 300/782 | 單Batch損失: 0.8618
Epoch 9/40 | Batch 400/782 | 單Batch損失: 0.7714
Epoch 9/40 | Batch 500/782 | 單Batch損失: 0.9160
Epoch 9/40 | Batch 600/782 | 單Batch損失: 1.0390
Epoch 9/40 | Batch 700/782 | 單Batch損失: 1.0620
Epoch 9 完成 | 訓練損失: 0.8472 | 訓練準確率: 70.48% | 測試準確率: 77.11%
Epoch 10/40 | Batch 100/782 | 單Batch損失: 0.7377
Epoch 10/40 | Batch 200/782 | 單Batch損失: 0.8621
Epoch 10/40 | Batch 300/782 | 單Batch損失: 0.6710
Epoch 10/40 | Batch 400/782 | 單Batch損失: 0.8779
Epoch 10/40 | Batch 500/782 | 單Batch損失: 0.8361
Epoch 10/40 | Batch 600/782 | 單Batch損失: 0.6018
Epoch 10/40 | Batch 700/782 | 單Batch損失: 0.8028
Epoch 10 完成 | 訓練損失: 0.8086 | 訓練準確率: 71.89% | 測試準確率: 77.98%
Epoch 11/40 | Batch 100/782 | 單Batch損失: 0.8032
Epoch 11/40 | Batch 200/782 | 單Batch損失: 0.8740
Epoch 11/40 | Batch 300/782 | 單Batch損失: 0.7705
Epoch 11/40 | Batch 400/782 | 單Batch損失: 0.7489
Epoch 11/40 | Batch 500/782 | 單Batch損失: 0.7174
Epoch 11/40 | Batch 600/782 | 單Batch損失: 1.0705
Epoch 11/40 | Batch 700/782 | 單Batch損失: 0.8507
Epoch 11 完成 | 訓練損失: 0.7604 | 訓練準確率: 73.46% | 測試準確率: 79.17%
Epoch 12/40 | Batch 100/782 | 單Batch損失: 0.6001
Epoch 12/40 | Batch 200/782 | 單Batch損失: 0.7072
Epoch 12/40 | Batch 300/782 | 單Batch損失: 0.7770
Epoch 12/40 | Batch 400/782 | 單Batch損失: 0.7470
Epoch 12/40 | Batch 500/782 | 單Batch損失: 0.7277
Epoch 12/40 | Batch 600/782 | 單Batch損失: 0.7031
Epoch 12/40 | Batch 700/782 | 單Batch損失: 0.5620
Epoch 12 完成 | 訓練損失: 0.7359 | 訓練準確率: 74.51% | 測試準確率: 79.65%
Epoch 13/40 | Batch 100/782 | 單Batch損失: 0.8507
Epoch 13/40 | Batch 200/782 | 單Batch損失: 0.6815
Epoch 13/40 | Batch 300/782 | 單Batch損失: 0.8775
Epoch 13/40 | Batch 400/782 | 單Batch損失: 0.5377
Epoch 13/40 | Batch 500/782 | 單Batch損失: 1.1903
Epoch 13/40 | Batch 600/782 | 單Batch損失: 0.8762
Epoch 13/40 | Batch 700/782 | 單Batch損失: 0.6386
Epoch 13 完成 | 訓練損失: 0.7070 | 訓練準確率: 75.33% | 測試準確率: 80.40%
Epoch 14/40 | Batch 100/782 | 單Batch損失: 0.7123
Epoch 14/40 | Batch 200/782 | 單Batch損失: 0.5596
Epoch 14/40 | Batch 300/782 | 單Batch損失: 0.4609
Epoch 14/40 | Batch 400/782 | 單Batch損失: 0.6936
Epoch 14/40 | Batch 500/782 | 單Batch損失: 0.6428
Epoch 14/40 | Batch 600/782 | 單Batch損失: 0.8071
Epoch 14/40 | Batch 700/782 | 單Batch損失: 0.6818
Epoch 14 完成 | 訓練損失: 0.6840 | 訓練準確率: 76.14% | 測試準確率: 81.06%
Epoch 15/40 | Batch 100/782 | 單Batch損失: 0.6297
Epoch 15/40 | Batch 200/782 | 單Batch損失: 0.5602
Epoch 15/40 | Batch 300/782 | 單Batch損失: 0.5888
Epoch 15/40 | Batch 400/782 | 單Batch損失: 0.5709
Epoch 15/40 | Batch 500/782 | 單Batch損失: 1.0929
Epoch 15/40 | Batch 600/782 | 單Batch損失: 0.5833
Epoch 15/40 | Batch 700/782 | 單Batch損失: 0.5528
Epoch 15 完成 | 訓練損失: 0.6578 | 訓練準確率: 77.17% | 測試準確率: 81.35%
Epoch 16/40 | Batch 100/782 | 單Batch損失: 0.6508
Epoch 16/40 | Batch 200/782 | 單Batch損失: 0.5686
Epoch 16/40 | Batch 300/782 | 單Batch損失: 0.4537
Epoch 16/40 | Batch 400/782 | 單Batch損失: 0.6924
Epoch 16/40 | Batch 500/782 | 單Batch損失: 0.4872
Epoch 16/40 | Batch 600/782 | 單Batch損失: 0.6236
Epoch 16/40 | Batch 700/782 | 單Batch損失: 0.5524
Epoch 16 完成 | 訓練損失: 0.6445 | 訓練準確率: 77.41% | 測試準確率: 81.46%
Epoch 17/40 | Batch 100/782 | 單Batch損失: 0.7385
Epoch 17/40 | Batch 200/782 | 單Batch損失: 0.5500
Epoch 17/40 | Batch 300/782 | 單Batch損失: 0.8400
Epoch 17/40 | Batch 400/782 | 單Batch損失: 0.4430
Epoch 17/40 | Batch 500/782 | 單Batch損失: 0.5824
Epoch 17/40 | Batch 600/782 | 單Batch損失: 0.4824
Epoch 17/40 | Batch 700/782 | 單Batch損失: 0.6853
Epoch 17 完成 | 訓練損失: 0.6190 | 訓練準確率: 78.45% | 測試準確率: 82.05%
Epoch 18/40 | Batch 100/782 | 單Batch損失: 0.5869
Epoch 18/40 | Batch 200/782 | 單Batch損失: 0.5849
Epoch 18/40 | Batch 300/782 | 單Batch損失: 0.6970
Epoch 18/40 | Batch 400/782 | 單Batch損失: 0.5477
Epoch 18/40 | Batch 500/782 | 單Batch損失: 0.7288
Epoch 18/40 | Batch 600/782 | 單Batch損失: 0.5958
Epoch 18/40 | Batch 700/782 | 單Batch損失: 0.6478
Epoch 18 完成 | 訓練損失: 0.6108 | 訓練準確率: 78.48% | 測試準確率: 82.63%
Epoch 19/40 | Batch 100/782 | 單Batch損失: 0.4268
Epoch 19/40 | Batch 200/782 | 單Batch損失: 0.7144
Epoch 19/40 | Batch 300/782 | 單Batch損失: 0.5052
Epoch 19/40 | Batch 400/782 | 單Batch損失: 0.4274
Epoch 19/40 | Batch 500/782 | 單Batch損失: 0.7780
Epoch 19/40 | Batch 600/782 | 單Batch損失: 0.5814
Epoch 19/40 | Batch 700/782 | 單Batch損失: 0.4475
Epoch 19 完成 | 訓練損失: 0.5897 | 訓練準確率: 79.30% | 測試準確率: 82.75%
Epoch 20/40 | Batch 100/782 | 單Batch損失: 0.4922
Epoch 20/40 | Batch 200/782 | 單Batch損失: 0.5738
Epoch 20/40 | Batch 300/782 | 單Batch損失: 0.7347
Epoch 20/40 | Batch 400/782 | 單Batch損失: 0.4208
Epoch 20/40 | Batch 500/782 | 單Batch損失: 0.7527
Epoch 20/40 | Batch 600/782 | 單Batch損失: 0.4891
Epoch 20/40 | Batch 700/782 | 單Batch損失: 0.6357
Epoch 20 完成 | 訓練損失: 0.5719 | 訓練準確率: 80.02% | 測試準確率: 83.08%
Epoch 21/40 | Batch 100/782 | 單Batch損失: 0.3936
Epoch 21/40 | Batch 200/782 | 單Batch損失: 0.6397
Epoch 21/40 | Batch 300/782 | 單Batch損失: 0.7329
Epoch 21/40 | Batch 400/782 | 單Batch損失: 0.7254
Epoch 21/40 | Batch 500/782 | 單Batch損失: 0.4061
Epoch 21/40 | Batch 600/782 | 單Batch損失: 0.6170
Epoch 21/40 | Batch 700/782 | 單Batch損失: 0.7689
Epoch 21 完成 | 訓練損失: 0.5603 | 訓練準確率: 80.19% | 測試準確率: 83.10%
Epoch 22/40 | Batch 100/782 | 單Batch損失: 0.6171
Epoch 22/40 | Batch 200/782 | 單Batch損失: 0.6590
Epoch 22/40 | Batch 300/782 | 單Batch損失: 0.3658
Epoch 22/40 | Batch 400/782 | 單Batch損失: 0.4455
Epoch 22/40 | Batch 500/782 | 單Batch損失: 0.7133
Epoch 22/40 | Batch 600/782 | 單Batch損失: 0.5178
Epoch 22/40 | Batch 700/782 | 單Batch損失: 0.7108
Epoch 22 完成 | 訓練損失: 0.5525 | 訓練準確率: 80.57% | 測試準確率: 83.35%
Epoch 23/40 | Batch 100/782 | 單Batch損失: 0.4086
Epoch 23/40 | Batch 200/782 | 單Batch損失: 0.5742
Epoch 23/40 | Batch 300/782 | 單Batch損失: 0.4925
Epoch 23/40 | Batch 400/782 | 單Batch損失: 0.4574
Epoch 23/40 | Batch 500/782 | 單Batch損失: 0.5913
Epoch 23/40 | Batch 600/782 | 單Batch損失: 0.4460
Epoch 23/40 | Batch 700/782 | 單Batch損失: 0.6479
Epoch 23 完成 | 訓練損失: 0.5345 | 訓練準確率: 81.19% | 測試準確率: 83.44%
Epoch 24/40 | Batch 100/782 | 單Batch損失: 0.4379
Epoch 24/40 | Batch 200/782 | 單Batch損失: 0.4926
Epoch 24/40 | Batch 300/782 | 單Batch損失: 0.3960
Epoch 24/40 | Batch 400/782 | 單Batch損失: 0.3477
Epoch 24/40 | Batch 500/782 | 單Batch損失: 0.5204
Epoch 24/40 | Batch 600/782 | 單Batch損失: 0.3718
Epoch 24/40 | Batch 700/782 | 單Batch損失: 0.6802
Epoch 24 完成 | 訓練損失: 0.5202 | 訓練準確率: 81.71% | 測試準確率: 83.76%
Epoch 25/40 | Batch 100/782 | 單Batch損失: 0.3082
Epoch 25/40 | Batch 200/782 | 單Batch損失: 0.4438
Epoch 25/40 | Batch 300/782 | 單Batch損失: 0.3903
Epoch 25/40 | Batch 400/782 | 單Batch損失: 0.5898
Epoch 25/40 | Batch 500/782 | 單Batch損失: 0.4658
Epoch 25/40 | Batch 600/782 | 單Batch損失: 0.5179
Epoch 25/40 | Batch 700/782 | 單Batch損失: 0.6340
Epoch 25 完成 | 訓練損失: 0.5102 | 訓練準確率: 82.14% | 測試準確率: 83.75%
Epoch 26/40 | Batch 100/782 | 單Batch損失: 0.7765
Epoch 26/40 | Batch 200/782 | 單Batch損失: 0.4710
Epoch 26/40 | Batch 300/782 | 單Batch損失: 0.3533
Epoch 26/40 | Batch 400/782 | 單Batch損失: 0.6742
Epoch 26/40 | Batch 500/782 | 單Batch損失: 0.5138
Epoch 26/40 | Batch 600/782 | 單Batch損失: 0.4145
Epoch 26/40 | Batch 700/782 | 單Batch損失: 0.5744
Epoch 26 完成 | 訓練損失: 0.5080 | 訓練準確率: 82.23% | 測試準確率: 83.82%
Epoch 27/40 | Batch 100/782 | 單Batch損失: 0.5557
Epoch 27/40 | Batch 200/782 | 單Batch損失: 0.4858
Epoch 27/40 | Batch 300/782 | 單Batch損失: 0.4281
Epoch 27/40 | Batch 400/782 | 單Batch損失: 0.6110
Epoch 27/40 | Batch 500/782 | 單Batch損失: 0.5705
Epoch 27/40 | Batch 600/782 | 單Batch損失: 0.4077
Epoch 27/40 | Batch 700/782 | 單Batch損失: 0.4892
Epoch 27 完成 | 訓練損失: 0.4922 | 訓練準確率: 82.72% | 測試準確率: 82.90%
Epoch 28/40 | Batch 100/782 | 單Batch損失: 0.5795
Epoch 28/40 | Batch 200/782 | 單Batch損失: 0.6773
Epoch 28/40 | Batch 300/782 | 單Batch損失: 0.3849
Epoch 28/40 | Batch 400/782 | 單Batch損失: 0.6191
Epoch 28/40 | Batch 500/782 | 單Batch損失: 0.3032
Epoch 28/40 | Batch 600/782 | 單Batch損失: 0.3453
Epoch 28/40 | Batch 700/782 | 單Batch損失: 0.4695
Epoch 28 完成 | 訓練損失: 0.4641 | 訓練準確率: 83.81% | 測試準確率: 84.82%
Epoch 29/40 | Batch 100/782 | 單Batch損失: 0.5900
Epoch 29/40 | Batch 200/782 | 單Batch損失: 0.5450
Epoch 29/40 | Batch 300/782 | 單Batch損失: 0.4215
Epoch 29/40 | Batch 400/782 | 單Batch損失: 0.7093
Epoch 29/40 | Batch 500/782 | 單Batch損失: 0.6825
Epoch 29/40 | Batch 600/782 | 單Batch損失: 0.3683
Epoch 29/40 | Batch 700/782 | 單Batch損失: 0.6135
Epoch 29 完成 | 訓練損失: 0.4547 | 訓練準確率: 83.96% | 測試準確率: 84.68%
Epoch 30/40 | Batch 100/782 | 單Batch損失: 0.3630
Epoch 30/40 | Batch 200/782 | 單Batch損失: 0.4074
Epoch 30/40 | Batch 300/782 | 單Batch損失: 0.4970
Epoch 30/40 | Batch 400/782 | 單Batch損失: 0.4840
Epoch 30/40 | Batch 500/782 | 單Batch損失: 0.4881
Epoch 30/40 | Batch 600/782 | 單Batch損失: 0.3234
Epoch 30/40 | Batch 700/782 | 單Batch損失: 0.4636
Epoch 30 完成 | 訓練損失: 0.4473 | 訓練準確率: 84.16% | 測試準確率: 84.90%
Epoch 31/40 | Batch 100/782 | 單Batch損失: 0.5597
Epoch 31/40 | Batch 200/782 | 單Batch損失: 0.3877
Epoch 31/40 | Batch 300/782 | 單Batch損失: 0.4757
Epoch 31/40 | Batch 400/782 | 單Batch損失: 0.2445
Epoch 31/40 | Batch 500/782 | 單Batch損失: 0.3012
Epoch 31/40 | Batch 600/782 | 單Batch損失: 0.6129
Epoch 31/40 | Batch 700/782 | 單Batch損失: 0.6934
Epoch 31 完成 | 訓練損失: 0.4427 | 訓練準確率: 84.26% | 測試準確率: 84.76%
Epoch 32/40 | Batch 100/782 | 單Batch損失: 0.4522
Epoch 32/40 | Batch 200/782 | 單Batch損失: 0.4832
Epoch 32/40 | Batch 300/782 | 單Batch損失: 0.4875
Epoch 32/40 | Batch 400/782 | 單Batch損失: 0.3199
Epoch 32/40 | Batch 500/782 | 單Batch損失: 0.5490
Epoch 32/40 | Batch 600/782 | 單Batch損失: 0.6163
Epoch 32/40 | Batch 700/782 | 單Batch損失: 0.5564
Epoch 32 完成 | 訓練損失: 0.4346 | 訓練準確率: 84.73% | 測試準確率: 84.77%
Epoch 33/40 | Batch 100/782 | 單Batch損失: 0.4282
Epoch 33/40 | Batch 200/782 | 單Batch損失: 0.3507
Epoch 33/40 | Batch 300/782 | 單Batch損失: 0.4064
Epoch 33/40 | Batch 400/782 | 單Batch損失: 0.3014
Epoch 33/40 | Batch 500/782 | 單Batch損失: 0.5831
Epoch 33/40 | Batch 600/782 | 單Batch損失: 0.5103
Epoch 33/40 | Batch 700/782 | 單Batch損失: 0.5730
Epoch 33 完成 | 訓練損失: 0.4238 | 訓練準確率: 84.88% | 測試準確率: 85.15%
Epoch 34/40 | Batch 100/782 | 單Batch損失: 0.4217
Epoch 34/40 | Batch 200/782 | 單Batch損失: 0.5362
Epoch 34/40 | Batch 300/782 | 單Batch損失: 0.3879
Epoch 34/40 | Batch 400/782 | 單Batch損失: 0.2776
Epoch 34/40 | Batch 500/782 | 單Batch損失: 0.4530
Epoch 34/40 | Batch 600/782 | 單Batch損失: 0.2557
Epoch 34/40 | Batch 700/782 | 單Batch損失: 0.4566
Epoch 34 完成 | 訓練損失: 0.4187 | 訓練準確率: 85.34% | 測試準確率: 85.17%
Epoch 35/40 | Batch 100/782 | 單Batch損失: 0.2626
Epoch 35/40 | Batch 200/782 | 單Batch損失: 0.4444
Epoch 35/40 | Batch 300/782 | 單Batch損失: 0.3856
Epoch 35/40 | Batch 400/782 | 單Batch損失: 0.4605
Epoch 35/40 | Batch 500/782 | 單Batch損失: 0.4740
Epoch 35/40 | Batch 600/782 | 單Batch損失: 0.4703
Epoch 35/40 | Batch 700/782 | 單Batch損失: 0.3467
Epoch 35 完成 | 訓練損失: 0.4146 | 訓練準確率: 85.26% | 測試準確率: 85.32%
Epoch 36/40 | Batch 100/782 | 單Batch損失: 0.5474
Epoch 36/40 | Batch 200/782 | 單Batch損失: 0.4488
Epoch 36/40 | Batch 300/782 | 單Batch損失: 0.5720
Epoch 36/40 | Batch 400/782 | 單Batch損失: 0.5442
Epoch 36/40 | Batch 500/782 | 單Batch損失: 0.3863
Epoch 36/40 | Batch 600/782 | 單Batch損失: 0.3958
Epoch 36/40 | Batch 700/782 | 單Batch損失: 0.5257
Epoch 36 完成 | 訓練損失: 0.4102 | 訓練準確率: 85.49% | 測試準確率: 85.06%
Epoch 37/40 | Batch 100/782 | 單Batch損失: 0.4006
Epoch 37/40 | Batch 200/782 | 單Batch損失: 0.4335
Epoch 37/40 | Batch 300/782 | 單Batch損失: 0.4789
Epoch 37/40 | Batch 400/782 | 單Batch損失: 0.5122
Epoch 37/40 | Batch 500/782 | 單Batch損失: 0.4312
Epoch 37/40 | Batch 600/782 | 單Batch損失: 0.3565
Epoch 37/40 | Batch 700/782 | 單Batch損失: 0.2940
Epoch 37 完成 | 訓練損失: 0.4048 | 訓練準確率: 85.67% | 測試準確率: 84.99%
Epoch 38/40 | Batch 100/782 | 單Batch損失: 0.4945
Epoch 38/40 | Batch 200/782 | 單Batch損失: 0.3622
Epoch 38/40 | Batch 300/782 | 單Batch損失: 0.6336
Epoch 38/40 | Batch 400/782 | 單Batch損失: 0.3288
Epoch 38/40 | Batch 500/782 | 單Batch損失: 0.3184
Epoch 38/40 | Batch 600/782 | 單Batch損失: 0.2478
Epoch 38/40 | Batch 700/782 | 單Batch損失: 0.5312
Epoch 38 完成 | 訓練損失: 0.3950 | 訓練準確率: 85.93% | 測試準確率: 85.39%
Epoch 39/40 | Batch 100/782 | 單Batch損失: 0.2675
Epoch 39/40 | Batch 200/782 | 單Batch損失: 0.3396
Epoch 39/40 | Batch 300/782 | 單Batch損失: 0.1931
Epoch 39/40 | Batch 400/782 | 單Batch損失: 0.2008
Epoch 39/40 | Batch 500/782 | 單Batch損失: 0.3052
Epoch 39/40 | Batch 600/782 | 單Batch損失: 0.5663
Epoch 39/40 | Batch 700/782 | 單Batch損失: 0.5728
Epoch 39 完成 | 訓練損失: 0.3986 | 訓練準確率: 85.72% | 測試準確率: 85.52%
Epoch 40/40 | Batch 100/782 | 單Batch損失: 0.3565
Epoch 40/40 | Batch 200/782 | 單Batch損失: 0.4198
Epoch 40/40 | Batch 300/782 | 單Batch損失: 0.2875
Epoch 40/40 | Batch 400/782 | 單Batch損失: 0.3373
Epoch 40/40 | Batch 500/782 | 單Batch損失: 0.3926
Epoch 40/40 | Batch 600/782 | 單Batch損失: 0.5112
Epoch 40/40 | Batch 700/782 | 單Batch損失: 0.5156
Epoch 40 完成 | 訓練損失: 0.3911 | 訓練準確率: 86.07% | 測試準確率: 85.53%
示例代碼(ResNet18)訓練結果
訓練損失: 0.2713 | 訓練準確率: 90.29% | 測試準確率: 86.30%
對比分析
1. 準確率與過擬合
-
ResNet18:
- 訓練準確率?90.29%,測試準確率?86.30%,差距?3.99%(過擬合較明顯)。
- 殘差結構(如?
x + F(x)
)在小數據上擬合能力強(訓練損失低),但參數量(11.7M)較大,易記住訓練細節,導致測試泛化性略差。
-
EfficientNet-B0:
- 訓練準確率?86.07%,測試準確率?85.53%,差距?0.54%(過擬合極弱)。
- 輕量化設計(5.3M 參數量)結合復合縮放,在小數據上更 “克制”,避免過度擬合(訓練 - 測試損失曲線幾乎重合,圖 2、4 對比),泛化性更優。
2. 損失曲線與收斂性
-
訓練損失:
ResNet18(0.2713)低于 EfficientNet-B0(0.3911),說明 ResNet18 對訓練數據的擬合更徹底(殘差塊的特征復用能力強)。 -
測試損失:
兩者最終接近(ResNet18 略低),但 EfficientNet-B0 的損失曲線更平滑(圖 1 vs 圖 3),迭代損失波動小,收斂更穩定(得益于分組卷積和通道注意力的輕量化優化)。
3. 模型結構與性能適配
-
ResNet18:
經典殘差塊(如?BasicBlock
)對小尺寸圖像(32×32)的特征提取直接(無需復雜縮放),初期收斂快(圖 4 中前 5 epoch 準確率飆升),適合 “小數據 + 中等參數量” 場景。 -
EfficientNet-B0:
復合縮放(寬度、深度、分辨率均衡調整)在 ImageNet 上學習的全局特征(如大尺寸圖像的語義信息)遷移到 CIFAR10 時,需更多 epoch 適配小尺寸(圖 2 中準確率上升平緩),但輕量化設計(如 MBConv 模塊)減少了過擬合風險。
4. 總結
-
ResNet18 優勢:
測試準確率略高(+0.77%),訓練擬合能力強,適合對精度要求高、允許輕度過擬合的場景(如 CIFAR10 競賽級優化)。 -
EfficientNet-B0 優勢:
過擬合極低(訓練 - 測試差距 < 1%),參數量少(訓練速度快 30%+),適合資源受限環境或需要泛化性的任務(如小數據 + 實時推理)。
指標 | ResNet18 | EfficientNet-B0 |
測試準確率 | 86.30 | 85.53 |
過擬合程度 | 中(訓練-測試差距4%) | 低(差距0.5%) |
訓練速度 | 中等(11.7M參數量) | 快(5.3M參數量) |
適用場景 | 精度有限,小數據擬合 | 輕量化,泛化性優先 |
@浙大疏錦行