Table of Contents
1. Model-building workflow
1) Steps
2) Complete code - the handwritten-digit MNIST dataset as an example (using the built-in dataset)
2. Four ways to build a model
1) Method 1 - nn.Sequential()
2) Method 2 - collections.OrderedDict()
3) Method 3 - create a container first, then add modules to it with add_module()
4) Method 4 - functions from torch.nn.functional
3. Building VGG16
4. Implementing a fully convolutional layer
5. Saving the output feature maps of sub-modules
1. Model-building workflow
1) Steps
1) First, be clear about the structure of the model you are building: determine each layer's channel count, stride, padding, kernel size, and so on.
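To sanity-check those choices, the output size of a conv or pooling layer follows the standard formula out = floor((in + 2*padding - kernel) / stride) + 1. The small helper below illustrates it (an illustrative sketch added here, not part of the original workflow):

def conv_out_size(in_size, kernel, stride=1, padding=0):
    # floor((in + 2*padding - kernel) / stride) + 1
    return (in_size + 2 * padding - kernel) // stride + 1

# a VGG16-style 3x3 conv with stride 1 and padding 1 keeps the spatial size:
print(conv_out_size(224, kernel=3, stride=1, padding=1))  # 224
# 2x2 max pooling with stride 2 halves it:
print(conv_out_size(224, kernel=2, stride=2))             # 112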
2) Create the model class and make it inherit from nn.Module;

class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()  # inherit the parent class's attributes and methods
3) Following the forward-propagation order, build each sub-module. Sub-modules are usually stored in an nn.Sequential() container and created inside the model class's __init__() method;

self.conv1 = nn.Sequential(
    nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1)),
    nn.ReLU(inplace=True),  # inplace=True overwrites the input tensor in place
)
4) Add a forward() method. The __init__() method above only creates the sub-modules, and each of them is independent; forward() chains them together in forward-propagation order.

# forward propagation
def forward(self, x):
    # the thirteen conv blocks, plus the fully convolutional block conv14 (see section 4)
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.conv4(x)
    x = self.conv5(x)
    x = self.conv6(x)
    x = self.conv7(x)
    x = self.conv8(x)
    x = self.conv9(x)
    x = self.conv10(x)
    x = self.conv11(x)
    x = self.conv12(x)
    x = self.conv13(x)
    x = self.conv14(x)
    # flatten the feature map into a 1-D vector
    output = x.view(x.size(0), -1)
    return output
5) After step 4 the model itself is complete. Next, instantiate it and choose the loss function, the gradient-descent optimizer, the learning-rate schedule, and so on.

# choose the optimizer, i.e. the gradient-descent algorithm; SGD and Adam are the most common
optimizer = optim.Adam(net.parameters(), lr=0.01)
# choose the learning-rate scheduler, i.e. how the learning rate changes over training
'''
milestones: the breakpoints
epochs 0-75:   lr
epochs 75-150: gamma * lr
epochs 150-:   gamma * gamma * lr
'''
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)
# choose the loss function; here cross-entropy is used to compute the model's loss
criterion = nn.CrossEntropyLoss()
# print(net)
6) If the dataset is large, split it into batches and train batch by batch; besides the hand-written generator in the complete code below, see the DataLoader sketch right after this step.
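PyTorch's torch.utils.data.DataLoader does this batching out of the box. A minimal sketch (the tensors here are placeholders, not the MNIST arrays used below):

import torch
from torch.utils.data import TensorDataset, DataLoader

images = torch.randn(60000, 1, 28, 28)   # placeholder image tensor
labels = torch.randint(0, 10, (60000,))  # placeholder integer labels
loader = DataLoader(TensorDataset(images, labels), batch_size=8, shuffle=True)
for data, target in loader:
    pass  # each iteration yields one mini-batch, same role as iterate_minibatches() below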
7) Train.
8) Test.
9) Save the model.
2) Complete code - the handwritten-digit MNIST dataset as an example (using the built-in dataset)
from collections import OrderedDict  # OrderedDict is a dict subclass that remembers insertion order
from torch import optim
from torch import nn

# build the model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        '''
        A conv block generally consists of a conv layer, an activation layer, and a pooling layer.
        A model generally consists of several conv blocks plus several fully connected layers.
        '''
        # input shape 1, 28, 28
        self.conv1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(1, 16, (5, 5), (1, 1), (2, 2))),  # 16, 28, 28
            ('ReLU1', nn.ReLU()),
            ('pool1', nn.MaxPool2d(2)),  # 16, 14, 14
        ]))
        self.conv2 = nn.Sequential(OrderedDict([
            ('conv2', nn.Conv2d(16, 32, (5, 5), (1, 1), (2, 2))),  # 32, 14, 14
            ('ReLU2', nn.ReLU()),
            ('pool2', nn.MaxPool2d(2))  # 32, 7, 7
        ]))
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten, i.e. shape [32, 7, 7] --> [32 * 7 * 7]
        output = self.linear(x)
        return output

# after instantiating the model, choose the optimizer and the learning-rate schedule
net = Net()
# choose the optimizer, i.e. the gradient-descent algorithm; SGD and Adam are the most common
optimizer = optim.Adam(net.parameters(), lr=0.01)
# choose the learning-rate scheduler, i.e. how the learning rate changes over training
'''
milestones: the breakpoints
epochs 0-75:   lr
epochs 75-150: gamma * lr
epochs 150-:   gamma * gamma * lr
'''
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)
# choose the loss function; cross-entropy expects raw logits and integer class labels
criterion = nn.CrossEntropyLoss()
# print(net)

# split the data into batches of a given size, then feed the batches in for training
def iterate_minibatches(inputs, targets, batch_size, shuffle=True):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    start_idx = None
    # step through the samples with a stride of batch_size to get each batch's starting index
    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batch_size]
        else:
            excerpt = slice(start_idx, start_idx + batch_size)  # a slice object
        # yield makes this a generator: each iteration of a for loop (or call to next())
        # produces one batch of features and the matching labels
        yield inputs[excerpt], targets[excerpt]
    # hand out the leftover samples that do not fill a whole batch
    if start_idx is not None and start_idx + batch_size < len(inputs):
        excerpt = indices[start_idx + batch_size:] if shuffle else slice(start_idx + batch_size, len(inputs))
        yield inputs[excerpt], targets[excerpt]

# hand-written training procedure
# load the data
import numpy as np
import torch
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("data/MNIST_data/", one_hot=False)
train_data_images = []
train_data_labels = []
test_data_images = []
test_data_labels = []

X_train, y_train = mnist.train.images, mnist.train.labels  # numpy arrays, (55000, 784), (55000,)
X_test, y_test = mnist.test.images, mnist.test.labels  # (10000, 784), (10000,)
X_valid, y_valid = mnist.validation.images, mnist.validation.labels  # (5000, 784), (5000,)

train_data_images = np.concatenate((X_train, X_valid), axis=0)  # (60000, 784)
train_data_labels = np.concatenate((y_train, y_valid), axis=0)  # (60000,)
test_data_images = X_test  # (10000, 784)
test_data_labels = y_test  # (10000,)

# reshape
train_data_images = np.reshape(train_data_images, [-1, 1, 28, 28])  # (60000, 1, 28, 28)
test_data_images = np.reshape(test_data_images, [-1, 1, 28, 28])  # (10000, 1, 28, 28)

# training loop
train_loss = 0
train_correct = 0
total = 0
epochs = 100
for epoch in range(epochs):
    # iterate_minibatches() splits train_data_images / train_data_labels into batches;
    # shuffle=True randomizes the order; it returns a generator
    for data, target in iterate_minibatches(train_data_images, train_data_labels, batch_size=8, shuffle=True):
        # convert the numpy batches to tensors before feeding them to the network
        data = torch.from_numpy(data).float()
        target = torch.from_numpy(target).long()
        optimizer.zero_grad()  # clear the gradients
        output = net(data)
        loss = criterion(output, target)  # compute the loss
        loss.backward()  # backpropagation
        optimizer.step()
        train_loss += loss.item()  # accumulate the loss over the epoch
        # take the class with the highest score as the prediction; torch.max returns
        # two tensors: the per-row maxima and the per-row indices of those maxima
        pred = torch.max(output, 1)
        # a prediction is correct when the index of the highest score matches the true label
        train_correct += (pred[1] == target).sum().item()
        total += target.size(0)
    scheduler.step()  # advance the learning-rate schedule once per epoch
Train_Accuracy = train_correct / total
Train_Loss = train_loss

# test loop
test_loss = 0
test_correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for data, target in iterate_minibatches(test_data_images, test_data_labels, batch_size=8, shuffle=True):
        data = torch.from_numpy(data).float()
        target = torch.from_numpy(target).long()
        output = net(data)
        loss = criterion(output, target)
        test_loss += loss.item()
        pred = torch.max(output, 1)
        test_correct += (pred[1] == target).sum().item()
        total += target.size(0)
Test_Accuracy = test_correct / total
Test_Loss = test_loss

# save the model
model_out_path = "model.pth"
torch.save(net, model_out_path)
print("Checkpoint saved to {}".format(model_out_path))
2. Four ways to build a model
1) Method 1 - nn.Sequential()
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1, 28, 28)
            nn.Conv2d(1, 16, 5, 1, 2),  # output shape (16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),  # output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),  # output shape (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),  # output shape (32, 7, 7)
        )
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output
2) Method 2 - collections.OrderedDict()
import torch.nn as nn
from collections import OrderedDict  # OrderedDict is a dict subclass that remembers insertion order

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(1, 16, 5, 1, 2)),
            ('ReLU1', nn.ReLU()),
            ('pool1', nn.MaxPool2d(2)),
        ]))
        self.conv2 = nn.Sequential(OrderedDict([
            ('conv2', nn.Conv2d(16, 32, 5, 1, 2)),
            ('ReLU2', nn.ReLU()),
            ('pool2', nn.MaxPool2d(2)),
        ]))
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output
3) Method 3 - create a container first, then add modules to it with add_module()
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential()
        self.conv1.add_module('conv1', nn.Conv2d(1, 16, 5, 1, 2))
        self.conv1.add_module('ReLU1', nn.ReLU())
        self.conv1.add_module('pool1', nn.MaxPool2d(2))
        self.conv2 = nn.Sequential()
        self.conv2.add_module('conv2', nn.Conv2d(16, 32, 5, 1, 2))
        self.conv2.add_module('ReLU2', nn.ReLU())
        self.conv2.add_module('pool2', nn.MaxPool2d(2))
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output
4) Method 4 - functions from torch.nn.functional
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 5, 1, 2)
        self.conv2 = nn.Conv2d(16, 32, 5, 1, 2)
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)  # flatten before the linear layer
        output = self.linear(x)
        return output
3. Building VGG16
from torch import nn

class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()  # inherit the parent class's attributes and methods
        # build the sub-modules in forward-propagation order
        ## thirteen conv layers; each conv block has a conv layer, an activation layer,
        ## and possibly a pooling layer, stored in an nn.Sequential() container
        # [1,3,224,224]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),  # inplace=True overwrites the input tensor in place
        )
        # [1,64,224,224]
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,64,112,112]
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,128,112,112]
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,128,56,56]
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv6 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv7 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,256,28,28]
        self.conv8 = nn.Sequential(
            nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv9 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv10 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,512,14,14]
        self.conv11 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14]
        self.conv12 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14] --> [1,512,7,7]
        self.conv13 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # three fully connected layers, with activation and dropout layers in between
        self.classifier = nn.Sequential(
            # [1*512*7*7]
            nn.Linear(1 * 512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 4096
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 4096 --> 1000
            nn.Linear(4096, num_classes)
        )

    # forward propagation
    def forward(self, x):
        # thirteen conv layers
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        # flatten the feature map into a 1-D vector, [1,512,7,7] --> 1*512*7*7
        x = x.view(x.size(0), -1)
        # three fully connected layers
        output = self.classifier(x)
        return output

## test
import torch
vgg16 = VGG16(21)
print(vgg16)

input_ = torch.randn(1,3,224,224)
output = vgg16(input_)
print(output.shape)
print(output)
4. Implementing a fully convolutional layer
Core idea: replace the fully connected layers with convolutional layers. This takes some careful design so that the final output has shape [1, num_classes, 1, 1]. Take VGG16 as an example: its last feature map has shape [1, 512, 7, 7]. To shrink it to a 1x1 feature map, convolve it with 7x7 kernels, and use num_classes of those kernels; the result is a [1, num_classes, 1, 1] feature map, and applying an activation before the output gives the classification scores.
# fully convolutional layer
self.conv14 = nn.Sequential(
    nn.Conv2d(512, num_classes, (7, 7), (1, 1)),
    nn.ReLU(inplace=True)
)
from torch import nn

class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()  # inherit the parent class's attributes and methods
        # build the sub-modules in forward-propagation order
        ## the thirteen conv blocks are the same as in section 3
        # [1,3,224,224]
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),  # inplace=True overwrites the input tensor in place
        )
        # [1,64,224,224]
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,64,112,112]
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,128,112,112]
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 128, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,128,56,56]
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv6 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,256,56,56]
        self.conv7 = nn.Sequential(
            nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,256,28,28]
        self.conv8 = nn.Sequential(
            nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv9 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True)
        )
        # [1,512,28,28]
        self.conv10 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # [1,512,14,14]
        self.conv11 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14]
        self.conv12 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
        )
        # [1,512,14,14] --> [1,512,7,7]
        self.conv13 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), (2, 2))
        )
        # fully convolutional layer: [1,512,7,7] --> [1,num_classes,1,1]
        self.conv14 = nn.Sequential(
            nn.Conv2d(512, num_classes, (7, 7), (1, 1)),
            nn.ReLU(inplace=True)
        )

    # forward propagation
    def forward(self, x):
        # thirteen conv blocks plus the fully convolutional layer
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.conv14(x)
        # flatten into a 1-D vector, [1,num_classes,1,1] --> num_classes
        output = x.view(x.size(0), -1)
        return output

## test
import torch
vgg16 = VGG16(21)
print(vgg16)

input_ = torch.randn(1,3,224,224)
output = vgg16(input_)
print(output.shape)
print(output)
5. Saving the output feature maps of sub-modules
In the forward() function, simply append the outputs of the layers you want to keep to a list. Here is an example from SSD: features collects several feature maps so that they can be fed into later stages, enabling multi-scale training.
def forward(self, x):
    features = []
    # apply vgg up to conv4_3
    for i in range(23):
        x = self.vgg[i](x)
    s = self.l2_norm(x)  # Conv4_3 L2 normalization
    features.append(s)

    # apply vgg up to fc7
    for i in range(23, len(self.vgg)):
        x = self.vgg[i](x)
    features.append(x)

    for k, v in enumerate(self.extras):
        x = F.relu(v(x), inplace=True)
        if k % 2 == 1:
            features.append(x)
    return tuple(features)
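When you cannot (or do not want to) edit forward(), forward hooks achieve the same thing from the outside. A minimal sketch, assuming the VGG16 class from section 3 (the hooked sub-module names conv7 and conv13 follow that class):

import torch

features = {}

def make_hook(name):
    def hook(module, inputs, output):
        features[name] = output.detach()  # store this sub-module's output
    return hook

vgg16 = VGG16(21)
vgg16.conv7.register_forward_hook(make_hook('conv7'))
vgg16.conv13.register_forward_hook(make_hook('conv13'))

_ = vgg16(torch.randn(1, 3, 224, 224))
print(features['conv7'].shape)   # torch.Size([1, 256, 28, 28])
print(features['conv13'].shape)  # torch.Size([1, 512, 7, 7])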