基于UNet和camvid數據集的道路分割

基于UNet和camvid數據集的道路分割（PyTorch 1.3.0+）：

背景
語義分割是深度學習中的一個非常重要的研究方向，并且UNet是語義分割中一個非常經典的模型。在本次博客中，我嘗試用UNet對CamVid數據集進行道路分割，大致期望的效果如下：

原圖

道路分割效果

本博客的代碼參考了以下鏈接：

https://github.com/milesial/Pytorch-UNet
https://github.com/qubvel/segmentation_models.pytorch
數據集介紹及處理
之前的博客里，我幾乎不怎么介紹數據集，因為用到的數據集比較簡單；但是在使用camvid dataset的時候，我腦袋都大了，用了兩三個小時才搞清楚這個數據集到底是啥情況。

數據集下載鏈接
雖然數據集的主頁還可以訪問，但是下載鏈接好像都失效了，所以最后還是用了AWS上的存儲鏈接。

https://s3.amazonaws.com/fast-ai-imagelocal/camvid.tgz
數據說明
camvid數據集里包括三種重要信息，分別是RGB影像、語義分割圖和標簽說明。
RGB影像就不用多說了，為三通道RGB。
語義分割圖為單通道，其中像素值代表了當前像素的類別，其對應關系存儲在標簽說明里。
標簽說明對應了語義分割圖像素值和類別的關系，如下：

0 Animal
1 Archway
2 Bicyclist
3 Bridge
4 Building
5 Car
6 CartLuggagePram
7 Child
8 Column_Pole
9 Fence
10 LaneMkgsDriv
11 LaneMkgsNonDriv
12 Misc_Text
13 MotorcycleScooter
14 OtherMoving
15 ParkingBlock
16 Pedestrian
17 Road
18 RoadShoulder
19 Sidewalk
20 SignSymbol
21 Sky
22 SUVPickupTruck
23 TrafficCone
24 TrafficLight
25 Train
26 Tree
27 Truck_Bus
28 Tunnel
29 VegetationMisc
30 Void
31 Wall
數據處理
下載數據后會得到一個壓縮包，包括images和labels，分別對應的是RGB的影像和像素的標簽。
首先要做以下的一些處理，包括：

重命名labels的名稱，去掉名稱里的_P，保證labels和images的名稱一致
將原始數據集按照7:2:1的規則，分割成train:valid:test
rename.py
import os
import sys

# Folder holding the label PNGs; change this to your own dataset path.
cur_path = 'D:/camvid/camvid/labels'

labels = os.listdir(cur_path)

# Drop the trailing "_P" from every label file name ("name_P.png" ->
# "name.png") so that each label shares its file name with its RGB image.
for old_label in labels:
    if not old_label.endswith('_P.png'):
        # Already renamed, or not a label file: leave it untouched so the
        # script can safely be run more than once.
        continue
    new_label = old_label[:-len('_P.png')] + '.png'
    print(old_label, new_label)
    os.rename(os.path.join(cur_path, old_label), os.path.join(cur_path, new_label))
split_dataset.py
import os
import random
import shutil

# Dataset locations; change these to your own paths.
dataset_path = 'D:/camvid/camvid'
images_path = 'D:/camvid/camvid/images'
labels_path = 'D:/camvid/camvid/labels'

images_name = os.listdir(images_path)
images_num = len(images_name)
# Cut points for the 7:2:1 train/valid/test split.
alpha = int(images_num * 0.7)
beta = int(images_num * 0.9)

print(images_num)

random.shuffle(images_name)

train_list = images_name[0:alpha]
valid_list = images_name[alpha:beta]
test_list = images_name[beta:images_num]

# Sanity-check the split sizes.
print('train list: ', len(train_list))
print('valid list: ', len(valid_list))
print('test list: ', len(test_list))
print('total num: ', len(test_list) + len(valid_list) + len(train_list))

# Destination folders for each split.
train_images_path = os.path.join(dataset_path, 'train_images')
train_labels_path = os.path.join(dataset_path, 'train_labels')
valid_images_path = os.path.join(dataset_path, 'valid_images')
valid_labels_path = os.path.join(dataset_path, 'valid_labels')
test_images_path = os.path.join(dataset_path, 'test_images')
test_labels_path = os.path.join(dataset_path, 'test_labels')

splits = [
    (train_list, train_images_path, train_labels_path),
    (valid_list, valid_images_path, valid_labels_path),
    (test_list, test_images_path, test_labels_path),
]

# Create the folders if needed, then copy every image together with the
# label of the same file name (labels must already have been renamed).
for names, images_dst, labels_dst in splits:
    os.makedirs(images_dst, exist_ok=True)
    os.makedirs(labels_dst, exist_ok=True)
    for image in names:
        shutil.copy(os.path.join(images_path, image), os.path.join(images_dst, image))
        shutil.copy(os.path.join(labels_path, image), os.path.join(labels_dst, image))
代碼
代碼鏈接：https://github.com/Yannnnnnnnnnnn/learnPyTorch/blob/master/road%20segmentation%20(camvid).ipynb

# 導入庫
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Root folder of the split dataset -- change this to match your machine.
DATA_DIR = 'D:/camvid/camvid'

# Every split keeps its RGB frames in "<split>_images" and the matching
# label maps in "<split>_labels", both directly under DATA_DIR.
x_train_dir, y_train_dir = [
    os.path.join(DATA_DIR, 'train_' + kind) for kind in ('images', 'labels')
]

x_valid_dir, y_valid_dir = [
    os.path.join(DATA_DIR, 'valid_' + kind) for kind in ('images', 'labels')
]

x_test_dir, y_test_dir = [
    os.path.join(DATA_DIR, 'test_' + kind) for kind in ('images', 'labels')
]

# 導入pytorch
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

# 自定義Dataloader
class Dataset(BaseDataset):
    """CamVid dataset yielding an RGB image and a binary one-class mask.

    Each image is read from ``images_dir``; the label map with the same file
    name is read from ``masks_dir`` and binarised so that pixels equal to
    ``class_value`` become 1.0 and every other pixel becomes 0.0.

    Args:
        images_dir (str): path to the images folder.
        masks_dir (str): path to the segmentation masks folder; every image
            needs a mask with the same file name.
        augmentation (callable, optional): called as
            ``augmentation(image=image, mask=mask)`` and expected to return a
            mapping with ``'image'`` and ``'mask'`` entries (for example an
            ``albumentations.Compose``).
        class_value (int): label-map value treated as foreground. Defaults to
            17, which is ``Road`` in the CamVid label table.
    """

    def __init__(
            self,
            images_dir,
            masks_dir,
            augmentation=None,
            class_value=17,
    ):
        # os.listdir returns names in arbitrary order; sorting keeps the
        # index -> file mapping reproducible between runs and machines.
        self.ids = sorted(os.listdir(images_dir))
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id) for image_id in self.ids]

        self.augmentation = augmentation
        self.class_value = class_value

    def __getitem__(self, i):
        """Return ``(image, mask)`` for sample ``i``.

        ``image`` is the RGB image after augmentation; ``mask`` is the float
        foreground/background mask with a leading channel axis added.

        Raises:
            FileNotFoundError: if the image or its mask cannot be read.
        """
        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(self.images_fps[i])
        if image is None:
            raise FileNotFoundError('cannot read image: ' + self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Flag 0 loads the label map as a single-channel image.
        mask = cv2.imread(self.masks_fps[i], 0)
        if mask is None:
            raise FileNotFoundError('cannot read mask: ' + self.masks_fps[i])

        # Foreground = the selected class, background = everything else.
        mask = (mask == self.class_value)
        mask = mask.astype('float')

        # Apply the same augmentation to image and mask together.
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # Prepend a channel axis, (H, W) -> (1, H, W), without assuming a
        # fixed spatial size.
        return image, mask.reshape(1, *mask.shape)

    def __len__(self):
        """Return the number of samples found in ``images_dir``."""
        return len(self.ids)

# 數據增強
# 關于albumentations 怎么用我就不廢話了
# 需要說明的是，我本身是打算用pytorch自帶的transform
# 然而我實在沒有搞明白，怎么同時對image和mask進行增強
# 如果連續調用兩次transform，那么image和mask的增強方式都不一致，肯定不行
# 如果將[image;mask]堆砌在一起，放到transform里，image和mask的增強方式倒是一樣了，但是transform最后一步的toTensor會把mask歸一化，這肯定也是不行的
import albumentations as albu
def get_training_augmentation(height=320, width=320):
    """Build the augmentation pipeline used for training samples.

    Args:
        height (int): output height passed to ``albu.Resize``.
        width (int): output width passed to ``albu.Resize``.

    Returns:
        An ``albu.Compose`` that applies, in order, a horizontal flip with
        probability 0.5, a resize to ``height`` x ``width``, and a
        shift/scale/rotate that is always applied (``p=1``).
    """
    train_transform = [
        albu.HorizontalFlip(p=0.5),
        albu.Resize(height=height, width=width, always_apply=True),
        albu.ShiftScaleRotate(scale_limit=0.1, rotate_limit=20, shift_limit=0.1, p=1, border_mode=0),
    ]
    return albu.Compose(train_transform)

def get_test_augmentation(height=320, width=320):
    """Build the deterministic pipeline used for evaluation samples.

    Args:
        height (int): output height passed to ``albu.Resize``.
        width (int): output width passed to ``albu.Resize``.

    Returns:
        An ``albu.Compose`` containing only the resize step, so evaluation
        inputs get the same spatial size as training inputs without any
        random transformation.
    """
    test_transform = [
        albu.Resize(height=height, width=width, always_apply=True),
    ]
    return albu.Compose(test_transform)

# Training split wrapped with the training augmentation pipeline.
augmented_dataset = Dataset(
    x_train_dir,
    y_train_dir,
    augmentation=get_training_augmentation(),
)

# 定義UNet的基本模塊
# 代碼來自https://github.com/milesial/Pytorch-UNet
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2

    Both convolutions use a 3x3 kernel with padding 1, so the spatial size of
    the input is preserved; only the channel count changes from
    ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run ``x`` through both conv/BN/ReLU stages."""
        return self.double_conv(x)

class Down(nn.Module):
    """Downscaling with maxpool then double conv

    The 2x2 max-pool halves the spatial size; the following ``DoubleConv``
    maps ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels),
        )

    def forward(self, x):
        """Pool ``x`` and run it through the double convolution."""
        return self.maxpool_conv(x)

class Up(nn.Module):
? ? """Upscaling then double conv"""

? ? def __init__(self, in_channels, out_channels, bilinear=True):
? ? ? ? super().__init__()

? ? ? ? # if bilinear, use the normal convolutions to reduce the number of channels
? ? ? ? if bilinear:
? ? ? ? ? ? self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
? ? ? ? else:
? ? ? ? ? ? self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2)

? ? ? ? self.conv = DoubleConv(in_channels, out_channels)

? ? def forward(self, x1, x2):
? ? ? ? x1 = self.up(x1)
? ? ? ? # input is CHW
? ? ? ? diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
? ? ? ? diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

? ? ? ? x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
? ? ? ? ? ? ? ? ? ? ? ? diffY // 2, diffY - diffY // 2])
? ? ? ? # if you have padding issues, see
? ? ? ? # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
? ? ? ? # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
? ? ? ? x = torch.cat([x2, x1], dim=1)
? ? ? ? return self.conv(x)
? ? ? ??
class OutConv(nn.Module):
    """Final 1x1 convolution mapping feature channels to output channels."""

    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Apply the 1x1 convolution; the spatial size is unchanged."""
        return self.conv(x)

# UNet
class UNet(nn.Module):
    """U-Net with reduced channel widths and a sigmoid output.

    Args:
        n_channels (int): number of input image channels.
        n_classes (int): number of output channels.
        bilinear (bool): forwarded to every ``Up`` block to choose between
            bilinear upsampling and transposed convolution.
    """

    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        # Channel widths were reduced by the author to fit the available
        # GPU memory.
        self.inc = DoubleConv(n_channels, 32)
        self.down1 = Down(32, 64)
        self.down2 = Down(64, 128)
        self.down3 = Down(128, 256)
        self.down4 = Down(256, 256)
        # Each Up input width is the upsampled channels plus the skip channels.
        self.up1 = Up(512, 128, bilinear)
        self.up2 = Up(256, 64, bilinear)
        self.up3 = Up(128, 32, bilinear)
        self.up4 = Up(64, 32, bilinear)
        self.outc = OutConv(32, n_classes)
        # The forward pass ends with a sigmoid, so the model returns values
        # in (0, 1) rather than raw scores.
        self.out = torch.sigmoid

    def forward(self, x):
        """Return the per-pixel sigmoid output for the batch ``x``."""
        # Encoder: keep every level for the skip connections.
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        # Decoder: upsample and merge with the matching encoder level.
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        scores = self.outc(x)
        probs = self.out(scores)
        return probs

# Training dataset and loader.
# No validation set is used here; the author only trained for a few epochs.
train_dataset = Dataset(
    x_train_dir,
    y_train_dir,
    augmentation=get_training_augmentation(),
)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Binary segmentation, so the model has a single output channel.
# The UNet above was modified, so no pretrained encoder weights are loaded.
net = UNet(n_channels=3, n_classes=1)

# Training
# Variable is no longer needed (tensors track gradients themselves); the
# import is kept only so that other code relying on the name keeps working.
from torch.autograd import Variable

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)

# Schedule used by the author (re-run this section with a new lr per stage):
#   1. lr=0.01   for about 40 epochs
#   2. lr=0.005  for about 40 epochs
#   3. lr=0.0001 for about 20 epochs
# The value below is stage 1. The listing previously had lr=0.4, which does
# not match any stage of this schedule.
optimizer = optim.RMSprop(net.parameters(), lr=0.01, weight_decay=1e-8)

# Binary cross-entropy on probabilities; the model already applies a sigmoid.
criterion = nn.BCELoss()

for epoch in range(10):

    net.train()
    epoch_loss = 0.0

    for images, labels in train_loader:

        # Loader output is (N, H, W, C); the network expects (N, C, H, W).
        images = images.permute(0, 3, 1, 2)
        images = images.to(device=device, dtype=torch.float32)
        images = images / 255.  # scale pixel values to [0, 1]
        labels = labels.to(device=device, dtype=torch.float32)

        pred = net(images)

        # pred and labels are passed with their original shapes. The author
        # reports that an earlier version flattened both with .view(-1) and
        # the loss then did not decrease for a long time (cause unknown).
        loss = criterion(pred, labels)
        epoch_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('loss: ', loss.item())

    # Mean batch loss for the epoch (guarded against an empty loader).
    print('epoch: ', epoch, ' mean loss: ', epoch_loss / max(len(train_loader), 1))

# Inference demo
def visualize(**images):
    """Show the given images side by side, titled by their keyword names."""
    count = len(images)
    plt.figure(figsize=(16, 5))
    for position, (title, picture) in enumerate(images.items()):
        plt.subplot(1, count, position + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(title)
        plt.imshow(picture)
    plt.show()


# Evaluate on the held-out test split with the resize-only pipeline.
test_dataset_noaug = Dataset(
    x_test_dir,
    y_test_dir,
    augmentation=get_test_augmentation(),
)

# The demo shows sample 77; clamp the index in case the split is smaller.
sample_index = min(77, len(test_dataset_noaug) - 1)
image, mask = test_dataset_noaug[sample_index]
show_image = image

# Switch BatchNorm to its running statistics; in training mode a single
# image would be normalised with its own batch statistics.
net.eval()
model_device = next(net.parameters()).device
with torch.no_grad():
    image = image / 255.
    image = image.astype('float32')
    image = torch.from_numpy(image)
    image = image.permute(2, 0, 1)  # HWC -> CHW
    print(image.shape)

    pred = net(image.unsqueeze(0).to(model_device))
    pred = pred.cpu()

# A pixel counts as foreground only when its probability exceeds 0.5.
pred = pred > 0.5
# Show the input, the ground-truth mask and the prediction.
visualize(image=show_image, GT=mask[0, :, :], Pred=pred[0, 0, :, :])

結果及分析
看一下最終結果，做一下分析討論，總結經驗。

結果
關于結果，這里隨便展示幾個吧，感覺還行。

分析
這是我第一次train分割的網絡，有一些經驗，寫一寫。

最開始train的時候，我比較心貪，用的原始分辨率的影像，720*960；結果網絡參數太多，根本train不了，而且訓練效果也不好；最后降采樣才正常了，且效果變好了。
在訓練之前，務必搞清楚數據集的格式，不然都不知道在train啥。
我在選擇分割對象的時候，其實最開始也是用car,但是明顯這個類別在影像里特別少，效果一直不好；最后選取了sky,road和wall這種樣本較多的，效果才比較好；這說明樣本數量還是很重要的。
————————————————
版權聲明：本文為CSDN博主「Stone_Yannn」的原創文章，遵循CC 4.0 BY-SA版權協議，轉載請附上原文出處鏈接及本聲明。
原文鏈接：https://blog.csdn.net/u012348774/article/details/104300366