1. Introduction
This post describes how to detect cracks in workpieces using SSD object detection. A sample crack image is shown below:
2. About the SSD Algorithm
The algorithm itself is not explained here; for details, see:
https://blog.csdn.net/u013989576/article/details/73439202
https://blog.csdn.net/xiaohu2022/article/details/79833786
https://www.sohu.com/a/168738025_717210
3. Preparing the Training Data
LabelImg is used to annotate the training data. For installing and using LabelImg, see: https://blog.csdn.net/xunan003/article/details/78720189
One tip on annotating crack data: keep the bounding boxes small; this makes convergence easier.
The dataset uses the VOC2007 format, with three subfolders under the dataset directory.
The Annotations folder holds the XML files generated by LabelImg.
JPEGImages holds the original images in .jpg format.
ImageSets contains a Main folder, which in turn holds four txt files corresponding to the training, test, and validation splits. These files list image names drawn at random from the Annotations folder and divided according to a fixed ratio.
The following script generates the four txt files in Main from the XML files:
import os
import random

# Fraction of all samples used for trainval (the rest becomes the test set),
# and the fraction of trainval used for training (the rest becomes validation).
trainval_percent = 0.9
train_percent = 0.9

xmlfilepath = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_two_class/Annotations'
txtsavepath = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_two_class/ImageSets/Main'

total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

ftrainval = open(txtsavepath + '/trainval.txt', 'w')
ftest = open(txtsavepath + '/test.txt', 'w')
ftrain = open(txtsavepath + '/train.txt', 'w')
fval = open(txtsavepath + '/val.txt', 'w')

for i in indices:
    name = total_xml[i][:-4] + '\n'  # strip the '.xml' extension
    if i in trainval:
        ftrainval.write(name)
        if i in train:
            ftrain.write(name)
        else:
            fval.write(name)
    else:
        ftest.write(name)

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
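As a quick sanity check (my own addition, not part of the original script), you can append a few lines to print the size of each split after running it:

for split in ('trainval', 'train', 'val', 'test'):
    with open(txtsavepath + '/' + split + '.txt') as f:
        print(split, len(f.readlines()))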
4. Training
Training is driven by train_ssd300.py; as the name suggests, the model input is 300x300, but don't worry: the code resizes internally, so images of any size can be fed in. The source is as follows:
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt
from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization
from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast
from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
import tensorflow as tf
from keras import backend as K
from focal_loss import focal_loss
img_height = 300 # Height of the model input images
img_width = 300 # Width of the model input images
img_channels = 3 # Number of color channels of the model input images
mean_color = [123, 117, 104] # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
swap_channels = [2, 1, 0] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
n_classes = 1 # Number of object classes, not counting the background
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
# Anchors are sampled at six different scale levels; the last value, 1.05, is effectively unused.
# Each entry in scales gives the anchor box side length as a fraction (0.1, 0.2, 0.37, 0.54, ...) of the shorter input image side;
# the width/height ratios are given in aspect_ratios below, and the anchor count and ratios differ per scale level.
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
scales = scales_pascal
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters
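# Worked example (editor's note, not in the original script): for a 300x300 input,
# an anchor at scale s with aspect ratio ar is roughly
#     width = 300 * s * sqrt(ar),  height = 300 * s / sqrt(ar)
# so s = 0.2 with ar = 2.0 gives a box of about 85 x 42 pixels.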
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer.
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation
normalize_coords = True
# Either load an existing model or build a new one; pick one of the two
# 1: Build the Keras model.
K.clear_session() # Clear previous models from memory.
model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)
# 2: Load some weights into the model.
# TODO: Set the path to the weights you want to load.
weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
model.load_weights(weights_path, by_name=True)
model.summary()
# 3: Instantiate an optimizer and the SSD loss function and compile the model.
#    If you want to follow the original Caffe implementation, use the preset SGD
#    optimizer, otherwise I'd recommend the commented-out Adam optimizer.
# adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
sgd = SGD(lr=0.0001, momentum=0.9, decay=0.001, nesterov=False)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=sgd, loss=ssd_loss.compute_loss, metrics=['accuracy'])
# model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
# Model setup done
# Note: exploding gradients occurred here when the learning rate was set too large (see Section 7)
# Load the data
# 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
# Optional: If you have enough memory, consider loading the images into memory for the reasons explained above.
train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
# 2: Parse the image and label lists for the training and validation datasets. This can take a while.
# TODO: Set the paths to the datasets here.
# The directories that contain the images.
VOC_2007_images_dir      = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_expand/JPEGImages/'
# VOC_2012_images_dir      = '../../datasets/VOCdevkit/VOC2012/JPEGImages/'
# The directories that contain the annotations.
VOC_2007_annotations_dir      = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_expand/Annotations/'
# VOC_2012_annotations_dir      = '../../datasets/VOCdevkit/VOC2012/Annotations/'
# The paths to the image sets.
VOC_2007_train_image_set_filename    = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_expand/ImageSets/Main/train.txt'
# VOC_2012_train_image_set_filename    = '../../datasets/VOCdevkit/VOC2012/ImageSets/Main/train.txt'
VOC_2007_val_image_set_filename      = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_expand/ImageSets/Main/val.txt'
# VOC_2012_val_image_set_filename      = '../../datasets/VOCdevkit/VOC2012/ImageSets/Main/val.txt'
VOC_2007_trainval_image_set_filename = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_expand/ImageSets/Main/trainval.txt'
# VOC_2012_trainval_image_set_filename = '../../datasets/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt'
VOC_2007_test_image_set_filename     = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_expand/ImageSets/Main/test.txt'
# The XML parser needs to know what object class names to look for and in which order to map them to integers.
# classes = ['background',
#            'aeroplane', 'bicycle', 'bird', 'boat',
#            'bottle', 'bus', 'car', 'cat',
#            'chair', 'cow', 'diningtable', 'dog',
#            'horse', 'motorbike', 'person', 'pottedplant',
#            'sheep', 'sofa', 'train', 'tvmonitor']
classes = ['background', 'neg']  # Class names; 'background' must be included here
train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                        image_set_filenames=[VOC_2007_trainval_image_set_filename],
                        annotations_dirs=[VOC_2007_annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False)
val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                      image_set_filenames=[VOC_2007_test_image_set_filename],
                      annotations_dirs=[VOC_2007_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)
# Optional: Convert the dataset into an HDF5 dataset. This will require more disk space, but will
# speed up the training. Doing this is not relevant in case you activated the `load_images_into_memory`
# option in the constructor, because in that case the images are in memory already anyway. If you don't
# want to create HDF5 datasets, comment out the subsequent two function calls.
train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                  resize=False,
                                  variable_image_size=True,
                                  verbose=True)
val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                resize=False,
                                variable_image_size=True,
                                verbose=True)
# 3: Set the batch size.
batch_size = 8 # Change the batch size if you like, or if you run into GPU memory issues.
# 4: Set the image transformations for pre-processing and data augmentation options.
# For the training generator:
ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width,
                                            background=mean_color)
# For the validation generator:
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height, width=img_width)
# 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                   model.get_layer('fc7_mbox_conf').output_shape[1:3],
                   model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]
ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    normalize_coords=normalize_coords)
# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[ssd_data_augmentation],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)
val_generator = val_dataset.generate(batch_size=batch_size,
                                     shuffle=False,
                                     transformations=[convert_to_3_channels,
                                                      resize],
                                     label_encoder=ssd_input_encoder,
                                     returns={'processed_images',
                                              'encoded_labels'},
                                     keep_images_without_gt=False)
# Get the number of samples in the training and validations datasets.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size   = val_dataset.get_dataset_size()
print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))
print("cuiwei")
def lr_schedule(epoch):  # Set the learning rate via a callback
    # 0.0001 for the first 100 epochs, then drop to 0.00001.
    if epoch < 100:
        return 0.0001
    else:
        return 0.00001
# Define model callbacks.
# TODO: Set the filepath under which you want to save the model.
model_checkpoint = ModelCheckpoint(filepath='ssd300_model_liehen_expand.h5',  # Filename under which the model is saved
                                   monitor='val_loss',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='auto',
                                   period=1)
#model_checkpoint.best =
csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                       separator=',',
                       append=True)
learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                verbose=1)
terminate_on_nan = TerminateOnNaN()
callbacks = [model_checkpoint,
             csv_logger,
             learning_rate_scheduler,
             terminate_on_nan]
# If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
initial_epoch   = 0
final_epoch     = 20
steps_per_epoch = 80
history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=steps_per_epoch,
                              epochs=final_epoch,
                              callbacks=callbacks,
                              validation_data=val_generator,
                              validation_steps=ceil(val_dataset_size/batch_size),
                              initial_epoch=initial_epoch)
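If a training run is interrupted, it can be resumed from the saved checkpoint. A minimal sketch of the reload (my own addition; it mirrors the commented-out loading code in the test script below and assumes the checkpoint filename used above):

from keras.models import load_model
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_L2Normalization import L2Normalization

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model = load_model('ssd300_model_liehen_expand.h5',
                   custom_objects={'AnchorBoxes': AnchorBoxes,
                                   'L2Normalization': L2Normalization,
                                   'compute_loss': ssd_loss.compute_loss})
# Then call fit_generator() again with initial_epoch set to the epoch where training stopped.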
5. Testing
After training finishes, the model is tested with test_ssd300.py. The source is as follows:
from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image
from keras.optimizers import Adam
from imageio import imread
import numpy as np
from matplotlib import pyplot as plt
from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast
from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
import cv2
# Set the image size.
img_height = 300
img_width = 300
# TODO: Set the path to the `.h5` file of the model to be loaded.
# model_path = 'ssd300_model.h5'
# model_path = 'VGG_VOC0712Plus_SSD_300x300_iter_240000.h5'
# We need to create an SSDLoss object in order to pass that to the model loader.
# ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
#
# K.clear_session() # Clear previous models from memory.
#
# model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
#                                                'L2Normalization': L2Normalization,
#                                                'DecodeDetections': DecodeDetections,
#                                                'compute_loss': ssd_loss.compute_loss})
K.clear_session() # Clear previous models from memory.
model = ssd_300(image_size=(img_height, img_width, 3),
                n_classes=1,
                mode='inference',
                l2_regularization=0.0005,
                scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5]],
                two_boxes_for_ar1=True,
                steps=[8, 16, 32, 64, 100, 300],
                offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                clip_boxes=False,
                variances=[0.1, 0.1, 0.2, 0.2],
                normalize_coords=True,
                subtract_mean=[123, 117, 104],
                swap_channels=[2, 1, 0],
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400)
# 2: Load the trained weights into the model.
# TODO: Set the path of the trained weights.
# weights_path = 'VGG_VOC0712Plus_SSD_300x300_iter_240000.h5'
# weights_path = 'ssd300_model_liehen_small.h5'
# weights_path = 'ssd300_model_liehen_expand.h5'
weights_path = 'ssd300_model_liehen.h5'
model.load_weights(weights_path, by_name=True)
# 3: Compile the model so that Keras won't complain the next time you load it.
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
model.summary()
orig_images = [] # Store the images here.
input_images = [] # Store resized versions of the images here.
# We'll only load one image in this example.
# img_path = 'VOC2007/JPEGImages/16.jpg'
img_path = 'F:/Data/crack image/ChallengeDataset/ChallengeDataset/train/neg/428.jpg'
image_opencv = cv2.imread(img_path)
# img_path='VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/000001.jpg'
orig_images.append(imread(img_path))
img = image.load_img(img_path, target_size=(img_height, img_width))
img = image.img_to_array(img)
input_images.append(img)
input_images = np.array(input_images)
# Run prediction on the new image
y_pred = model.predict(input_images)
# Keep only the boxes above the confidence threshold; 0 shows every box,
# raise it (e.g. to 0.5) to display only confident detections.
confidence_threshold = 0
y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]
np.set_printoptions(precision=2, suppress=True, linewidth=90)
print("Predicted boxes:\n")
print('   class   conf xmin   ymin   xmax   ymax')
print(y_pred_thresh[0])
# Display the image and draw the predicted boxes onto it.
# Set the colors for the bounding boxes
colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
# classes = ['background',
#            'aeroplane', 'bicycle', 'bird', 'boat',
#            'bottle', 'bus', 'car', 'cat',
#            'chair', 'cow', 'diningtable', 'dog',
#            'horse', 'motorbike', 'person', 'pottedplant',
#            'sheep', 'sofa', 'train', 'tvmonitor']
classes = ['background', 'neg']
plt.figure(figsize=(20,12))
plt.imshow(orig_images[0])
current_axis = plt.gca()
for box in y_pred_thresh[0]:
    # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions.
    xmin = box[2] * orig_images[0].shape[1] / img_width
    ymin = box[3] * orig_images[0].shape[0] / img_height
    xmax = box[4] * orig_images[0].shape[1] / img_width
    ymax = box[5] * orig_images[0].shape[0] / img_height
    color = colors[int(box[0])]
    label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
    current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2))
    current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})
    cv2.putText(image_opencv, label, (int(xmin), int(ymin)-10), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 255, 0), 1)
    cv2.rectangle(image_opencv, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 2)
cv2.namedWindow("Canvas",0)
cv2.imshow("Canvas", image_opencv)
cv2.waitKey(0)
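If you want to keep the rendered detections rather than only display them (my own addition), the OpenCV canvas can be written to disk:

cv2.imwrite('detection_result.jpg', image_opencv)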
Test results:
6. Source Code and Data
SSD crack-detection source code: https://download.csdn.net/download/qq_29462849/10748838
Crack image data: https://download.csdn.net/download/qq_29462849/10748828
7. Things to Watch Out For
With this SSD code the learning rate must be kept small; it is set to 0.0001 here. Setting it too large causes exploding gradients, which I have experienced first-hand.
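Besides lowering the learning rate, Keras optimizers also accept gradient-clipping arguments, which add an extra safety net against exploding gradients. A small sketch of what that could look like (my own addition; the clipnorm value of 1.0 is an arbitrary example):

from keras.optimizers import SGD

# Clip each gradient tensor to an L2 norm of at most 1.0 before the update step.
sgd = SGD(lr=0.0001, momentum=0.9, decay=0.001, nesterov=False, clipnorm=1.0)
model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)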
When preparing the data, foreground and background need to be clearly separated. For distinct cracks the bounding boxes can be small and should include as little irrelevant background as possible; for faint cracks the boxes need to be larger, because a faint crack can only be told apart from the background by its longer extent.
8. An Alternative Approach
If you do not need to localize the crack within the image, but only need to classify whether the whole image contains a crack, one good idea is to split the image into slices, for example cutting it into four strips along the y-axis, as shown below. Since cracks mostly lie at the same horizontal level, cutting along the y-axis preserves complete cracks as much as possible. Sort the resulting slices into crack and non-crack images, label them accordingly, and feed them into a classification network such as DenseNet for training.
網絡訓練完成后,就可以對場景圖像進行識別了,只不過在識別的過程中同樣需要對場景圖像進行切分,然后對每一個切片圖像進行分類識別。判斷依據,只要場景圖像的切片圖像有一個被分類為有裂紋的,那該場景圖像就是有裂紋的。
The slicing direction and the number of slices can be adapted to your own operating conditions; it does not have to be four. A minimal sketch of the slice-and-classify rule is shown below.
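This sketch assumes a trained Keras classifier whose input size is 224x224 and whose class index 1 means 'crack' (both are assumptions; adjust them to your own network):

import cv2
import numpy as np

def image_has_crack(img_path, classifier, n_slices=4, input_size=224, thresh=0.5):
    # Cut the image into horizontal strips along the y-axis and classify each strip;
    # the whole image counts as cracked as soon as one strip is classified as cracked.
    img = cv2.imread(img_path)
    h = img.shape[0]
    step = h // n_slices
    for k in range(n_slices):
        y_end = h if k == n_slices - 1 else (k + 1) * step  # last strip absorbs leftover rows
        strip = cv2.resize(img[k * step:y_end], (input_size, input_size))
        x = strip[np.newaxis].astype(np.float32) / 255.0
        if classifier.predict(x)[0][1] > thresh:  # index 1 = 'crack' (assumed)
            return True
    return False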
If you have any questions, feel free to reach out on QQ at 516999497.