數據集簡介:
訓練集共有 2160 張貓的圖片，分為 12 類；train_list.txt 是其標註文件。
測試集共有 240 張貓的圖片，不含標註信息。
訓練集圖像(部分)
驗證集圖像(部分)
標籤
部分代碼:
# Define the training dataset
class TrainData(Dataset):
    """Training dataset that yields an augmented image and its label.

    Image paths are read from the module-level ``train_paths`` and labels
    from ``train_labels``; both are defined outside this class.
    """

    def __init__(self):
        super().__init__()
        self.color_jitter = T.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05)
        # NOTE(review): mean=0 / std=1 presumably leaves pixel values
        # unscaled -- confirm this is what the pretrained weights expect.
        self.normalize = T.Normalize(mean=0, std=1)
        self.random_crop = T.RandomCrop(224, pad_if_needed=True)

    def __getitem__(self, index):
        """Return ``(features, labels)`` for the sample at *index*.

        ``features`` is a float32 array in C, H, W layout.
        """
        # Read the image. Converting to RGB gives every input (grayscale,
        # palette, RGBA, ...) exactly three channels, which replaces the old
        # bare ``except`` fallback that also hid unrelated errors. The
        # ``with`` block closes the file handle promptly.
        image_path = train_paths[index]
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))  # H, W, C

        # Image augmentation (operates on H, W, C data)
        features = self.color_jitter(image)
        features = self.random_crop(features)
        features = self.normalize(features.transpose([2, 0, 1])).astype(np.float32)

        # Read the label
        labels = train_labels[index]
        return features, labels

    def __len__(self):
        """Return the number of training samples."""
        return len(train_paths)


# Define the validation dataset
class ValidData(Dataset):
    """Validation dataset that yields a resized image and its label.

    Image paths are read from the module-level ``valid_paths`` and labels
    from ``valid_labels``; both are defined outside this class.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): mean=0 / std=1 presumably leaves pixel values
        # unscaled -- confirm this is what the pretrained weights expect.
        self.normalize = T.Normalize(mean=0, std=1)

    def __getitem__(self, index):
        """Return ``(features, labels)`` for the sample at *index*.

        ``features`` is a float32 array in C, H, W layout, resized to
        256 x 256.
        """
        # Read the image. Converting to RGB gives every input (grayscale,
        # palette, RGBA, ...) exactly three channels, which replaces the old
        # bare ``except`` fallback that also hid unrelated errors. The
        # ``with`` block closes the file handle promptly.
        image_path = valid_paths[index]
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))  # H, W, C

        # Image transform: resize in H, W, C layout, then move channels first
        features = cv2.resize(image, (256, 256)).transpose([2, 0, 1]).astype(np.float32)
        features = self.normalize(features)

        # Read the label
        labels = valid_labels[index]
        return features, labels

    def __len__(self):
        """Return the number of validation samples."""
        return len(valid_paths)
# Use the ResNet-50 model
paddle.vision.set_image_backend('cv2')
# Set up the GPU environment. This is done before the model is built so that
# the parameters are created on the same device the training loop runs on.
paddle.set_device('gpu:0')
model = paddle.vision.models.resnet50(pretrained=True, num_classes=12)
# Define the data iterator
train_dataloader = DataLoader(train_data, batch_size=256, shuffle=True, drop_last=False)
# Define the optimizer
opt = paddle.optimizer.Adam(learning_rate=1e-4, parameters=model.parameters(), weight_decay=paddle.regularizer.L2Decay(1e-4))
# Define the loss function
loss_fn = paddle.nn.CrossEntropyLoss()
# Overall training flow
# Train for 15 epochs; after each epoch evaluate on the validation set and
# save a checkpoint whose file name carries the measured accuracy.
for epoch_id in range(15):
    model.train()
    for batch_id, data in enumerate(train_dataloader()):
        # Read the batch
        features, labels = data
        features = paddle.to_tensor(features)
        labels = paddle.to_tensor(labels)
        # Forward pass
        predicts = model(features)
        # Loss computation
        loss = loss_fn(predicts, labels)
        # Backward pass
        avg_loss = paddle.mean(loss)
        avg_loss.backward()
        # Parameter update
        opt.step()
        # Reset gradients
        opt.clear_grad()
        # Print the loss every other batch
        if batch_id % 2 == 0:
            print('epoch_id:{}, batch_id:{}, loss:{}'.format(epoch_id, batch_id, avg_loss.numpy()))

    model.eval()
    print('開始評估')
    i = 0
    acc = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # backward graph for every validation sample.
    with paddle.no_grad():
        for image, label in valid_data:
            image = paddle.to_tensor([image])
            # Index of the highest score (first one on ties)
            pre = int(np.argmax(model(image)[0].numpy()))
            i += 1
            if pre == label:
                acc += 1
            if i % 10 == 0:
                print('精度:', acc / i)
    # Guard against an empty validation set, which would otherwise raise
    # ZeroDivisionError and lose the trained weights for this epoch.
    accuracy = acc / i if i else 0.0
    paddle.save(model.state_dict(), 'acc{}.model'.format(accuracy))
# Run prediction and build the submission
# First collect the list of paths of the files to predict
def listdir(path, list_name):
    """Recursively append the path of every file under *path* to *list_name*.

    Directories are descended into rather than listed. Entries of each
    directory are visited in sorted order so that the resulting list (and
    therefore the order of the predictions) is reproducible across runs and
    platforms.

    Args:
        path: Directory to walk.
        list_name: List that is extended in place with the file paths.

    Returns:
        None; results are delivered through *list_name*.
    """
    for file in sorted(os.listdir(path)):
        file_path = os.path.join(path, file)
        if os.path.isdir(file_path):
            listdir(file_path, list_name)
        else:
            list_name.append(file_path)


test_path = []
listdir('cat_12_test', test_path)
# Load the trained model
pre_model = paddle.vision.models.resnet50(pretrained=True, num_classes=12)
pre_model.set_state_dict(paddle.load('acc0.9285714285714286.model'))
pre_model.eval()
pre_classes = []
normalize = T.Normalize(mean=0, std=1)
# Generate the predictions. Inference needs no gradients, so autograd is
# disabled for the whole loop.
with paddle.no_grad():
    for path in test_path:
        image_path = path
        # Converting to RGB gives every input (grayscale, palette, RGBA, ...)
        # exactly three channels, which replaces the old bare ``except``
        # fallback; the ``with`` block closes the file handle promptly.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))  # H, W, C
        # Image transform: resize in H, W, C layout, then move channels first
        features = cv2.resize(image, (256, 256)).transpose([2, 0, 1]).astype(np.float32)
        features = normalize(features)
        features = paddle.to_tensor([features])
        # Index of the highest score (first one on ties)
        pre = int(np.argmax(pre_model(features)[0].numpy()))
        print("圖片:", path, "預測結果:", pre)
        pre_classes.append(pre)
print(pre_classes)