[AI]從零開始的YOLO數據集增強教程

一、前言

　　不知道大家在訓練YOLO時有沒有遇到過這樣的情況，明明數據集已經準備了很多了，但是YOLO還是不認識某個物品，或者置信度低。那么有沒有辦法讓我們在不制作新數據集的情況下，讓代碼幫我們生成新的數據集來訓練模型呢？當然有，并且現在最主流的辦法就是將原本的圖像進行翻轉，改變亮度，以及添加噪聲等。經過了這些步驟，就增加了我們數據集的多樣性，相當于增加了YOLO的樣本數量，這樣，YOLO模型就能夠認識更多樣的對象，從而實現數據集增強。那么本次教程，就來教大家如何使用簡單的處理代碼實現對YOLO數據集的增強！

二、需要準備什么？

　　既然需要對YOLO的訓練數據集進行增強，這里需要大家已經安裝好YOLO環境并且對YOLO的訓練非常熟悉。如果你還沒有安裝好YOLO的推理環境可以直接看下面的教程：

YOLO環境搭建：[AI]小白向的YOLO安裝教程-CSDN博客

如果你還不會訓練YOLO模型可以看下面的教程：

YOLO模型訓練：[AI]YOLO如何訓練對象檢測模型（詳細）_yolo模型-CSDN博客

當部署好YOLO環境并且對YOLO推理非常熟悉以后就可以進行下面的步驟了。

三、YOLO數據集增強

　　這里我們需要對數據集進行增強，首先我們需要一個已經制作好的數據集，這里數據集的數量不用太多，我這里就準備了200張已經框好的圖片，用于識別花卉的碳黑病：

準備好對應的數據集，我們創建一個名為“Augment.py”的文件，然后把下方的代碼粘貼進這個py文件中，代碼如下：

# -*- coding: utf-8 -*-
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFile
from torchvision import transforms

ImageFile.LOAD_TRUNCATED_IMAGES = True
random.seed(0)
# The augmentations below draw from NumPy's and PyTorch's generators, not from
# `random`, so those are seeded too; otherwise runs are not reproducible.
np.random.seed(0)
torch.manual_seed(0)


class DataAugmentationOnDetection:
    """Augmentations for YOLO-format detection samples.

    The geometric methods take a PIL image (``Image.open(path)``) and a label
    tensor whose rows are ``[class, x_center, y_center, width, height]`` with
    coordinates normalised to the image size.  The pixel-level methods take an
    image tensor with values in ``[0, 1]`` and modify it in place, so callers
    should pass a clone when the original must be kept.
    """

    def __init__(self):
        super().__init__()

    def resize_keep_ratio(self, image, boxes, target_size):
        """Scale ``image`` so that its longest side equals ``target_size``.

        The aspect ratio is preserved, so the normalised ``boxes`` are returned
        unchanged.
        """
        # The smaller of the two per-axis ratios makes the longest side fit.
        ratio = min(float(target_size) / side for side in image.size)
        new_size = tuple(int(side * ratio) for side in image.size)
        return image.resize(new_size, Image.BILINEAR), boxes

    def resizeDown_keep_ratio(self, image, boxes, target_size):
        """Like ``resize_keep_ratio`` but never enlarges the image."""
        ratio = min(float(target_size) / side for side in image.size)
        ratio = min(ratio, 1)
        new_size = tuple(int(side * ratio) for side in image.size)
        return image.resize(new_size, Image.BILINEAR), boxes

    def resize(self, img, boxes, size):
        """Resize ``img`` to ``(size, size)``; normalised ``boxes`` are unchanged."""
        return img.resize((size, size), Image.BILINEAR), boxes

    def random_flip_horizon(self, img, boxes, h_rate=1):
        """Flip horizontally with probability ``h_rate``.

        ``boxes`` is updated in place (``x_center -> 1 - x_center``).
        """
        if np.random.random() < h_rate:
            img = transforms.RandomHorizontalFlip(p=1)(img)
            if len(boxes) > 0:
                boxes[:, 1] = 1 - boxes[:, 1]
        return img, boxes

    def random_flip_vertical(self, img, boxes, v_rate=1):
        """Flip vertically with probability ``v_rate``.

        ``boxes`` is updated in place (``y_center -> 1 - y_center``).
        """
        if np.random.random() < v_rate:
            img = transforms.RandomVerticalFlip(p=1)(img)
            if len(boxes) > 0:
                boxes[:, 2] = 1 - boxes[:, 2]
        return img, boxes

    def center_crop(self, img, boxes, target_size=None):
        """Crop the central square of ``img`` and remap ``boxes`` to it.

        Boxes lying completely outside the crop on either axis are dropped;
        the remaining ones are clipped to the crop.  When ``target_size`` is
        given, the cropped image is resized to ``(target_size, target_size)``.

        Returns:
            ``(image, boxes)``.  If ``boxes`` was empty it is returned as is;
            otherwise an ``(M, 5)`` tensor of the surviving rows (``M`` may
            be 0).
        """
        w, h = img.size
        size = min(w, h)

        if len(boxes) > 0:
            # Normalised ratios are awkward to crop, so work in absolute
            # x1, y1, x2, y2 pixel coordinates.
            label = boxes[:, 0].reshape([-1, 1])
            x_, y_, w_, h_ = boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
            x1 = (w * x_ - 0.5 * w * w_).reshape([-1, 1])
            y1 = (h * y_ - 0.5 * h * h_).reshape([-1, 1])
            x2 = (w * x_ + 0.5 * w * w_).reshape([-1, 1])
            y2 = (h * y_ + 0.5 * h * h_).reshape([-1, 1])
            boxes_xyxy = torch.cat([x1, y1, x2, y2], dim=1)

            # Shift the origin to the top-left corner of the crop.
            if w > h:
                boxes_xyxy[:, [0, 2]] = boxes_xyxy[:, [0, 2]] - (w - h) / 2
            else:
                boxes_xyxy[:, [1, 3]] = boxes_xyxy[:, [1, 3]] - (h - w) / 2

            left, top = boxes_xyxy[:, 0], boxes_xyxy[:, 1]
            right, bottom = boxes_xyxy[:, 2], boxes_xyxy[:, 3]
            outside_x = ((left < 0) & (right < 0)) | ((left > size) & (right > size))
            outside_y = ((top < 0) & (bottom < 0)) | ((top > size) & (bottom > size))
            # A box is discarded when it is outside on EITHER axis.
            keep = ~(outside_x | outside_y)

            label = label[keep]
            clipped = boxes_xyxy[keep].clamp(min=0, max=size).reshape([-1, 4])

            # Back to normalised YOLO format relative to the square crop.
            x1, y1, x2, y2 = clipped[:, 0], clipped[:, 1], clipped[:, 2], clipped[:, 3]
            xc = ((x1 + x2) / (2 * size)).reshape([-1, 1])
            yc = ((y1 + y2) / (2 * size)).reshape([-1, 1])
            wc = ((x2 - x1) / size).reshape([-1, 1])
            hc = ((y2 - y1) / size).reshape([-1, 1])
            boxes = torch.cat([label, xc, yc, wc, hc], dim=1)

        img = transforms.CenterCrop(size)(img)
        if target_size:
            img = img.resize((target_size, target_size), Image.BILINEAR)
        return img, boxes

    # ------------------------------------------------------
    # From here on ``img`` is a Tensor with values in [0, 1].
    # ------------------------------------------------------
    def random_bright(self, img, u=120, p=1):
        """With probability ``p`` add a random offset in ``[-u, u] / 255``."""
        if np.random.random() < p:
            alpha = np.random.uniform(-u, u) / 255
            img += alpha
            img = img.clamp(min=0.0, max=1.0)
        return img

    def random_contrast(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` scale all values by a factor in ``[lower, upper]``."""
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            img *= alpha
            img = img.clamp(min=0, max=1.0)
        return img

    def random_saturation(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` scale channel 1 by a factor in ``[lower, upper]``.

        As implemented this only rescales the middle channel of a
        three-channel image; the other channels are left untouched.
        """
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            img[1] = img[1] * alpha
            img[1] = img[1].clamp(min=0, max=1.0)
        return img

    def add_gasuss_noise(self, img, mean=0, std=0.1):
        """Add Gaussian noise with the given ``mean`` and ``std``, then clamp."""
        noise = torch.normal(mean, std, img.shape)
        img += noise
        img = img.clamp(min=0, max=1.0)
        return img

    def add_salt_noise(self, img):
        """Set a random subset of values to 1.0 (salt noise)."""
        noise = torch.rand(img.shape)
        # Threshold in [0.7, 0.9): roughly 10-30% of the values are replaced.
        alpha = np.random.random() / 5 + 0.7
        img[noise > alpha] = 1.0
        return img

    def add_pepper_noise(self, img):
        """Set a random subset of values to 0 (pepper noise)."""
        noise = torch.rand(img.shape)
        alpha = np.random.random() / 5 + 0.7
        img[noise > alpha] = 0
        return img


def plot_pics(img, boxes):
    """Show a PIL image with its YOLO boxes (label tensor) drawn on top."""
    plt.imshow(img)
    label_colors = [(213, 110, 89)]
    w, h = img.size
    for i in range(boxes.shape[0]):
        xc, yc, wc, hc = boxes[i, 1:]
        # Convert the normalised centre/size to a pixel top-left corner/size.
        x = w * xc - 0.5 * w * wc
        y = h * yc - 0.5 * h * hc
        box_w, box_h = w * wc, h * hc
        plt.gca().add_patch(
            plt.Rectangle(
                xy=(x, y),
                width=box_w,
                height=box_h,
                edgecolor=[c / 255 for c in label_colors[0]],
                fill=False,
                linewidth=2,
            )
        )
    plt.show()


def get_image_list(image_path):
    """Return the names of all files found under ``image_path``.

    Only bare file names are returned (no directory part), including those of
    files located in sub-directories.
    """
    files_list = []
    for _root, _sub_dirs, files in os.walk(image_path):
        files_list.extend(files)
    return files_list


def get_label_file(label_path, image_name):
    """Read the YOLO label file that belongs to ``image_name``.

    The label file is ``<image stem>.txt`` inside ``label_path``.

    Returns:
        A list with one list of floats per non-blank line, or an empty list
        when the label file is missing or empty.
    """
    # splitext copes with extensions of any length (".jpeg", ".png", ...).
    stem = os.path.splitext(image_name)[0]
    fname = os.path.join(label_path, stem + ".txt")
    if not os.path.exists(fname) or os.path.getsize(fname) == 0:
        return []

    rows = []
    with open(fname, 'r', encoding='utf-8') as infile:
        for line in infile:
            fields = line.split()
            # Skip blank lines; an empty row would make the rows ragged.
            if fields:
                rows.append([float(value) for value in fields])
    return rows


def save_Yolo(img, boxes, save_path, prefix, image_name):
    """Write an augmented image and its labels below ``save_path``.

    The image goes to ``<save_path>/images/<prefix><image_name>`` and the
    labels to ``<save_path>/labels/<prefix><image stem>.txt``.  ``img`` must be
    a PIL image.  Failures are reported on stdout and do not abort the run.
    """
    image_dir = os.path.join(save_path, "images")
    label_dir = os.path.join(save_path, "labels")
    # exist_ok avoids failing when only one of the two directories exists.
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    stem = os.path.splitext(image_name)[0]
    try:
        img.save(os.path.join(image_dir, prefix + image_name))
        with open(os.path.join(label_dir, prefix + stem + ".txt"), 'w', encoding="utf-8") as f:
            if len(boxes) > 0:
                for row in boxes:
                    # The class id is written as an int, coordinates as floats.
                    fields = [str(int(row[0]))] + [str(float(value)) for value in row[1:]]
                    f.write(" ".join(fields) + '\n')
    except (OSError, ValueError, KeyError) as err:
        print("ERROR: ", image_name, " is bad.", err)


def runAugumentation(image_path, label_path, save_path):
    """Generate augmented copies of every image found under ``image_path``.

    For each image the following variants are saved (file-name prefix in
    brackets): horizontal flip (fh_), vertical flip (fv_), centre crop (cc_),
    brightness (rb_), contrast (rc_), saturation (rs_), Gaussian noise (gn_),
    salt noise (sn_) and pepper noise (pn_).
    """
    DAD = DataAugmentationOnDetection()
    to_tensor = transforms.ToTensor()
    to_image = transforms.ToPILImage()

    # Pixel-level augmentations leave the boxes untouched.
    pixel_augmentations = (
        ("rb_", DAD.random_bright),
        ("rc_", DAD.random_contrast),
        ("rs_", DAD.random_saturation),
        ("gn_", DAD.add_gasuss_noise),
        ("sn_", DAD.add_salt_noise),
        ("pn_", DAD.add_pepper_noise),
    )

    for image_name in get_image_list(image_path):
        print("dealing: " + image_name)
        # Convert to RGB so the channel-based augmentations always see three
        # channels; convert() loads the pixels, so the file can be closed.
        with Image.open(os.path.join(image_path, image_name)) as opened:
            img = opened.convert("RGB")
        boxes = torch.tensor(get_label_file(label_path, image_name))

        # --- Geometric augmentations on the PIL image (pick what you need) ---
        # Optional down-scaling (use a prefix other than "rs_", which is
        # already taken by the saturation output):
        # t_img, t_boxes = DAD.resizeDown_keep_ratio(img, boxes, 1024)
        # save_Yolo(t_img, t_boxes, save_path, prefix="rd_", image_name=image_name)

        # Horizontal flip
        t_img, t_boxes = DAD.random_flip_horizon(img, boxes.clone())
        save_Yolo(t_img, t_boxes, save_path, prefix="fh_", image_name=image_name)

        # Vertical flip
        t_img, t_boxes = DAD.random_flip_vertical(img, boxes.clone())
        save_Yolo(t_img, t_boxes, save_path, prefix="fv_", image_name=image_name)

        # Centre crop
        t_img, t_boxes = DAD.center_crop(img, boxes.clone(), 1024)
        save_Yolo(t_img, t_boxes, save_path, prefix="cc_", image_name=image_name)

        # --- Pixel-level augmentations on a tensor copy of the image ---
        tensor_img = to_tensor(img)
        for prefix, augment in pixel_augmentations:
            t_img = augment(tensor_img.clone())
            save_Yolo(to_image(t_img), boxes, save_path, prefix=prefix, image_name=image_name)

        print("end:     " + image_name)


if __name__ == '__main__':
    # Image and label folders of the existing dataset
    image_path = r"./train/images"
    label_path = r"./train/labels"
    save_path = r"./save"    # Output folder; it may not exist yet

    runAugumentation(image_path, label_path, save_path)

粘貼完成以后，如圖所示：

在上方的代碼中，我們找到“if __name__ == '__main__':”的位置，在使用代碼前，需要對這里的路徑進行一些簡單的修改。來到上述位置后，我們可以看到如圖所示的代碼：

這里在配置之前有一個前提，那就是我們的數據集已經制作好了。我的數據集結構如圖所示：

如上圖可以看到，我們的“Augment.py”與train目錄在同一級，在train目錄中有images目錄與labels目錄：

這就是非常常見的YOLO目錄結構，這里就不多說了。

根據代碼中的變量，我們可知，第一個“image_path”需要傳入我們的數據集的圖片路徑，后面的“label_path”需要傳入我們數據集中標簽的路徑。最后一個“save_path”就是我們保存新生成的數據集與標簽的路徑。修改好上面的內容以后，我們直接保存即可。我們進入YOLO的虛擬環境，然后直接運行這個py文件即可：