使用 imgaug 庫可以方便地進行圖像增強操作,包括調整圖像大小。以下是使用 imgaug 庫調整圖像大小並同步修改對應 XML 標籤框的示例腳本:
- 注意:請將輸入文件夾路徑、輸出文件夾路徑和目標尺寸修改為自己的實際設定(target_size 的順序為「高, 寬」)。
input_folder = "path/to/your/input_folder"
output_folder = "path/to/your/output_folder"
target_size = (640, 640)
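- 輸入文件夾結構:input_folder 下需包含 images(存放 .jpg 圖像)和 xml(存放與圖像同名的 .xml 標註文件)兩個子文件夾;處理結果會分別寫入 output_folder 下的 images 和 xml 子文件夾。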
全部代碼:
import os
import xml.etree.ElementTree as ET
from PIL import Image
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
import numpy as np


def _annotated_objects(root):
    """Return the ``<object>`` elements under *root* that contain a ``<bndbox>``.

    Both the reader (``resize_and_augment``) and the writer (``update_xml``)
    use this same filter so that the i-th resized box is always written back
    to the object it was read from.
    """
    return [obj for obj in root.iter('object') if obj.find('bndbox') is not None]


def resize_and_augment(image_path, xml_folder, output_folder, target_size):
    """Resize one image and rescale its bounding-box annotation to match.

    Args:
        image_path: Path of the image to resize.
        xml_folder: Folder holding the annotation XML; the file is looked up
            as ``<image stem>.xml``.
        output_folder: Root output folder. The resized image is saved under
            ``<output_folder>/images`` and the updated XML under
            ``<output_folder>/xml`` (both are created if missing).
        target_size: ``(height, width)`` of the resized image.

    Raises:
        FileNotFoundError: If the image or its XML file does not exist.
        xml.etree.ElementTree.ParseError: If the XML file is malformed.
    """
    # Locate and parse the annotation that belongs to this image.
    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    xml_path = os.path.join(xml_folder, image_stem + '.xml')
    root = ET.parse(xml_path).getroot()

    # Load the pixels inside a context manager so the file handle is released
    # as soon as the array has been materialised.
    with Image.open(image_path) as image:
        original_size = image.size
        # Modes other than L/RGB (e.g. CMYK) become 4-channel arrays that
        # cannot be written back as JPEG, so normalise them to RGB first.
        pixels = image if image.mode in ('L', 'RGB') else image.convert('RGB')
        image_np = np.array(pixels)

    # Collect every annotated box; objects without a <bndbox> are skipped.
    bounding_boxes = []
    for object_tag in _annotated_objects(root):
        bbox_tag = object_tag.find('bndbox')
        bounding_boxes.append(BoundingBox(
            x1=float(bbox_tag.find('xmin').text),
            y1=float(bbox_tag.find('ymin').text),
            x2=float(bbox_tag.find('xmax').text),
            y2=float(bbox_tag.find('ymax').text),
        ))
    bbs = BoundingBoxesOnImage(bounding_boxes, shape=image_np.shape)

    # Resize the image and the boxes together so they stay consistent.
    seq = iaa.Sequential([
        iaa.Resize({"height": target_size[0], "width": target_size[1]}),
    ])
    augmented_image, bbs_aug = seq(image=image_np, bounding_boxes=bbs)

    # Make sure both output sub-folders exist before writing.
    output_images_folder = os.path.join(output_folder, 'images')
    output_xml_folder = os.path.join(output_folder, 'xml')
    os.makedirs(output_images_folder, exist_ok=True)
    os.makedirs(output_xml_folder, exist_ok=True)

    # Save the resized image under its original file name.
    augmented_image_path = os.path.join(
        output_images_folder, os.path.basename(image_path))
    Image.fromarray(augmented_image).save(augmented_image_path)

    # Write the matching annotation.
    update_xml(xml_path, bbs_aug, original_size, target_size, output_xml_folder)


def update_xml(xml_path, bbs_aug, original_size, new_size, output_folder):
    """Write a copy of an annotation XML with the new image size and boxes.

    Args:
        xml_path: Path of the original annotation XML.
        bbs_aug: Object exposing ``bounding_boxes``, a sequence of boxes with
            ``x1``, ``y1``, ``x2`` and ``y2`` attributes, in the same order as
            the ``<object>`` elements that have a ``<bndbox>``.
        original_size: Size of the source image. Unused; kept so existing
            callers keep working.
        new_size: ``(height, width)`` of the resized image.
        output_folder: Folder that receives the updated XML (created if
            missing); the file keeps its original name.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Update the recorded image dimensions.
    new_height, new_width = new_size[0], new_size[1]
    for size_tag in root.iter('size'):
        for tag_name, value in (('width', new_width), ('height', new_height)):
            dimension_tag = size_tag.find(tag_name)
            if dimension_tag is not None:
                dimension_tag.text = str(value)

    # Update the box coordinates, pairing boxes only with objects that
    # actually carry a <bndbox> (the same filter used when reading them).
    for object_tag, bbox_aug in zip(_annotated_objects(root),
                                    bbs_aug.bounding_boxes):
        bbox_tag = object_tag.find('bndbox')
        new_coords = {
            'xmin': bbox_aug.x1,
            'ymin': bbox_aug.y1,
            'xmax': bbox_aug.x2,
            'ymax': bbox_aug.y2,
        }
        for coord, coord_value in new_coords.items():
            coord_tag = bbox_tag.find(coord)
            if coord_tag is not None:
                # Round to the nearest pixel; plain int() truncation would
                # shift every coordinate toward the origin.
                coord_tag.text = str(int(round(float(coord_value))))

    # Save under the original file name; UTF-8 keeps non-ASCII labels readable
    # instead of turning them into numeric character references.
    os.makedirs(output_folder, exist_ok=True)
    updated_xml_path = os.path.join(output_folder, os.path.basename(xml_path))
    tree.write(updated_xml_path, encoding='utf-8')


def main():
    """Resize every JPEG in ``<input_folder>/images`` with its annotation."""
    # Input folder, output folder and target size (height, width).
    input_folder = "path/to/your/input_folder"
    output_folder = "path/to/your/output_folder"
    target_size = (640, 640)

    images_folder = os.path.join(input_folder, 'images')
    xml_folder = os.path.join(input_folder, 'xml')

    for filename in sorted(os.listdir(images_folder)):
        # Match the extension case-insensitively so ".JPG"/".jpeg" also count.
        if not filename.lower().endswith(('.jpg', '.jpeg')):
            continue
        image_path = os.path.join(images_folder, filename)
        try:
            resize_and_augment(image_path, xml_folder, output_folder, target_size)
        except (FileNotFoundError, ET.ParseError) as exc:
            # One missing or malformed annotation should not abort the batch.
            print(f"Skipping {filename}: {exc}")


if __name__ == "__main__":
    main()