在目標檢測任務中,不同框架使用的標注格式各不相同。常見的框架中,YOLO 使用 .txt 文件進行標注,而 PASCAL VOC 則使用 .xml 文件。如果你需要將一個 YOLO 格式的數據集轉換為 VOC 格式以便適配其他模型,本文提供了一個結構清晰、可維護性強的 Python 腳本。
🧩 輸入輸出目錄結構
? 輸入目錄結構(YOLO 格式)
<YOLO數據集名稱>
├── train/
│ ├── images/
│ │ ├── img_000001.bmp
│ │ └── ...
│ └── labels/
│ ├── img_000001.txt
│ └── ...
└── val/├── images/│ ├── img_000100.bmp│ └── ...└── labels/├── img_000100.txt└── ...
? 輸出目錄結構(VOC 格式)
<VOC格式數據集名稱>
├── JPEGImages/ # 轉換后的圖像文件(.jpg)
├── Annotations/ # 對應的XML標注文件
└── ImageSets/└── Main/├── train.txt└── val.txt
🛠? 配置參數說明
YOLO_DATASET_ROOT = '' # YOLO格式數據集根目錄(輸入)
VOC_OUTPUT_DIR = '' # VOC格式輸出目錄(輸出)
CLASS_NAMES = [] # 類別名稱列表,示例:['person', 'car', 'dog']
SPLITS = ['train', 'val'] # 數據集劃分類型(訓練集、驗證集等)
VERBOSE = True # 是否輸出詳細日志
?? 注意:你需要根據自己的項目路徑和類別信息填寫 YOLO_DATASET_ROOT
、VOC_OUTPUT_DIR
和 CLASS_NAMES
。
目前腳本默認處理 .bmp 圖像并將其轉為 .jpg,你可以根據需求修改擴展名以支持 .png、.jpeg 等格式。
完整代碼如下:
import os
import xml.etree.ElementTree as ET
from xml.dom import minidom
import cv2# -----------------------------
# 超參數配置(Hyperparameters)
# -----------------------------
YOLO_DATASET_ROOT = '' # YOLO格式數據集根目錄(輸入)
VOC_OUTPUT_DIR = '' # VOC格式輸出目錄(輸出)
CLASS_NAMES = [] # 類別名稱列表,示例:['person', 'car', 'dog']
SPLITS = ['train', 'val'] # 數據集劃分類型(訓練集、驗證集等)
VERBOSE = True # 是否輸出詳細日志def create_voc_annotation(image_path, label_path, annotations_output_dir):"""根據圖像和YOLO標簽生成PASCAL VOC格式的XML標注文件。"""image = cv2.imread(image_path)height, width, depth = image.shapeannotation = ET.Element('annotation')# .bmp -> .jpgfilename = os.path.basename(image_path).replace('.bmp', '.jpg')ET.SubElement(annotation, 'folder').text = 'JPEGImages'ET.SubElement(annotation, 'filename').text = filenameET.SubElement(annotation, 'path').text = os.path.join(VOC_OUTPUT_DIR, 'JPEGImages', filename)source = ET.SubElement(annotation, 'source')ET.SubElement(source, 'database').text = 'Custom Dataset'size = ET.SubElement(annotation, 'size')ET.SubElement(size, 'width').text = str(width)ET.SubElement(size, 'height').text = str(height)ET.SubElement(size, 'depth').text = str(depth)ET.SubElement(annotation, 'segmented').text = '0'if os.path.exists(label_path):with open(label_path, 'r') as f:for line in f.readlines():data = line.strip().split()class_id = int(data[0])x_center = float(data[1]) * widthy_center = float(data[2]) * heightbbox_width = float(data[3]) * widthbbox_height = float(data[4]) * heightxmin = int(x_center - bbox_width / 2)ymin = int(y_center - bbox_height / 2)xmax = int(x_center + bbox_width / 2)ymax = int(y_center + bbox_height / 2)obj = ET.SubElement(annotation, 'object')ET.SubElement(obj, 'name').text = CLASS_NAMES[class_id]ET.SubElement(obj, 'pose').text = 'Unspecified'ET.SubElement(obj, 'truncated').text = '0'ET.SubElement(obj, 'difficult').text = '0'bndbox = ET.SubElement(obj, 'bndbox')ET.SubElement(bndbox, 'xmin').text = str(xmin)ET.SubElement(bndbox, 'ymin').text = str(ymin)ET.SubElement(bndbox, 'xmax').text = str(xmax)ET.SubElement(bndbox, 'ymax').text = str(ymax)# 保存XML文件xml_str = minidom.parseString(ET.tostring(annotation)).toprettyxml(indent=" ")xml_filename = filename.replace('.jpg', '.xml')xml_path = os.path.join(annotations_output_dir, xml_filename) # 確保這里只有一層Annotations目錄with open(xml_path, "w") as f:f.write(xml_str)if VERBOSE:print(f"? 已生成標注文件: {xml_filename}")def convert_dataset(input_dir, output_dir):"""將YOLO格式的數據集轉換為VOC格式。包括圖像格式轉換(.bmp -> .jpg)、生成XML標注文件,并創建ImageSets/Main/train.txt/val.txt。"""print("🔄 開始轉換YOLO格式數據集到VOC格式...")if not os.path.exists(output_dir):os.makedirs(output_dir)for split in SPLITS:images_dir = os.path.join(input_dir, split, 'images')labels_dir = os.path.join(input_dir, split, 'labels')output_images_dir = os.path.join(output_dir, 'JPEGImages')output_annotations_dir = os.path.join(output_dir, 'Annotations')output_imagesets_dir = os.path.join(output_dir, 'ImageSets', 'Main')os.makedirs(output_images_dir, exist_ok=True)os.makedirs(output_annotations_dir, exist_ok=True)os.makedirs(output_imagesets_dir, exist_ok=True)set_file_path = os.path.join(output_imagesets_dir, f"{split}.txt")set_file = open(set_file_path, 'w')count = 0for filename in os.listdir(images_dir):if filename.endswith('.bmp'):image_path = os.path.join(images_dir, filename)label_path = os.path.join(labels_dir, filename.replace('.bmp', '.txt'))# 圖像轉換new_image_name = filename.replace('.bmp', '.jpg')new_image_path = os.path.join(output_images_dir, new_image_name)image = cv2.imread(image_path)cv2.imwrite(new_image_path, image)# 寫入ImageSets/Main/train.txt或val.txtbase_name = new_image_name.replace('.jpg', '')set_file.write(f"{base_name}\n")# 生成XML標注文件create_voc_annotation(new_image_path, label_path, output_annotations_dir) # 確保傳入的是Annotations目錄路徑count += 1if VERBOSE and count % 10 == 0:print(f"🖼? 已處理 {count} 張圖片...")set_file.close()print(f"? 完成 [{split}] 分割集處理,共處理 {count} 張圖片")print("🎉 數據集轉換完成!")if __name__ == "__main__":convert_dataset(YOLO_DATASET_ROOT, VOC_OUTPUT_DIR)
轉換后效果:
驗證生成的VOC數據集中圖片質量和數量是否合適可以用下面的腳本:
import os
import cv2
from xml.etree import ElementTree as ET# -----------------------------
# 超參數配置(Hyperparameters)
# -----------------------------
DATASET_ROOT = '' # VOC格式數據集根目錄
CLASS_NAMES = [] # 類別列表, 示例: ['car', 'person', 'dog']
VERBOSE = True # 是否輸出詳細日志def count_images_in_set(imagesets_dir, set_name):"""統計ImageSets/Main目錄下指定集合(train/val)的圖片數量。"""set_file_path = os.path.join(imagesets_dir, f"{set_name}.txt")if not os.path.exists(set_file_path):print(f"[警告] 找不到 {set_name}.txt 文件,請確認是否生成正確劃分文件。")return 0with open(set_file_path, 'r') as f:lines = [line.strip() for line in f.readlines() if line.strip()]return len(lines)def check_images(jpeg_dir):"""檢查JPEGImages目錄下的所有圖片是否都能正常加載。"""print("[檢查] 驗證圖像是否可讀...")error_images = []for filename in os.listdir(jpeg_dir):if filename.lower().endswith(('.jpg', '.jpeg', '.png')):image_path = os.path.join(jpeg_dir, filename)try:img = cv2.imread(image_path)if img is None:raise ValueError("無法加載圖像")except Exception as e:error_images.append(filename)if VERBOSE:print(f" ? 圖像加載失敗: {filename} | 原因: {str(e)}")return error_imagesdef validate_annotations(annotations_dir, jpeg_dir):"""驗證Annotations目錄下的XML標注文件是否與對應的圖片匹配。"""print("[檢查] 驗證XML標注文件是否有效...")error_annotations = []for filename in os.listdir(annotations_dir):if filename.endswith('.xml'):xml_path = os.path.join(annotations_dir, filename)try:tree = ET.parse(xml_path)root = tree.getroot()jpg_filename = root.find('filename').textif not os.path.exists(os.path.join(jpeg_dir, jpg_filename)):raise FileNotFoundError(f"找不到對應的圖像:{jpg_filename}")except Exception as e:error_annotations.append(filename)if VERBOSE:print(f" ? 標注文件異常: {filename} | 原因: {str(e)}")return error_annotationsdef verify_imagesets(imagesets_dir, jpeg_dir):"""確保ImageSets/Main中列出的所有圖像都存在于JPEGImages中。"""print("[檢查] 驗證ImageSets/Main中列出的圖像是否存在...")missing_files = []for set_name in ['train', 'val']:set_file_path = os.path.join(imagesets_dir, f"{set_name}.txt")if not os.path.exists(set_file_path):continuewith open(set_file_path, 'r') as f:for line in f:img_id = line.strip()if not img_id:continueimg_path = os.path.join(jpeg_dir, f"{img_id}.jpg")if not os.path.exists(img_path):missing_files.append(f"{img_id}.jpg")if VERBOSE:print(f" ? 圖像缺失: {img_id}.jpg")return missing_filesdef main():print("🔍 開始驗證VOC格式數據集...\n")# 構建路徑jpeg_dir = os.path.join(DATASET_ROOT, 'JPEGImages')annotations_dir = os.path.join(DATASET_ROOT, 'Annotations')imagesets_dir = os.path.join(DATASET_ROOT, 'ImageSets', 'Main')# 檢查是否存在必要目錄for dir_path in [jpeg_dir, annotations_dir, imagesets_dir]:if not os.path.exists(dir_path):print(f"[錯誤] 必要目錄不存在: {dir_path}")exit(1)# 1. 檢查圖像是否可讀error_images = check_images(jpeg_dir)if error_images:print(f"?? 共發現 {len(error_images)} 張圖片加載失敗:")for img in error_images:print(f" - {img}")else:print("? 所有圖像均可正常加載。\n")# 2. 檢查XML標注文件是否有效error_annotations = validate_annotations(annotations_dir, jpeg_dir)if error_annotations:print(f"?? 共發現 {len(error_annotations)} 個無效或不匹配的XML標注文件:")for ann in error_annotations:print(f" - {ann}")else:print("? 所有XML標注文件均有效且與對應圖像匹配。\n")# 3. 檢查ImageSets/Main中引用的圖像是否存在missing_files = verify_imagesets(imagesets_dir, jpeg_dir)if missing_files:print(f"?? 共發現 {len(missing_files)} 張圖像在ImageSets中被引用但實際不存在:")for img in missing_files:print(f" - {img}")else:print("? ImageSets/Main中引用的所有圖像均存在。\n")# 4. 輸出訓練集和驗證集的圖像數量train_count = count_images_in_set(imagesets_dir, 'train')val_count = count_images_in_set(imagesets_dir, 'val')total_count = train_count + val_countprint("📊 數據集統計:")print(f" - 訓練集: {train_count} 張")print(f" - 驗證集: {val_count} 張")print(f" - 總數: {total_count} 張\n")print("🎉 驗證完成!")if __name__ == "__main__":main()
驗證效果為: