# 目標檢測任務中,擔心數據集中各標簽類別不均衡,想統計XML標注文件中各標注類別的標簽數量,可以使用以下腳本:
import os
import glob
import xml.etree.ElementTree as et
from collections import Counter


def count_labels(source_dir):
    """Count how often each label occurs in the XML annotations of a folder.

    Every ``*.xml`` file directly inside ``source_dir`` is parsed, and the
    text of the first ``<name>`` element found under each ``<object>``
    element is tallied. A per-label summary, the total number of labels and
    the number of annotation files are printed to stdout.

    Args:
        source_dir: Directory that contains the XML annotation files.

    Returns:
        A dict mapping each label to its number of occurrences. It is empty
        when the directory holds no XML files or no labelled objects.

    Raises:
        xml.etree.ElementTree.ParseError: If an annotation file is not
            well-formed XML.
    """
    # glob returns files in arbitrary order; sort so the report is stable.
    file_list = sorted(glob.glob(os.path.join(source_dir, "*.xml")))

    labels = Counter()
    for xml_path in file_list:
        tree = et.parse(xml_path)
        for obj in tree.findall(".//object"):
            name = obj.find(".//name")
            # An object without a (non-empty) <name> carries no label;
            # skip it instead of crashing on ``None.text``.
            if name is None or name.text is None:
                continue
            labels[name.text] += 1

    print(source_dir)
    for label, count in labels.items():
        print(label, count)
    print("all_labels : ", sum(labels.values()))
    print("all_images : ", len(file_list))

    return dict(labels)


if __name__ == '__main__':
    count_labels("/disk/test_xml")