1.deepseek版本
import numpy as np
from collections import defaultdict


def calculate_iou(box1, box2):
    """Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    :param box1: [x1, y1, x2, y2]
    :param box2: [x1, y1, x2, y2]
    :return: IoU as a float; 0.0 when the union area is not positive
    """
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    # Clamp at zero so disjoint boxes yield an empty intersection.
    inter_width = max(0, min(x1_max, x2_max) - max(x1_min, x2_min))
    inter_height = max(0, min(y1_max, y2_max) - max(y1_min, y2_min))
    inter_area = inter_width * inter_height

    box1_area = (x1_max - x1_min) * (y1_max - y1_min)
    box2_area = (x2_max - x2_min) * (y2_max - y2_min)
    union_area = box1_area + box2_area - inter_area

    return inter_area / union_area if union_area > 0 else 0.0


def _match_predictions(gt_boxes, pred_boxes, iou_threshold):
    """Greedily match one image's predictions of one class to its GT boxes.

    :param gt_boxes: list of [x1, y1, x2, y2]
    :param pred_boxes: list of (box, score) tuples
    :param iou_threshold: minimum IoU for a prediction to count as a TP
    :return: (records, matched) where records is a list of (score, is_tp)
             in descending score order and matched is the number of GT
             boxes that were claimed by a prediction
    """
    ranked = sorted(pred_boxes, key=lambda item: item[1], reverse=True)
    gt_matched = [False] * len(gt_boxes)
    records = []

    for pred_box, score in ranked:
        best_iou = 0.0
        best_gt_idx = -1
        for gt_idx, gt_box in enumerate(gt_boxes):
            if gt_matched[gt_idx]:
                continue
            iou = calculate_iou(pred_box, gt_box)
            if iou > best_iou:
                best_iou = iou
                best_gt_idx = gt_idx

        # The index guard matters when iou_threshold <= 0: without it a
        # prediction with no candidate GT box would be counted as a TP.
        if best_gt_idx != -1 and best_iou >= iou_threshold:
            gt_matched[best_gt_idx] = True
            records.append((score, 1))
        else:
            records.append((score, 0))

    return records, sum(gt_matched)


def _average_precision(records, gt_count):
    """Area under the monotone precision envelope of the PR curve.

    :param records: list of (score, is_tp) for one class over all images
    :param gt_count: number of ground-truth boxes of that class
    :return: AP as a float; 0.0 when there are no records or no GT boxes
    """
    if not records or gt_count <= 0:
        return 0.0

    ranked = sorted(records, key=lambda rec: rec[0], reverse=True)
    precisions = []
    recalls = []
    cum_tp = 0
    for rank, (_, is_tp) in enumerate(ranked, start=1):
        cum_tp += is_tp
        precisions.append(cum_tp / rank)
        recalls.append(cum_tp / gt_count)

    # Make precision non-increasing from left to right (PR envelope).
    for i in range(len(precisions) - 2, -1, -1):
        precisions[i] = max(precisions[i], precisions[i + 1])

    # Integrate starting from recall 0 so the first PR point contributes.
    ap = 0.0
    prev_recall = 0.0
    for precision, recall in zip(precisions, recalls):
        ap += (recall - prev_recall) * precision
        prev_recall = recall
    return ap


def evaluate_detection(gt_dict, pred_dict, class_list, iou_threshold=0.5,
                       negatives_per_image=100):
    """Evaluate object-detection predictions against ground truth.

    :param gt_dict: {image_id: {'boxes': [[x1,y1,x2,y2], ...],
                                'labels': [label1, ...]}}
    :param pred_dict: {image_id: {'boxes': [[x1,y1,x2,y2], ...],
                                  'scores': [score1, ...],
                                  'labels': [label1, ...]}}
    :param class_list: class IDs to evaluate; other labels are ignored
    :param iou_threshold: minimum IoU for a prediction to match a GT box
    :param negatives_per_image: assumed number of negative regions per image,
        used only for the approximate FPR (true negatives are not defined
        for detection, so this is a heuristic)
    :return: {'mAP': float, 'per_class': {class_id: {'AP', 'Recall', 'FPR',
             'Precision', 'TP', 'FP', 'FN', 'gt_count', 'pred_count'}}}
    """
    results = {'mAP': 0.0, 'per_class': {}}
    per_class = results['per_class']
    for class_id in class_list:
        per_class[class_id] = {
            'AP': 0.0,         # average precision
            'Recall': 0.0,     # detection rate
            'FPR': 0.0,        # approximate false-positive rate
            'Precision': 0.0,
            'TP': 0,
            'FP': 0,
            'FN': 0,
            'gt_count': 0,     # total ground-truth boxes
            'pred_count': 0,   # total predicted boxes
        }

    # (score, is_tp) records per class, pooled over all images for AP.
    class_preds = {class_id: [] for class_id in per_class}

    # Union of image ids in insertion order, so that the order of equal-score
    # records (and therefore AP) is reproducible between runs.
    all_image_ids = list(gt_dict) + [k for k in pred_dict if k not in gt_dict]

    for img_id in all_image_ids:
        gt_ann = gt_dict.get(img_id, {'boxes': [], 'labels': []})
        pred_ann = pred_dict.get(
            img_id, {'boxes': [], 'scores': [], 'labels': []})

        gt_by_class = defaultdict(list)
        for box, label in zip(gt_ann['boxes'], gt_ann['labels']):
            if label in per_class:
                gt_by_class[label].append(box)
                per_class[label]['gt_count'] += 1

        pred_by_class = defaultdict(list)
        for box, score, label in zip(
                pred_ann['boxes'], pred_ann['scores'], pred_ann['labels']):
            if label in per_class:
                pred_by_class[label].append((box, score))
                per_class[label]['pred_count'] += 1

        for class_id, stats in per_class.items():
            gt_boxes = gt_by_class.get(class_id, [])
            pred_boxes = pred_by_class.get(class_id, [])
            if not gt_boxes and not pred_boxes:
                continue

            # Covers the "no predictions" (all FN) and "no ground truth"
            # (all FP) cases as well as the general one.
            records, matched = _match_predictions(
                gt_boxes, pred_boxes, iou_threshold)
            class_preds[class_id].extend(records)
            stats['TP'] += matched
            stats['FP'] += len(records) - matched
            stats['FN'] += len(gt_boxes) - matched

    num_images = len(all_image_ids)
    aps = []
    for class_id, stats in per_class.items():
        tp = stats['TP']
        fp = stats['FP']
        fn = stats['FN']

        # FPR = FP / (FP + TN), with TN approximated by a fixed number of
        # negative regions per image. Guard the empty-dataset case.
        fpr_denominator = fp + num_images * negatives_per_image

        stats['Recall'] = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        stats['FPR'] = fp / fpr_denominator if fpr_denominator > 0 else 0.0
        stats['Precision'] = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        stats['AP'] = _average_precision(
            class_preds[class_id], stats['gt_count'])
        aps.append(stats['AP'])

    # mAP is the plain mean over every evaluated class.
    results['mAP'] = sum(aps) / len(aps) if aps else 0.0
    return results


# Example usage
if __name__ == "__main__":
    # Demo data: the class IDs under evaluation.
    class_list = [0, 1, 2]

    # Simulated ground truth; img2 deliberately has no boxes.
    gt_dict = {
        'img1': {
            'boxes': [[10, 10, 50, 50], [30, 30, 70, 70]],
            'labels': [0, 1],
        },
        'img2': {'boxes': [], 'labels': []},
        'img3': {'boxes': [[20, 20, 60, 60]], 'labels': [2]},
        'img4': {'boxes': [[15, 15, 55, 55]], 'labels': [1]},
    }

    # Simulated predictions; img2 holds a false detection and img3 has no
    # predictions at all.
    pred_dict = {
        'img1': {
            'boxes': [[12, 12, 52, 52], [28, 28, 68, 68],
                      [100, 100, 150, 150]],
            'scores': [0.9, 0.8, 0.7],
            'labels': [0, 1, 1],
        },
        'img2': {'boxes': [[40, 40, 80, 80]], 'scores': [0.85], 'labels': [0]},
        'img3': {'boxes': [], 'scores': [], 'labels': []},
        'img4': {'boxes': [[15, 15, 55, 55]], 'scores': [0.75], 'labels': [1]},
    }

    # Run the evaluation and report the overall and per-class numbers.
    results = evaluate_detection(gt_dict, pred_dict, class_list)

    print(f"mAP: {results['mAP']:.4f}")
    print("\nPer-class metrics:")
    for class_id, metrics in results['per_class'].items():
        print(f"Class {class_id}:")
        print(f" AP: {metrics['AP']:.4f}")
        print(f" Recall: {metrics['Recall']:.4f}")
        print(f" FPR: {metrics['FPR']:.4f}")
        print(f" Precision: {metrics['Precision']:.4f}")
        print(f" TP: {metrics['TP']}, FP: {metrics['FP']}, FN: {metrics['FN']}")
        print(f" GT Count: {metrics['gt_count']}, Pred Count: {metrics['pred_count']}")
2.豆包版本
import numpy as np
from collections import defaultdict


def calculate_iou(box1, box2):
    """Compute the intersection-over-union (IoU) of two boxes.

    Both boxes use the [x1, y1, x2, y2] format. Returns 0 when the union
    area is exactly zero.
    """
    x1, y1, x2, y2 = box1
    x1_p, y1_p, x2_p, y2_p = box2

    # Intersection rectangle, clamped so disjoint boxes give zero area.
    inter_w = max(0, min(x2, x2_p) - max(x1, x1_p))
    inter_h = max(0, min(y2, y2_p) - max(y1, y1_p))
    inter_area = inter_w * inter_h

    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_p - x1_p) * (y2_p - y1_p)
    union_area = area1 + area2 - inter_area

    if union_area == 0:
        return 0
    return inter_area / union_area


def _area_under_pr_curve(records, gt_total):
    """Return AP as the area under the monotone precision envelope.

    records is a list of (score, is_tp) pairs for one label pooled over all
    images; gt_total is the number of ground-truth boxes for that label.
    Returns 0.0 when either is empty.
    """
    if not records or gt_total <= 0:
        return 0.0

    ranked = sorted(records, key=lambda rec: rec[0], reverse=True)
    precisions = []
    recalls = []
    hits = 0
    for rank, (_, is_tp) in enumerate(ranked, start=1):
        hits += is_tp
        precisions.append(hits / rank)
        recalls.append(hits / gt_total)

    # Precision envelope: non-increasing as recall grows.
    for i in range(len(precisions) - 2, -1, -1):
        precisions[i] = max(precisions[i], precisions[i + 1])

    ap = 0.0
    prev_recall = 0.0
    for precision, recall in zip(precisions, recalls):
        ap += (recall - prev_recall) * precision
        prev_recall = recall
    return ap


def evaluate_detection(true_boxes_list, pred_boxes_list, iou_threshold=0.5):
    """Evaluate object-detection results.

    Args:
        true_boxes_list: one entry per image; each entry is a list of
            ground-truth boxes {'box': [x1, y1, x2, y2], 'label': name}.
        pred_boxes_list: one entry per image, aligned with true_boxes_list;
            each entry is a list of predictions
            {'box': [x1, y1, x2, y2], 'label': name, 'score': confidence}.
        iou_threshold: minimum IoU for a prediction to match a ground-truth
            box of the same label.

    Returns:
        {'per_label': {label: {'tp', 'fp', 'fn', 'precision', 'recall', 'ap',
        'detection_rate', 'false_detection_rate'}}, 'mAP': float}.
        'precision' and 'recall' are single-element lists holding the
        overall value; 'ap' is the area under the confidence-ranked PR curve.

    Raises:
        ValueError: if the two lists do not describe the same number of
            images (pairing them would silently drop data).
    """
    if len(true_boxes_list) != len(pred_boxes_list):
        raise ValueError(
            "true_boxes_list and pred_boxes_list must have the same length "
            f"({len(true_boxes_list)} != {len(pred_boxes_list)})"
        )

    # Every label seen in either ground truth or predictions.
    label_set = set()
    for boxes_per_image in (true_boxes_list, pred_boxes_list):
        for boxes in boxes_per_image:
            for box in boxes:
                label_set.add(box['label'])
    all_labels = sorted(label_set)

    results = {
        'per_label': {
            label: {'tp': 0, 'fp': 0, 'fn': 0,
                    'precision': [], 'recall': [], 'ap': 0}
            for label in all_labels
        },
        'mAP': 0,
    }
    per_label = results['per_label']

    # (score, is_tp) per label, pooled across images for the AP computation.
    records = defaultdict(list)

    for true_boxes, pred_boxes in zip(true_boxes_list, pred_boxes_list):
        ranked = sorted(pred_boxes, key=lambda p: p['score'], reverse=True)
        matched_true = set()

        for pred in ranked:
            pred_label = pred['label']
            best_iou = 0
            best_true_idx = -1

            # Best still-unmatched ground-truth box with the same label.
            for true_idx, true in enumerate(true_boxes):
                if true_idx in matched_true or true['label'] != pred_label:
                    continue
                iou = calculate_iou(true['box'], pred['box'])
                if iou > best_iou and iou >= iou_threshold:
                    best_iou = iou
                    best_true_idx = true_idx

            is_tp = best_true_idx != -1
            if is_tp:
                per_label[pred_label]['tp'] += 1
                matched_true.add(best_true_idx)
            else:
                per_label[pred_label]['fp'] += 1
            records[pred_label].append((pred['score'], int(is_tp)))

        # Ground-truth boxes nobody claimed are misses.
        for true_idx, true in enumerate(true_boxes):
            if true_idx not in matched_true:
                per_label[true['label']]['fn'] += 1

    aps = []
    for label in all_labels:
        stats = per_label[label]
        tp = stats['tp']
        fp = stats['fp']
        fn = stats['fn']

        # Recall is 0.0 when the label has no ground truth; precision and
        # false-detection rate are 0.0 when it has no predictions.
        recall = tp / (tp + fn) if tp + fn else 0.0
        precision = tp / (tp + fp) if tp + fp else 0.0

        stats['detection_rate'] = recall
        stats['false_detection_rate'] = fp / (tp + fp) if tp + fp else 0.0
        stats['precision'].append(precision)
        stats['recall'].append(recall)

        # Every ground-truth box ends up as either a TP or an FN.
        stats['ap'] = _area_under_pr_curve(records.get(label, []), tp + fn)
        aps.append(stats['ap'])

    results['mAP'] = sum(aps) / len(aps) if aps else 0.0
    return results


def print_evaluation_results(results):
    """Print the per-label metrics and the overall mAP to stdout."""
    print("目標檢測模型評估結果:")
    print("======================")

    for label, stats in results['per_label'].items():
        print(f"\n標簽: {label}")
        print(f" 檢出率: {stats['detection_rate']:.4f}")
        print(f" 誤檢率: {stats['false_detection_rate']:.4f}")
        print(f" 精確率: {stats['precision'][0]:.4f}" if stats['precision'] else " 精確率: N/A")
        print(f" 召回率: {stats['recall'][0]:.4f}" if stats['recall'] else " 召回率: N/A")
        print(f" AP: {stats['ap']:.4f}")
        print(f" TP: {stats['tp']}, FP: {stats['fp']}, FN: {stats['fn']}")

    print("\n======================")
    print(f"mAP: {results['mAP']:.4f}")


# Example usage
if __name__ == "__main__":
    # Example 1: normal case - ground truth and predictions both present.
    true_boxes1 = [
        {'box': [10, 10, 50, 50], 'label': 'car'},
        {'box': [60, 60, 100, 100], 'label': 'person'},
    ]
    pred_boxes1 = [
        {'box': [12, 12, 52, 52], 'label': 'car', 'score': 0.9},
        {'box': [65, 65, 105, 105], 'label': 'person', 'score': 0.85},
        # False detection: there is no 'bike' in this image.
        {'box': [120, 120, 150, 150], 'label': 'bike', 'score': 0.7},
    ]

    # Example 2: no ground truth, so the single prediction is a false detection.
    true_boxes2 = []
    pred_boxes2 = [
        {'box': [20, 20, 40, 40], 'label': 'car', 'score': 0.6},
    ]

    # Example 3: no predictions, so the ground-truth box is missed.
    true_boxes3 = [
        {'box': [30, 30, 70, 70], 'label': 'car'},
    ]
    pred_boxes3 = []

    # Example 4: a label is predicted ('bike') that has no ground-truth box.
    true_boxes4 = [
        {'box': [80, 80, 120, 120], 'label': 'person'},
    ]
    pred_boxes4 = [
        {'box': [100, 100, 140, 140], 'label': 'bike', 'score': 0.5},
    ]

    # Stack the four examples as a four-image dataset.
    true_boxes_list = [true_boxes1, true_boxes2, true_boxes3, true_boxes4]
    pred_boxes_list = [pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4]

    evaluation_results = evaluate_detection(
        true_boxes_list,
        pred_boxes_list,
        iou_threshold=0.5,
    )
    print_evaluation_results(evaluation_results)
3.baidu版
import numpy as np
from collections import defaultdict


class DetectionEvaluator:
    """Accumulate per-image detection matches and derive per-class metrics.

    Feed images one at a time with evaluate_image(), then call
    calculate_metrics() for detection rate, false-detection rate, AP per
    class, and mAP.
    """

    def __init__(self, iou_threshold=0.5):
        """Store the IoU threshold and start with empty accumulators."""
        self.iou_threshold = iou_threshold
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.gt_counts = defaultdict(int)    # ground-truth boxes per class
        self.pred_counts = defaultdict(int)  # predicted boxes per class
        # The three lists below are index-aligned per class: entry i of each
        # describes the same prediction. They are kept in arrival order and
        # ranked by confidence in calculate_metrics().
        self.tp = defaultdict(list)          # 1 if the prediction is a TP
        self.fp = defaultdict(list)          # 1 if the prediction is an FP
        self.scores = defaultdict(list)      # prediction confidence

    def calculate_iou(self, box1, box2):
        """Return the IoU of two boxes whose first four items are x1, y1, x2, y2."""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
        inter_area = max(0, x2 - x1) * max(0, y2 - y1)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = box1_area + box2_area - inter_area
        return inter_area / union_area if union_area > 0 else 0

    def evaluate_image(self, gt_boxes, pred_boxes):
        """Match one image's predictions against its ground truth.

        :param gt_boxes: dict {class_id: [[x1, y1, x2, y2], ...]}
        :param pred_boxes: dict {class_id: [[x1, y1, x2, y2, score], ...]}

        An empty gt_boxes makes every prediction an FP; an empty pred_boxes
        only increases the ground-truth counts. Both fall out of the general
        path below.
        """
        for class_id, boxes in gt_boxes.items():
            self.gt_counts[class_id] += len(boxes)

        for class_id, preds in pred_boxes.items():
            self.pred_counts[class_id] += len(preds)
            class_gt = gt_boxes.get(class_id, [])
            gt_matched = set()

            # Higher-confidence predictions get first pick of the GT boxes.
            for pred in sorted(preds, key=lambda p: p[4], reverse=True):
                pred_box = pred[:4]
                max_iou = 0
                best_gt_idx = -1
                for gt_idx, gt_box in enumerate(class_gt):
                    if gt_idx in gt_matched:
                        continue
                    iou = self.calculate_iou(pred_box, gt_box)
                    if iou > max_iou:
                        max_iou = iou
                        best_gt_idx = gt_idx

                # The index guard keeps a prediction with no overlapping GT
                # box from counting as a TP when iou_threshold <= 0.
                is_tp = best_gt_idx != -1 and max_iou >= self.iou_threshold
                if is_tp:
                    gt_matched.add(best_gt_idx)
                self.tp[class_id].append(1 if is_tp else 0)
                self.fp[class_id].append(0 if is_tp else 1)
                self.scores[class_id].append(pred[4])

    def calculate_metrics(self):
        """Compute per-class metrics and mAP from the accumulated matches.

        :return: {'per_class': {class_id: {...}}, 'mAP': value}; mAP is the
                 mean AP over classes that have at least one ground-truth box.
        """
        metrics = {}
        # Union of class ids in first-seen order (ground truth first).
        class_ids = list(dict.fromkeys([*self.gt_counts, *self.pred_counts]))

        for class_id in class_ids:
            gt_count = self.gt_counts.get(class_id, 0)
            pred_count = self.pred_counts.get(class_id, 0)
            tp = np.array(self.tp.get(class_id, []))
            fp = np.array(self.fp.get(class_id, []))
            scores = np.array(self.scores.get(class_id, []))

            # The PR curve must follow descending confidence over ALL images;
            # the flags were appended image by image, so rank them here.
            order = np.argsort(-scores, kind="stable")
            tp = tp[order]
            fp = fp[order]

            # Detection rate = TP / (TP + FN) = TP / number of GT boxes.
            detection_rate = np.sum(tp) / gt_count if gt_count > 0 else 0
            # False-detection rate = FP / (TP + FP) = FP / number of predictions.
            false_alarm_rate = np.sum(fp) / pred_count if pred_count > 0 else 0

            ap = self.calculate_ap(tp, fp, gt_count)

            metrics[class_id] = {
                '檢出率': round(detection_rate, 3),
                '誤檢率': round(false_alarm_rate, 3),
                'ap': round(ap, 3),
                'gt_count': gt_count,
                'pred_count': pred_count,
                'tp_count': int(np.sum(tp)),
                'fp_count': int(np.sum(fp)),
            }

        aps = [m['ap'] for m in metrics.values() if m['gt_count'] > 0]
        mAP = np.mean(aps) if aps else 0
        return {'per_class': metrics, 'mAP': mAP}

    def calculate_ap(self, tp, fp, gt_count):
        """Return the 11-point interpolated AP for one class.

        :param tp: TP flags (0/1), ordered by descending confidence
        :param fp: FP flags (0/1), in the same order as tp
        :param gt_count: number of ground-truth boxes for the class
        :return: AP; 0 when gt_count is 0
        """
        if gt_count == 0:
            return 0

        tp_cumsum = np.cumsum(tp)
        fp_cumsum = np.cumsum(fp)
        recalls = tp_cumsum / gt_count
        # The epsilon avoids 0/0 on the (impossible in practice) empty prefix.
        precisions = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-6)

        ap = 0
        for step in range(11):
            # step / 10 is correctly rounded; accumulating 0.1 (np.arange)
            # gives 0.30000000000000004 etc., so a recall of exactly 0.3
            # would miss its own threshold.
            t = step / 10
            mask = recalls >= t
            p = np.max(precisions[mask]) if np.any(mask) else 0
            ap += p / 11
        return ap


# Usage example
if __name__ == "__main__":
    evaluator = DetectionEvaluator(iou_threshold=0.5)

    # Example 1: normal case - two 'cat' boxes and one 'dog' box, with two
    # predictions for each class.
    gt_boxes = {
        'cat': [[10, 10, 50, 50], [100, 100, 150, 150]],
        'dog': [[200, 200, 250, 250]],
    }
    pred_boxes = {
        'cat': [[12, 12, 48, 48, 0.9], [105, 105, 155, 155, 0.8]],
        'dog': [[210, 210, 240, 240, 0.7], [300, 300, 350, 350, 0.6]],
    }
    evaluator.evaluate_image(gt_boxes, pred_boxes)

    # Example 2: the image has no ground truth at all.
    gt_boxes_empty = {}
    pred_boxes_normal = {'cat': [[10, 10, 50, 50, 0.9]]}
    evaluator.evaluate_image(gt_boxes_empty, pred_boxes_normal)

    # Example 3: the image has no predictions at all.
    gt_boxes_normal = {'cat': [[10, 10, 50, 50]]}
    pred_boxes_empty = {}
    evaluator.evaluate_image(gt_boxes_normal, pred_boxes_empty)

    # Example 4 (first draft): a class key present with an empty box list.
    # NOTE: both names are reassigned just below before being evaluated, so
    # this pair never reaches the evaluator.
    gt_boxes_with_empty_class = {
        'bird': [],
        'dog': [[200, 200, 250, 250]],
    }
    pred_boxes_normal = {
        'cat': [[10, 10, 50, 50, 0.9]],
        'dog': [[210, 210, 240, 240, 0.7]],
    }

    # Example 4 (as evaluated): 'cat' and 'bird' are predicted but have no
    # ground truth in this image.
    gt_boxes_with_empty_class = {
        'dog': [[200, 200, 250, 250]],
    }
    pred_boxes_normal = {
        'cat': [[10, 10, 50, 50, 0.9]],
        'dog': [[210, 210, 240, 240, 0.7]],
        'bird': [[10, 33, 22, 50, 0.9], [110, 323, 222, 520, 0.3]],
    }
    evaluator.evaluate_image(gt_boxes_with_empty_class, pred_boxes_normal)

    # Final metrics, dumped as readable JSON (non-ASCII keys kept as-is).
    metrics = evaluator.calculate_metrics()
    import json
    report = json.dumps(metrics, indent=4, ensure_ascii=False)
    print("評估結果:", report)
代碼可直接運行。如果需要評估本地 txt 預測結果與 xml 標注結果的檢出率、誤檢率和 mAP,可自行根據驗證輸入的格式調用相關函數。