> 文章列表 > 【深度学习】关于xml文件中不存在 difficult 参数导致的 AP 为 0

【深度学习】关于xml文件中不存在 difficult 参数导致的 AP 为 0

【深度学习】关于xml文件中不存在 difficult 参数导致的 AP 为 0

文章参考自 关于eval.py中MAP,AP计算为0的问题,已经解决!!!(若不是类别名字大小写问题,可尝试参考本文)

YOLOX训练VOC格式数据集出现 AP=0 可查看:解决YOLOX训练时AP为0

一、问题描述:

今天在和一位同学重新学习研究 YOLOX 的过程中,发现所用数据的 AP 为 0,
之前也发过一篇关于 YOLOX AP 为 0 的解决方案,但此次出现该问题主要的原因是 xml标签 不存在 difficult 这一参数导致的 voc_eval 计算 AP 出错

二、解决问题:

那既然已知是 difficult 参数导致的问题,那么就对它进行相应的修改

1. 首先注释掉从xml文件获取 difficult 参数这一操作

    for obj in tree.findall("object"):
        obj_struct = {}
        obj_struct["name"] = obj.find("name").text
        obj_struct["pose"] = obj.find("pose").text
        obj_struct["truncated"] = int(obj.find("truncated").text)
        # obj_struct["difficult"] = int(obj.find("difficult").text)
        bbox = obj.find("bndbox")

2. 修改 difficult 的值
由于 xml 中不存在 difficult,`difficult = np.array([x["difficult"] for x in R]).astype(bool)` 会出错,进而导致 `npos = npos + sum(~difficult)` 出错,再导致 `rec = tp / float(npos)` 出错,最终 fp、tp 的计算也会出错:

        if ovmax > ovthresh:
            if not R["difficult"][jmax]:
                if not R["det"][jmax]:
                    tp[d] = 1.0
                    R["det"][jmax] = 1
                else:
                    fp[d] = 1.0
        else:
            fp[d] = 1.0

因为无 difficult,意味着 difficult 全为 0,那我们只需要将 difficult 赋值为 长度为 R 的全零数组即可,即:

difficult = np.zeros(len(R)).astype(bool)  # 注:np.bool 已在 NumPy 1.24 中被移除,应使用内置 bool

至于 npos 可修改也可不做修改,

# 自增,非difficult样本数量,如果数据集没有 difficult,npos数量 就是 gt数量。
npos = npos + sum(~difficult)

若做修改:

# len(R) 即为 gt数量
npos = npos + len(R)

完成上述修改,再去运行程序,即可获得正确的 AP 值


下面是修改后的完整的 voc_eval.py

#!/usr/bin/env python3
# Code are based on
# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py
# Copyright (c) Bharath Hariharan.
# Copyright (c) Megvii, Inc. and its affiliates.

import os
import pickle
import xml.etree.ElementTree as ET

import numpy as np


def parse_rec(filename):
    """Parse a PASCAL VOC xml annotation file.

    Args:
        filename: Path of the xml file to parse.

    Returns:
        A list with one dict per ``<object>`` element. Each dict has the keys
        ``name``, ``pose``, ``truncated``, ``difficult`` and ``bbox``
        (``[xmin, ymin, xmax, ymax]`` as ints).

    The ``<difficult>`` tag is optional: when it is missing (or empty) the
    object is recorded with ``difficult = 0``, so annotation files written
    without that tag are evaluated as if every object were non-difficult.
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall("object"):
        obj_struct = {}
        obj_struct["name"] = obj.find("name").text
        obj_struct["pose"] = obj.find("pose").text
        obj_struct["truncated"] = int(obj.find("truncated").text)

        # Read the flag only when the tag exists and carries a value;
        # otherwise fall back to 0 instead of failing on a missing node.
        difficult_node = obj.find("difficult")
        has_value = (
            difficult_node is not None
            and difficult_node.text is not None
            and difficult_node.text.strip() != ""
        )
        obj_struct["difficult"] = int(difficult_node.text) if has_value else 0

        bbox = obj.find("bndbox")
        obj_struct["bbox"] = [
            int(bbox.find("xmin").text),
            int(bbox.find("ymin").text),
            int(bbox.find("xmax").text),
            int(bbox.find("ymax").text),
        ]
        objects.append(obj_struct)
    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall.

    Args:
        rec: Array of recall values.
        prec: Array of precision values, aligned with ``rec``.
        use_07_metric: If true, use the VOC 07 11-point method
            (default: False).

    Returns:
        The average precision as a float.
    """
    if use_07_metric:
        # 11 point metric: average the best precision reachable at each
        # recall threshold 0.0, 0.1, ..., 1.0.
        ap = 0.0
        for t in np.arange(0.0, 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.0
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.0], rec, [1.0]))
        mpre = np.concatenate(([0.0], prec, [0.0]))

        # compute the precision envelope (make precision non-increasing
        # when walking from low recall to high recall)
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(
    detpath,
    annopath,
    imagesetfile,
    classname,
    cachedir,
    ovthresh=0.5,
    use_07_metric=False,
):
    """Evaluate the detections of one class against VOC-style annotations.

    Args:
        detpath: Format template for the detection file;
            ``detpath.format(classname)`` must be the file to read. Each line
            is ``image_id confidence xmin ymin xmax ymax`` separated by
            single spaces.
        annopath: Format template for the annotation files;
            ``annopath.format(imagename)`` must be the xml file of an image.
        imagesetfile: Text file with one image name per line.
        classname: Name of the class to evaluate.
        cachedir: Directory holding the pickled annotation cache
            (``annots.pkl``); it is created when missing.
        ovthresh: IoU a detection must exceed to count as a match
            (default: 0.5).
        use_07_metric: Whether to use the VOC 07 11-point AP
            (default: False).

    Returns:
        ``(rec, prec, ap)``: cumulative recall and precision arrays (one
        entry per detection, sorted by decreasing confidence) and the average
        precision. ``(0, 0, 0)`` is returned when the detection file is empty.
    """
    # first load gt
    os.makedirs(cachedir, exist_ok=True)
    cachefile = os.path.join(cachedir, "annots.pkl")

    # read list of images
    with open(imagesetfile, "r") as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print(f"Reading annotation for {i + 1}/{len(imagenames)}")
        # save
        print(f"Saving cached annotations to {cachefile}")
        with open(cachefile, "wb") as f:
            pickle.dump(recs, f)
    else:
        # load
        # NOTE: an existing cache file is reused as-is; delete it after
        # changing the annotations or the image list.
        # SECURITY: pickle.load can execute arbitrary code, so only point
        # cachedir at a location you control.
        with open(cachefile, "rb") as f:
            recs = pickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj["name"] == classname]
        bbox = np.array([x["bbox"] for x in R])
        # .get() keeps caches working whose records carry no "difficult"
        # key; such objects count as non-difficult. The builtin bool is used
        # because the np.bool alias was removed in NumPy 1.24.
        difficult = np.array([x.get("difficult", 0) for x in R], dtype=bool)
        det = [False] * len(R)
        # number of non-difficult gt boxes; equals len(R) when no object is
        # flagged as difficult
        npos = npos + int(np.sum(~difficult))
        class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, "r") as f:
        lines = f.readlines()

    if len(lines) == 0:
        return 0, 0, 0

    splitlines = [x.strip().split(" ") for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R["bbox"].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection (the +1.0 counts both border coordinates)
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
            ih = np.maximum(iymax - iymin + 1.0, 0.0)
            inters = iw * ih

            # union
            uni = (
                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
                - inters
            )

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # a match with a difficult gt box is neither a TP nor a FP
            if not R["difficult"][jmax]:
                if not R["det"][jmax]:
                    tp[d] = 1.0
                    R["det"][jmax] = 1
                else:
                    # gt box already claimed by a higher-scoring detection
                    fp[d] = 1.0
        else:
            fp[d] = 1.0

    # compute precision recall
    eps = np.finfo(np.float64).eps
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    # clamp the denominator so a class without any non-difficult gt box
    # yields recall 0 instead of NaN
    rec = tp / np.maximum(float(npos), eps)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap