Increase limit on number of detections per image in {COCO,LVIS}Evaluator

Lydia Chan · facebook-github-bot · commit 23486b6f5034 · 2021-09-02T13:47:17.000-07:00
Summary: ## Context - The current limit on the number of detections per image (`K`) in LVIS is 300. - Implementing AP_pool/AP_fixed requires removing this default limit on `K` - [Literature](https://arxiv.org/pdf/2102.01066.pdf) has shown that increasing `K` correlates with AP gains ## This Diff - Changed limit on number of detections per image (`K`) to be customizable for LVIS and COCO through `TEST.DETECTIONS_PER_IMAGE` in the config - For COCO: - Maintain the default `max_dets_per_image` to be [1, 10, 100] as from [COCOEval](https://www.internalfb.com/code/fbsource/[88bb57c3054a]/fbcode/deeplearning/projects/cocoApi/PythonAPI/pycocotools/cocoeval.py?lines=28-29) - Allow users to input a custom integer for `TEST.DETECTIONS_PER_IMAGE` in the config, and use [1, 10, `TEST.DETECTIONS_PER_IMAGE`] for COCOEval - For LVIS: - Maintain the default `max_dets_per_image` to be 300 as from [LVISEval](https://www.internalfb.com/code/fbsource/[f6b86d023721]/fbcode/deeplearning/projects/lvisApi/lvis/eval.py?lines=528-529) - Allow users to input a custom integer for `TEST.DETECTIONS_PER_IMAGE` in the config, and use this in LVISEval - Added `COCOevalMaxDets` for evaluating AP with the custom limit on number of detections per image (since default `COCOeval` uses 100 as limit on detections per image for evaluating AP) ## Inference Runs using this Diff - Performed inference using `K = {300, 1000, 10000, 100000}` - Launched fblearner flows for object detector baseline models with N1055536 (LVIS) and N1055756 (COCO) - Recorded [results of running inference](https://docs.google.com/spreadsheets/d/1rgdjN2KvxcYfKCkGUC4tMw0XQJ5oZL0dwjOIh84YRg8/edit?usp=sharing) Reviewed By: ppwwyyxx Differential Revision: D30077359 fbshipit-source-id: 372eb5e0d7c228fb77fe23bf80d53597ec66287b
diff --git a/detectron2/evaluation/coco_evaluation.py b/detectron2/evaluation/coco_evaluation.py
@@ -47,6 +47,7 @@ def __init__(
         distributed=True,
         output_dir=None,
         *,
+        max_dets_per_image=None,
         use_fast_impl=True,
         kpt_oks_sigmas=(),
     ):
@@ -71,6 +72,10 @@ def __init__(
                 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and
                    contains all the results in the format they are produced by the model.
                 2. "coco_instances_results.json" a json file in COCO's result format.
+            max_dets_per_image (None or int): limit on the maximum number of detections per image.
+                By default in COCO, this limit is 100, but this can be customized
+                to be greater, as is needed in evaluation metrics AP fixed and AP pool
+                (see https://arxiv.org/pdf/2102.01066.pdf)
             use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP.
                 Although the results should be very close to the official implementation in COCO
                 API, it is still recommended to compute results with the official API for use in
@@ -85,6 +90,17 @@ def __init__(
         self._output_dir = output_dir
         self._use_fast_impl = use_fast_impl
 
+        # COCOeval requires the limit on the number of detections per image (maxDets) to be a list
+        # with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the
+        # 3rd element (100) is used as the limit on the number of detections per image when
+        # evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval,
+        # we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults.
+        if max_dets_per_image is None:
+            max_dets_per_image = [1, 10, 100]
+        else:
+            max_dets_per_image = [1, 10, max_dets_per_image]
+        self._max_dets_per_image = max_dets_per_image
+
         if tasks is not None and isinstance(tasks, CfgNode):
             kpt_oks_sigmas = (
                 tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas
@@ -239,6 +255,7 @@ def _eval_predictions(self, predictions, img_ids=None):
                     kpt_oks_sigmas=self._kpt_oks_sigmas,
                     use_fast_impl=self._use_fast_impl,
                     img_ids=img_ids,
+                    max_dets_per_image=self._max_dets_per_image,
                 )
                 if len(coco_results) > 0
                 else None  # cocoapi does not handle empty results very well
@@ -533,7 +550,13 @@ def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area
 
 
 def _evaluate_predictions_on_coco(
-    coco_gt, coco_results, iou_type, kpt_oks_sigmas=None, use_fast_impl=True, img_ids=None
+    coco_gt,
+    coco_results,
+    iou_type,
+    kpt_oks_sigmas=None,
+    use_fast_impl=True,
+    img_ids=None,
+    max_dets_per_image=None,
 ):
     """
     Evaluate the coco results using COCOEval API.
@@ -551,6 +574,19 @@ def _evaluate_predictions_on_coco(
 
     coco_dt = coco_gt.loadRes(coco_results)
     coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type)
+    # For COCO, the default max_dets_per_image is [1, 10, 100].
+    if max_dets_per_image is None:
+        max_dets_per_image = [1, 10, 100]  # Default from COCOEval
+    else:
+        assert (
+            len(max_dets_per_image) >= 3
+        ), "COCOeval requires maxDets (and max_dets_per_image) to have length at least 3"
+        # A custom limit needs COCOevalMaxDets, which summarizes AP at maxDets[2], not 100.
+        if max_dets_per_image[2] != 100:
+            coco_eval = COCOevalMaxDets(coco_gt, coco_dt, iou_type)
+    if iou_type != "keypoints":  # keypoint summaries use maxDets=20 (see _summarizeKps)
+        coco_eval.params.maxDets = max_dets_per_image
+
     if img_ids is not None:
         coco_eval.params.imgIds = img_ids
 
@@ -577,3 +613,94 @@ def _evaluate_predictions_on_coco(
     coco_eval.summarize()
 
     return coco_eval
+
+
+class COCOevalMaxDets(COCOeval):
+    """
+    Modified version of COCOeval that reports bbox/segm AP (and AR) at a custom
+    maxDets, read from params.maxDets[2] (by default for COCO, maxDets is 100)
+    """
+
+    def summarize(self):
+        """
+        Compute and display summary metrics for evaluation results given
+        a custom value for max_dets_per_image, and store them in self.stats.
+
+        Raises Exception if accumulate() was not run, ValueError on an unknown iouType.
+        """
+
+        def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
+            # Print and return the mean precision (ap == 1) or recall of one slice.
+            p = self.params
+            iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
+            titleStr = "Average Precision" if ap == 1 else "Average Recall"
+            typeStr = "(AP)" if ap == 1 else "(AR)"
+            iouStr = (
+                "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+                if iouThr is None
+                else "{:0.2f}".format(iouThr)
+            )
+
+            aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+            mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+            # precision is [TxRxKxAxM], recall is [TxKxAxM]: A and M are always last
+            s = self.eval["precision" if ap == 1 else "recall"]
+            if iouThr is not None:
+                s = s[np.where(iouThr == p.iouThrs)[0]]
+            s = s[..., aind, mind]
+            valid = s[s > -1]  # only entries greater than -1 are averaged
+            mean_s = np.mean(valid) if len(valid) > 0 else -1
+            print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
+            return mean_s
+
+        def _summarizeDets():
+            k = self.params.maxDets[2]  # custom limit on maximum detections per image
+            stats = np.zeros((12,))
+            stats[0] = _summarize(1, maxDets=k)
+            stats[1] = _summarize(1, iouThr=0.5, maxDets=k)
+            stats[2] = _summarize(1, iouThr=0.75, maxDets=k)
+            stats[3] = _summarize(1, areaRng="small", maxDets=k)
+            stats[4] = _summarize(1, areaRng="medium", maxDets=k)
+            stats[5] = _summarize(1, areaRng="large", maxDets=k)
+            stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+            stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+            stats[8] = _summarize(0, maxDets=k)
+            stats[9] = _summarize(0, areaRng="small", maxDets=k)
+            stats[10] = _summarize(0, areaRng="medium", maxDets=k)
+            stats[11] = _summarize(0, areaRng="large", maxDets=k)
+            return stats
+
+        def _summarizeKps():
+            # Always summarized at maxDets=20: every value is -1 unless 20 is in params.maxDets
+            stats = np.zeros((10,))
+            stats[0] = _summarize(1, maxDets=20)
+            stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
+            stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
+            stats[3] = _summarize(1, maxDets=20, areaRng="medium")
+            stats[4] = _summarize(1, maxDets=20, areaRng="large")
+            stats[5] = _summarize(0, maxDets=20)
+            stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
+            stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
+            stats[8] = _summarize(0, maxDets=20, areaRng="medium")
+            stats[9] = _summarize(0, maxDets=20, areaRng="large")
+            return stats
+
+        if not self.eval:
+            raise Exception("Please run accumulate() first")
+        iouType = self.params.iouType
+        if iouType == "segm" or iouType == "bbox":
+            summarize = _summarizeDets
+        elif iouType == "keypoints":
+            summarize = _summarizeKps
+        else:
+            raise ValueError("Unsupported iouType: {}".format(iouType))
+        self.stats = summarize()
+
+    def __str__(self):
+        # __str__ must return a str: capture what summarize() prints instead of returning None
+        import contextlib
+        import io
+        buf = io.StringIO()
+        with contextlib.redirect_stdout(buf):
+            self.summarize()
+        return buf.getvalue().rstrip("\n")
diff --git a/detectron2/evaluation/lvis_evaluation.py b/detectron2/evaluation/lvis_evaluation.py
@@ -25,7 +25,15 @@ class LVISEvaluator(DatasetEvaluator):
     LVIS's metrics and evaluation API.
     """
 
-    def __init__(self, dataset_name, tasks=None, distributed=True, output_dir=None):
+    def __init__(
+        self,
+        dataset_name,
+        tasks=None,
+        distributed=True,
+        output_dir=None,
+        *,
+        max_dets_per_image=None,
+    ):
         """
         Args:
             dataset_name (str): name of the dataset to be evaluated.
@@ -37,6 +45,8 @@ def __init__(self, dataset_name, tasks=None, distributed=True, output_dir=None):
             distributed (True): if True, will collect results from all ranks for evaluation.
                 Otherwise, will evaluate the results in the current process.
             output_dir (str): optional, an output directory to dump results.
+            max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
+                This limit, by default of the LVIS dataset, is 300.
         """
         from lvis import LVIS
 
@@ -53,6 +63,7 @@ def __init__(self, dataset_name, tasks=None, distributed=True, output_dir=None):
 
         self._distributed = distributed
         self._output_dir = output_dir
+        self._max_dets_per_image = max_dets_per_image
 
         self._cpu_device = torch.device("cpu")
 
@@ -158,7 +169,11 @@ def _eval_predictions(self, predictions):
         self._logger.info("Evaluating predictions ...")
         for task in sorted(tasks):
             res = _evaluate_predictions_on_lvis(
-                self._lvis_api, lvis_results, task, class_names=self._metadata.get("thing_classes")
+                self._lvis_api,
+                lvis_results,
+                task,
+                max_dets_per_image=self._max_dets_per_image,
+                class_names=self._metadata.get("thing_classes"),
             )
             self._results[task] = res
 
@@ -313,11 +328,14 @@ def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area
     }
 
 
-def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None):
+def _evaluate_predictions_on_lvis(
+    lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None
+):
     """
     Args:
         iou_type (str):
-        kpt_oks_sigmas (list[float]):
+        max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
+            This limit, by default of the LVIS dataset, is 300.
         class_names (None or list[str]): if provided, will use it to predict
             per-category AP.
 
@@ -344,9 +362,13 @@ def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=N
         for c in lvis_results:
             c.pop("bbox", None)
 
+    if max_dets_per_image is None:
+        max_dets_per_image = 300  # Default for LVIS dataset
+
     from lvis import LVISEval, LVISResults
 
-    lvis_results = LVISResults(lvis_gt, lvis_results)
+    logger.info(f"Evaluating with max detections per image = {max_dets_per_image}")
+    lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
     lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
     lvis_eval.run()
     lvis_eval.print_results()