Fix MeanAverageRecall to compute mAR@K using top-K detections per image [COCO-compliant] (#2136)

stop1one · pre-commit-ci[bot] · Borda · web-flow · commit 124be19928e4 · 2026-02-12T17:59:11.000+09:00
* Supersedes #1967 * fix: COCO-compliant mAR calculation * Add complex test of mAP * fix(metrics): cast optional detections fields in mAR metric for mypy * Add `create_yolo_dataset` utility and refactor tests with fixtures for reusable scenarios * Refine docstrings for clarity and consistency, adding inline formatting and fixing typos in test helpers and metrics. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: jirka <6035284+Borda@users.noreply.github.com>
diff --git a/src/supervision/metrics/mean_average_recall.py b/src/supervision/metrics/mean_average_recall.py
@@ -2,7 +2,7 @@
 
 from copy import deepcopy
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 import numpy as np
 import numpy.typing as npt
@@ -376,7 +376,7 @@ def _compute(
                     stats.append(
                         (
                             np.zeros((0, iou_thresholds.size), dtype=bool),
-                            np.zeros((0,), dtype=np.float32),
+                            np.zeros((0,), dtype=int),
                             np.zeros((0,), dtype=int),
                             targets.class_id,
                         )
@@ -406,12 +406,18 @@ def _compute(
                         iou,
                         iou_thresholds,
                     )
+
+                    sorted_indices = np.argsort(
+                        -cast(npt.NDArray[np.float32], predictions.confidence)
+                    )
                     stats.append(
                         (
-                            matches,
-                            predictions.confidence,
-                            predictions.class_id,
-                            targets.class_id,
+                            matches[sorted_indices],
+                            np.arange(len(predictions)),
+                            cast(npt.NDArray[np.int32], predictions.class_id)[
+                                sorted_indices
+                            ],
+                            cast(npt.NDArray[np.int32], targets.class_id),
                         )
                     )
 
@@ -448,28 +454,24 @@ def _compute(
     def _compute_average_recall_for_classes(
         self,
         matches: npt.NDArray[np.bool_],
-        prediction_confidence: npt.NDArray[np.float32],
+        prediction_indices: npt.NDArray[np.int32],
         prediction_class_ids: npt.NDArray[np.int32],
         true_class_ids: npt.NDArray[np.int32],
     ) -> tuple[
         npt.NDArray[np.float64],
         npt.NDArray[np.float64],
         npt.NDArray[np.int32],
     ]:
-        sorted_indices = np.argsort(-prediction_confidence)
-        matches = matches[sorted_indices]
-        prediction_class_ids = prediction_class_ids[sorted_indices]
         unique_classes, class_counts = np.unique(true_class_ids, return_counts=True)
 
         recalls_at_k = []
         for max_detections in self.max_detections:
             # Shape: PxTh,P,C,C -> CxThx3
             confusion_matrix = self._compute_confusion_matrix(
-                matches,
-                prediction_class_ids,
+                matches[prediction_indices < max_detections],
+                prediction_class_ids[prediction_indices < max_detections],
                 unique_classes,
                 class_counts,
-                max_detections=max_detections,
             )
 
             # Shape: CxThx3 -> CxTh
@@ -522,7 +524,6 @@ def _compute_confusion_matrix(
         sorted_prediction_class_ids: npt.NDArray[np.int32],
         unique_classes: npt.NDArray[np.int32],
         class_counts: npt.NDArray[np.int32],
-        max_detections: int | None = None,
     ) -> npt.NDArray[np.float64]:
         """
         Compute the confusion matrix for each class and IoU threshold.
@@ -567,7 +568,7 @@ class ids.
                 false_positives = np.full(num_thresholds, num_predictions)
                 false_negatives = np.zeros(num_thresholds)
             else:
-                limited_matches = sorted_matches[is_class][slice(max_detections)]
+                limited_matches = sorted_matches[is_class]
                 true_positives = limited_matches.sum(0)
 
                 false_positives = (1 - limited_matches).sum(0)
@@ -641,8 +642,6 @@ def _make_empty_content(self) -> npt.NDArray[Any]:
 
         raise ValueError(f"Invalid metric target: {self._metric_target}")
 
-        raise ValueError(f"Invalid metric target: {self._metric_target}")
-
     def _filter_detections_by_size(
         self, detections: Detections, size_category: ObjectSizeCategory
     ) -> Detections:
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -303,3 +303,114 @@ class _FakeYoloNasResults:
 
     def __init__(self, prediction: _FakeYoloNasPrediction):
         self.prediction = prediction
+
+
+def create_yolo_dataset(
+    dataset_dir: str,
+    num_images: int = 15,
+    image_size: tuple[int, int, int] = (640, 640, 3),
+    classes: list[str] | None = None,
+    objects_per_image_range: tuple[int, int] = (2, 4),
+    seed: int = 42,
+) -> dict[str, Any]:
+    """
+    Create a synthetic YOLO-format dataset on disk.
+
+    Generates dummy images with YOLO-format annotations, `data.yaml` file,
+    and directory structure suitable for testing dataset loading.
+
+    Args:
+        dataset_dir: Root directory path for the dataset.
+        num_images: Number of images to generate.
+        image_size: Image array shape as `(height, width, channels)`.
+        classes: List of class names. Defaults to `["class_0", "class_1"]`.
+        objects_per_image_range: Inclusive object-count range as `(min, max)`.
+            Actual count will cycle through this range.
+        seed: Seed passed to `np.random.seed` (global NumPy RNG) for reproducibility.
+
+    Returns:
+        Dictionary containing:
+            - `tmpdir`: Root dataset directory as a `Path` object
+            - `images_dir`: Images directory path (`str`)
+            - `labels_dir`: Labels directory path (`str`)
+            - `data_yaml_path`: `data.yaml` file path (`str`)
+            - `num_images`: Number of images created
+            - `image_size`: Image shape, returned exactly as passed in
+            - `image_annotations`: Per image, a list of `(class_id, x, y, w, h)` tuples
+
+    Examples:
+        >>> from pathlib import Path
+        >>> import tempfile
+        >>> tmpdir = Path(tempfile.mkdtemp())
+        >>> dataset_info = create_yolo_dataset(str(tmpdir), num_images=5)
+        >>> dataset_info["num_images"]
+        5
+        >>> len(list(Path(dataset_info["images_dir"]).glob("*.jpg")))
+        5
+    """
+    from pathlib import Path
+
+    import cv2
+
+    if classes is None:
+        classes = ["class_0", "class_1"]
+
+    np.random.seed(seed)
+
+    dataset_path = Path(dataset_dir)
+    images_dir = dataset_path / "images"
+    labels_dir = dataset_path / "labels"
+    images_dir.mkdir(parents=True, exist_ok=True)
+    labels_dir.mkdir(parents=True, exist_ok=True)
+
+    min_objects, max_objects = objects_per_image_range
+    num_classes = len(classes)
+    image_annotations = []
+
+    for i in range(num_images):
+        # Create an all-zero dummy image and write it as JPEG
+        img_path = images_dir / f"image_{i:03d}.jpg"
+        img = np.zeros(image_size, dtype=np.uint8)
+        cv2.imwrite(str(img_path), img)
+
+        # Object count cycles through min_objects..max_objects (inclusive) by index
+        num_objects = min_objects + (i % (max_objects - min_objects + 1))
+        yolo_lines = []
+        objects = []
+
+        for j in range(num_objects):
+            class_id = j % num_classes
+            # Diagonal layout with small random jitter; spacing limits overlap
+            x_center = 0.15 + (j * 0.25) + np.random.uniform(-0.05, 0.05)
+            y_center = 0.15 + (j * 0.2) + np.random.uniform(-0.05, 0.05)
+            width = 0.12
+            height = 0.12
+
+            # Clip centers so the whole box stays inside [0, 1]
+            x_center = np.clip(x_center, width / 2, 1 - width / 2)
+            y_center = np.clip(y_center, height / 2, 1 - height / 2)
+
+            yolo_lines.append(
+                f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
+            )
+            objects.append((class_id, x_center, y_center, width, height))
+
+        # Write YOLO annotation file (same stem as the image)
+        label_path = labels_dir / f"image_{i:03d}.txt"
+        label_path.write_text("".join(yolo_lines))
+        image_annotations.append(objects)
+
+    # Create data.yaml listing the class names
+    data_yaml_path = dataset_path / "data.yaml"
+    yaml_content = "names:\n" + "\n".join(f"- {cls}" for cls in classes) + "\n"
+    data_yaml_path.write_text(yaml_content)
+
+    return {
+        "tmpdir": dataset_path,
+        "images_dir": str(images_dir),
+        "labels_dir": str(labels_dir),
+        "data_yaml_path": str(data_yaml_path),
+        "num_images": num_images,
+        "image_size": image_size,
+        "image_annotations": image_annotations,
+    }
diff --git a/tests/metrics/test_mean_average_recall.py b/tests/metrics/test_mean_average_recall.py