diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6e7fcd59e..02010285e 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,4 +1,4 @@
# These owners will be the default owners for everything in
# the repo. They will be requested for review when someone
# opens a pull request.
-* @SkalskiP @onuralpszr
+* @SkalskiP @soumik12345
diff --git a/docs/changelog.md b/docs/changelog.md
index 73efcf0b1..4ebec5002 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,17 @@
# Changelog
+### 0.26.1 Jul 22, 2025
+
+- Fixed [#1894](https://github.com/roboflow/supervision/pull/1894): Error in [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) where the area used for size-specific evaluation (small / medium / large) was always zero unless explicitly provided in `sv.Detections.data`; the box area is now computed automatically when no `area` key is present.
+
+- Fixed [#1895](https://github.com/roboflow/supervision/pull/1895): `ID=0` bug in [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) where objects received `0.0` mAP despite perfect IoU matches because annotation IDs started at `0`, which is interpreted as "no match"; annotation IDs now start at `1`.
+
+- Fixed [#1898](https://github.com/roboflow/supervision/pull/1898): Issue where [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) could return negative values when certain object size categories had no data; such categories now report `-1`, matching pycocotools.
+
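+A minimal sketch of the corrected behavior, adapted from the tests added alongside these fixes (the printed values assume the perfect single-box match shown here):
+
+```python
+import numpy as np
+import supervision as sv
+from supervision.metrics.mean_average_precision import MeanAveragePrecision
+
+targets = sv.Detections(
+    xyxy=np.array([[0, 0, 50, 50]], dtype=np.float64),  # 50x50 box -> medium object
+    class_id=np.array([0]),
+)
+predictions = sv.Detections(
+    xyxy=np.array([[0, 0, 50, 50]], dtype=np.float64),
+    class_id=np.array([0]),
+    confidence=np.array([0.9]),
+)
+
+metric = MeanAveragePrecision()
+metric.update([predictions], [targets])
+result = metric.compute()
+
+print(result.map50_95)                 # 1.0 - a perfect match no longer collapses to 0.0
+print(result.medium_objects.map50_95)  # 1.0 - box area is now computed when not given in data
+print(result.small_objects.map50_95)   # -1 - size buckets without data report -1, not a negative mean
+```
+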
+- Fixed [#1901](https://github.com/roboflow/supervision/pull/1901): `sv.InferenceSlicer` passing the unsupported `match_metric` argument to [`sv.Detections.with_nms`](https://supervision.roboflow.com/0.26.1/detection/core/#supervision.detection.core.Detections.with_nms); the slicer now accepts and forwards `overlap_metric` instead, as shown in the snippet below.
+
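+A short sketch of the `overlap_metric` parameter on `sv.Detections.with_nms` (the `OverlapMetric` import path below is the internal module touched by this change and may not be re-exported at the top level):
+
+```python
+import numpy as np
+import supervision as sv
+from supervision.detection.utils.iou_and_nms import OverlapMetric
+
+detections = sv.Detections(
+    xyxy=np.array([[10, 10, 60, 60], [12, 12, 62, 62]], dtype=np.float64),
+    confidence=np.array([0.9, 0.8]),
+    class_id=np.array([0, 0]),
+)
+
+# suppress the lower-confidence duplicate; OverlapMetric.IOU is the default metric
+filtered = detections.with_nms(threshold=0.5, overlap_metric=OverlapMetric.IOU)
+```
+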
+- Fixed [#1906](https://github.com/roboflow/supervision/pull/1906): Handling of the `border_thickness` parameter in [`sv.PercentageBarAnnotator`](https://supervision.roboflow.com/0.26.1/detection/annotators/#supervision.annotators.core.PercentageBarAnnotator); an explicitly provided value is now stored instead of being dropped.
+
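+A tiny sketch of the annotator fix, assuming the remaining constructor arguments keep their defaults:
+
+```python
+import supervision as sv
+
+# an explicitly provided border_thickness is now stored and used for the bar outline
+annotator = sv.PercentageBarAnnotator(border_thickness=2)
+
+# when omitted, the thickness still defaults to roughly 15% of the bar height
+default_annotator = sv.PercentageBarAnnotator()
+```
+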
### 0.26.0 Jul 16, 2025
!!! failure "Removed"
@@ -153,7 +165,7 @@
- Changed [#1786](https://github.com/roboflow/supervision/pull/1786): Significantly improved the speed of HSV color mapping in [`sv.HeatMapAnnotator`](https://supervision.roboflow.com/0.26.0/detection/annotators/#supervision.annotators.core.HeatMapAnnotator), achieving approximately 28x faster performance on 1920x1080 frames.
-- Fix [#1834](https://github.com/roboflow/supervision/pull/1834): Supervision’s [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.0/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) is now fully aligned with [pycocotools](https://github.com/ppwwyyxx/cocoapi), the official COCO evaluation tool, ensuring accurate and standardized metrics. This update enabled us to launch a new version of the [Computer Vision Model Leaderboard](https://leaderboard.roboflow.com/).
+- Fixed [#1834](https://github.com/roboflow/supervision/pull/1834): Supervision’s [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.0/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) is now fully aligned with [pycocotools](https://github.com/ppwwyyxx/cocoapi), the official COCO evaluation tool, ensuring accurate and standardized metrics. This update enabled us to launch a new version of the [Computer Vision Model Leaderboard](https://leaderboard.roboflow.com/).
```python
import supervision as sv
@@ -173,7 +185,7 @@
# Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.629
```
-- Fix [#1767](https://github.com/roboflow/supervision/pull/1767): Fixed losing `sv.Detections.data` when detections filtering.
+- Fixed [#1767](https://github.com/roboflow/supervision/pull/1767): Loss of `sv.Detections.data` when filtering detections.
### 0.25.0 Nov 12, 2024
@@ -557,9 +569,9 @@ detections = sv.Detections.from_sam(sam_result=sam_result)
- Changed [#1434](https://github.com/roboflow/supervision/pull/1434): [`InferenceSlicer`](https://supervision.roboflow.com/0.23.0/detection/tools/inference_slicer/) now features an `overlap_wh` parameter, making it easier to compute slice sizes when handling overlapping slices.
-- Fix [#1448](https://github.com/roboflow/supervision/pull/1448): Various annotator type issues have been resolved, supporting expanded error handling.
+- Fixed [#1448](https://github.com/roboflow/supervision/pull/1448): Various annotator type issues have been resolved, supporting expanded error handling.
-- Fix [#1348](https://github.com/roboflow/supervision/pull/1348): Introduced a new method for [seeking to a specific video frame](https://supervision.roboflow.com/0.23.0/utils/video/#supervision.utils.video.get_video_frames_generator), addressing cases where traditional seek methods were failing. It can be enabled with `iterative_seek=True`.
+- Fixed [#1348](https://github.com/roboflow/supervision/pull/1348): Introduced a new method for [seeking to a specific video frame](https://supervision.roboflow.com/0.23.0/utils/video/#supervision.utils.video.get_video_frames_generator), addressing cases where traditional seek methods were failing. It can be enabled with `iterative_seek=True`.
```python
import supervision as sv
@@ -572,7 +584,7 @@ for frame in sv.get_video_frames_generator(
...
```
-- Fix [#1424](https://github.com/roboflow/supervision/pull/1424): `plot_image` function now clearly indicates that the size is in inches.
+- Fixed [#1424](https://github.com/roboflow/supervision/pull/1424): `plot_image` function now clearly indicates that the size is in inches.
!!! failure "Removed"
@@ -1285,7 +1297,7 @@ array([
### 0.11.1 June 29, 2023
-- Fix [#165](https://github.com/roboflow/supervision/pull/165): [`as_folder_structure`](/0.11.1/dataset/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure) fails to save [`sv.ClassificationDataset`](/0.11.1/dataset/core/#classificationdataset) when it is result of inference.
+- Fixed [#165](https://github.com/roboflow/supervision/pull/165): [`as_folder_structure`](/0.11.1/dataset/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure) fails to save [`sv.ClassificationDataset`](/0.11.1/dataset/core/#classificationdataset) when it is the result of inference.
### 0.11.0 June 28, 2023
@@ -1331,7 +1343,7 @@ array([
- Added [#162](https://github.com/roboflow/supervision/pull/162): additional `start` and `end` arguments to [`sv.get_video_frames_generator`](/0.11.0/utils/video/#get_video_frames_generator) allowing to generate frames only for a selected part of the video.
-- Fix [#157](https://github.com/roboflow/supervision/pull/157): incorrect loading of YOLO dataset class names from `data.yaml`.
+- Fixed [#157](https://github.com/roboflow/supervision/pull/157): incorrect loading of YOLO dataset class names from `data.yaml`.
### 0.10.0 June 14, 2023
diff --git a/pyproject.toml b/pyproject.toml
index 1b830697a..cae78492a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
name = "supervision"
description = "A set of easy-to-use utils that will come in handy in any Computer Vision project"
license = { text = "MIT" }
-version = "0.26.0"
+version = "0.26.1"
readme = "README.md"
requires-python = ">=3.9"
authors = [
diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py
index 6c142db19..0b7d4b763 100644
--- a/supervision/annotators/core.py
+++ b/supervision/annotators/core.py
@@ -2239,8 +2239,11 @@ def __init__(
self.position: Position = position
self.color_lookup: ColorLookup = color_lookup
- if border_thickness is None:
- self.border_thickness = int(0.15 * self.height)
+ self.border_thickness = (
+ border_thickness
+ if border_thickness is not None
+ else int(0.15 * self.height)
+ )
@ensure_cv2_image_for_annotation
def annotate(
diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index efaa366a3..ea466b895 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1939,8 +1939,8 @@ def with_nms(
class_agnostic (bool): Whether to perform class-agnostic
non-maximum suppression. If True, the class_id of each detection
will be ignored. Defaults to False.
- overlap_metric (OverlapMetric): Metric used for measuring overlap between
- detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of
+ overlap between pairs of masks or boxes (e.g., IoU, IoS).
Returns:
Detections: A new Detections object containing the subset of detections
@@ -2003,8 +2003,8 @@ def with_nmm(
class_agnostic (bool): Whether to perform class-agnostic
non-maximum merging. If True, the class_id of each detection
will be ignored. Defaults to False.
- overlap_metric (OverlapMetric): Metric used for measuring overlap between
- detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of
+ overlap between pairs of masks or boxes (e.g., IoU, IoS).
Returns:
Detections: A new Detections object containing the subset of detections
diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py
index ff4a44b28..aaecccb3d 100644
--- a/supervision/detection/tools/inference_slicer.py
+++ b/supervision/detection/tools/inference_slicer.py
@@ -9,7 +9,7 @@
from supervision.config import ORIENTED_BOX_COORDINATES
from supervision.detection.core import Detections
from supervision.detection.utils.boxes import move_boxes, move_oriented_boxes
-from supervision.detection.utils.iou_and_nms import OverlapFilter
+from supervision.detection.utils.iou_and_nms import OverlapFilter, OverlapMetric
from supervision.detection.utils.masks import move_masks
from supervision.utils.image import crop_image
from supervision.utils.internal import (
@@ -75,8 +75,8 @@ class InferenceSlicer:
filtering or merging overlapping detections in slices.
iou_threshold (float): Intersection over Union (IoU) threshold
used when filtering by overlap.
- match_metric (str): Metric used for matching detections in slices.
- "IOU" or "IOS". Defaults "IOU".
+        overlap_metric (Union[OverlapMetric, str]): Metric used to compute the degree
+            of overlap between pairs of masks or boxes (e.g., IoU, IoS).
+            Defaults to `OverlapMetric.IOU`.
callback (Callable): A function that performs inference on a given image
slice and returns detections.
thread_workers (int): Number of threads for parallel execution.
@@ -96,7 +96,7 @@ def __init__(
overlap_wh: tuple[int, int] | None = None,
overlap_filter: OverlapFilter | str = OverlapFilter.NON_MAX_SUPPRESSION,
iou_threshold: float = 0.5,
- match_metric: str = "IOU",
+ overlap_metric: OverlapMetric | str = OverlapMetric.IOU,
thread_workers: int = 1,
):
if overlap_ratio_wh is not None:
@@ -112,7 +112,7 @@ def __init__(
self.slice_wh = slice_wh
self.iou_threshold = iou_threshold
- self.match_metric = match_metric
+ self.overlap_metric = OverlapMetric.from_value(overlap_metric)
self.overlap_filter = OverlapFilter.from_value(overlap_filter)
self.callback = callback
self.thread_workers = thread_workers
@@ -173,11 +173,11 @@ def callback(image_slice: np.ndarray) -> sv.Detections:
return merged
elif self.overlap_filter == OverlapFilter.NON_MAX_SUPPRESSION:
return merged.with_nms(
- threshold=self.iou_threshold, match_metric=self.match_metric
+ threshold=self.iou_threshold, overlap_metric=self.overlap_metric
)
elif self.overlap_filter == OverlapFilter.NON_MAX_MERGE:
return merged.with_nmm(
- threshold=self.iou_threshold, match_metric=self.match_metric
+ threshold=self.iou_threshold, overlap_metric=self.overlap_metric
)
else:
warnings.warn(
diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py
index 56bdff71b..1a6f80bc5 100644
--- a/supervision/detection/utils/iou_and_nms.py
+++ b/supervision/detection/utils/iou_and_nms.py
@@ -164,7 +164,8 @@ def box_iou_batch(
`shape = (N, 4)` where `N` is number of true objects.
boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes.
`shape = (M, 4)` where `M` is number of detected objects.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of boxes (e.g., IoU, IoS).
Returns:
np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`.
@@ -381,7 +382,8 @@ def _mask_iou_batch_split(
Args:
masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks.
masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of masks (e.g., IoU, IoS).
Returns:
np.ndarray: Pairwise IoU of masks from `masks_true` and `masks_detection`.
@@ -433,7 +435,8 @@ def mask_iou_batch(
Args:
masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks.
masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of masks (e.g., IoU, IoS).
memory_limit (int): memory limit in MB, default is 1024 * 5 MB (5GB).
Returns:
@@ -492,7 +495,8 @@ def mask_non_max_suppression(
dimensions of each mask.
iou_threshold (float): The intersection-over-union threshold
to use for non-maximum suppression.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of masks (e.g., IoU, IoS).
mask_dimension (int): The dimension to which the masks should be
resized before computing IOU values. Defaults to 640.
@@ -543,7 +547,8 @@ def box_non_max_suppression(
or `(x_min, y_min, x_max, y_max, score, class)`.
iou_threshold (float): The intersection-over-union threshold
to use for non-maximum suppression.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of boxes (e.g., IoU, IoS).
Returns:
np.ndarray: A boolean array indicating which predictions to keep after n
@@ -603,7 +608,8 @@ def _group_overlapping_masks(
the predictions.
iou_threshold (float): The intersection-over-union threshold
to use for non-maximum suppression. Defaults to 0.5.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of masks (e.g., IoU, IoS).
Returns:
list[list[int]]: Groups of prediction indices be merged.
@@ -664,7 +670,8 @@ def mask_non_max_merge(
to use for non-maximum suppression.
mask_dimension (int): The dimension to which the masks should be
resized before computing IOU values. Defaults to 640.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of masks (e.g., IoU, IoS).
Returns:
np.ndarray: A boolean array indicating which predictions to keep after
@@ -717,7 +724,8 @@ def _group_overlapping_boxes(
and the confidence scores.
iou_threshold (float): The intersection-over-union threshold
to use for non-maximum suppression. Defaults to 0.5.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of boxes (e.g., IoU, IoS).
Returns:
list[list[int]]: Groups of prediction indices be merged.
@@ -765,7 +773,8 @@ def box_non_max_merge(
detections of different classes to be merged.
iou_threshold (float): The intersection-over-union threshold
to use for non-maximum suppression. Defaults to 0.5.
- overlap_metric (OverlapMetric): Metric used for matching detections in slices.
+ overlap_metric (OverlapMetric): Metric used to compute the degree of overlap
+ between pairs of boxes (e.g., IoU, IoS).
Returns:
list[list[int]]: Groups of prediction indices be merged.
diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py
index 5100f39ac..43a3116ac 100644
--- a/supervision/metrics/mean_average_precision.py
+++ b/supervision/metrics/mean_average_precision.py
@@ -57,7 +57,11 @@ class and IoU threshold. Shape: `(num_target_classes, num_iou_thresholds)`
@property
def map50_95(self) -> float:
- return self.mAP_scores.mean()
+ valid_scores = self.mAP_scores[self.mAP_scores > -1]
+ if len(valid_scores) > 0:
+ return valid_scores.mean()
+ else:
+ return -1
@property
def map50(self) -> float:
@@ -421,6 +425,11 @@ def load_predictions(self, predictions: list[dict]) -> EvaluationDataset:
if not isinstance(predictions, list):
raise ValueError("results must be a list")
+ # Handle empty predictions
+ if len(predictions) == 0:
+ predictions_dataset.dataset["annotations"] = []
+ return predictions_dataset
+
ids = [pred["image_id"] for pred in predictions]
# Make sure the image ids from predictions exist in the current dataset
@@ -909,6 +918,35 @@ def _accumulate(self):
np.array(score_at_recall)
)
+ self.results = {
+ "params": self.params,
+ "counts": [
+ num_iou_thresholds,
+ num_recall_thresholds,
+ num_categories,
+ num_area_ranges,
+ num_max_detections,
+ ],
+ "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ "precision": precision,
+ "recall": recall,
+ "scores": scores,
+ }
+
+ def compute_average_precision(precision_slice):
+ """Compute average precision while handling -1 sentinel values."""
+ masked = np.ma.masked_equal(precision_slice, -1)
+ if masked.count() == 0:
+ # All values are -1 (no data)
+ return np.full(num_iou_thresholds, -1), np.full(
+ (num_categories, num_iou_thresholds), -1
+ )
+ else:
+ mAP_scores = np.ma.filled(masked.mean(axis=(1, 2)), -1)
+ ap_per_class = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0)
+ return mAP_scores, ap_per_class
+
# Average precision over all sizes, 100 max detections
area_range_idx = list(ObjectSize).index(ObjectSize.ALL)
max_100_dets_idx = self.params.max_dets.index(100)
@@ -917,10 +955,9 @@ def _accumulate(self):
:, :, :, area_range_idx, max_100_dets_idx
]
# mAP over thresholds (dimension=num_thresholds)
- mAP_scores_all_sizes = average_precision_all_sizes.mean(axis=(1, 2))
- # AP per class
- ap_per_class_all_sizes = average_precision_all_sizes.mean(axis=1).transpose(
- 1, 0
+ # Use masked array to exclude -1 values when computing mean
+ mAP_scores_all_sizes, ap_per_class_all_sizes = compute_average_precision(
+ average_precision_all_sizes
)
# Average precision for SMALL objects and 100 max detections
@@ -928,24 +965,27 @@ def _accumulate(self):
average_precision_small = precision[
:, :, :, small_area_range_idx, max_100_dets_idx
]
- mAP_scores_small = average_precision_small.mean(axis=(1, 2))
- ap_per_class_small = average_precision_small.mean(axis=1).transpose(1, 0)
+ mAP_scores_small, ap_per_class_small = compute_average_precision(
+ average_precision_small
+ )
# Average precision for MEDIUM objects and 100 max detections
medium_area_range_idx = list(ObjectSize).index(ObjectSize.MEDIUM)
average_precision_medium = precision[
:, :, :, medium_area_range_idx, max_100_dets_idx
]
- mAP_scores_medium = average_precision_medium.mean(axis=(1, 2))
- ap_per_class_medium = average_precision_medium.mean(axis=1).transpose(1, 0)
+ mAP_scores_medium, ap_per_class_medium = compute_average_precision(
+ average_precision_medium
+ )
# Average precision for LARGE objects and 100 max detections
large_area_range_idx = list(ObjectSize).index(ObjectSize.LARGE)
average_precision_large = precision[
:, :, :, large_area_range_idx, max_100_dets_idx
]
- mAP_scores_large = average_precision_large.mean(axis=(1, 2))
- ap_per_class_large = average_precision_large.mean(axis=1).transpose(1, 0)
+ mAP_scores_large, ap_per_class_large = compute_average_precision(
+ average_precision_large
+ )
self.results = {
"params": self.params,
@@ -1221,7 +1261,7 @@ def _prepare_targets(self, targets):
for image_id, image_targets in enumerate(targets):
if self._image_indices is not None:
image_id = self._image_indices[image_id]
- for target in image_targets:
+ for target_idx, target in enumerate(image_targets):
xyxy = target[0] # or xyxy = prediction[0]; xyxy[2:4] -= xyxy[0:2]
xywh = [xyxy[0], xyxy[1], xyxy[2] - xyxy[0], xyxy[3] - xyxy[1]]
# Get "area" and "iscrowd" (default 0) from data
@@ -1231,13 +1271,20 @@ def _prepare_targets(self, targets):
category_id = self._class_mapping[target[3].item()]
else:
category_id = target[3].item()
+
+ # Use area from data if available (e.g., COCO datasets)
+ # Otherwise use Detections.area property
+ area = data.get("area") if data else None
+ if area is None:
+ area = image_targets.area[target_idx]
+
dict_annotation = {
- "area": data.get("area", 0),
+ "area": area,
"iscrowd": data.get("iscrowd", 0),
"image_id": image_id,
"bbox": xywh,
"category_id": category_id,
- "id": len(annotations), # incrementally increase the id
+ "id": len(annotations) + 1, # Start IDs from 1 (0 means no match)
}
annotations.append(dict_annotation)
# Category list
diff --git a/test/metrics/test_mean_average_precision.py b/test/metrics/test_mean_average_precision.py
new file mode 100644
index 000000000..f17cb1cec
--- /dev/null
+++ b/test/metrics/test_mean_average_precision.py
@@ -0,0 +1,328 @@
+"""
+Tests for Mean Average Precision fixes: the annotation ID=0 bug and -1 sentinel handling for object size categories with no data.
+"""
+
+import numpy as np
+
+from supervision.detection.core import Detections
+from supervision.metrics.mean_average_precision import MeanAveragePrecision
+
+
+def test_single_perfect_detection():
+ """Test that single perfect detection gets 1.0 mAP (not 0.0 due to ID=0 bug)"""
+ # Perfect detection (identical prediction and target)
+ detection = Detections(
+ xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.9]),
+ )
+
+ metric = MeanAveragePrecision()
+ metric.update([detection], [detection])
+ result = metric.compute()
+
+ # Should be perfect 1.0 mAP, not 0.0 due to ID=0 bug
+ assert abs(result.map50_95 - 1.0) < 1e-6
+
+
+def test_multiple_perfect_detections():
+ """Test that multiple perfect detections get 1.0 mAP"""
+ # Multiple perfect detections in one image
+ detections = Detections(
+ xyxy=np.array(
+ [[10, 10, 50, 50], [100, 100, 140, 140], [200, 200, 240, 240]],
+ dtype=np.float64,
+ ),
+ class_id=np.array([0, 0, 0]),
+ confidence=np.array([0.9, 0.9, 0.9]),
+ )
+
+ metric = MeanAveragePrecision()
+ metric.update([detections], [detections])
+ result = metric.compute()
+
+ # Should be perfect 1.0 mAP
+ assert abs(result.map50_95 - 1.0) < 1e-6
+
+
+def test_batch_updates_perfect_detections():
+ """Test that batch updates with perfect detections get 1.0 mAP"""
+ # Single perfect detection for multiple batch updates
+ detection = Detections(
+ xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.9]),
+ )
+
+ metric = MeanAveragePrecision()
+ # Add 3 batch updates
+ metric.update([detection], [detection])
+ metric.update([detection], [detection])
+ metric.update([detection], [detection])
+ result = metric.compute()
+
+ # Should be perfect 1.0 mAP across all batches
+ assert abs(result.map50_95 - 1.0) < 1e-6
+
+
+def test_scenario_1_success_case_imperfect_match():
+ """Scenario 1: Success Case with imperfect match"""
+ # Small object (class 0) - area = 30*30 = 900 < 1024
+ small_perfect = Detections(
+ xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.95]),
+ data={"area": np.array([900])},
+ )
+
+ # Medium object (class 1) - area = 50*50 = 2500 (between 1024 and 9216)
+ medium_target = Detections(
+ xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64),
+ class_id=np.array([1]),
+ data={"area": np.array([2500])},
+ )
+ medium_pred = Detections(
+ xyxy=np.array([[12, 12, 60, 60]], dtype=np.float64), # Slightly off
+ class_id=np.array([1]),
+ confidence=np.array([0.9]),
+ data={"area": np.array([2304])}, # 48*48
+ )
+
+ # Large objects (classes 0, 1, 2) - area = 100*100 = 10000 > 9216
+ large_targets = Detections(
+ xyxy=np.array(
+ [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]],
+ dtype=np.float64,
+ ),
+ class_id=np.array([2, 0, 1]),
+ data={"area": np.array([10000, 10000, 10000])},
+ )
+ large_preds = Detections(
+ xyxy=np.array(
+ [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]],
+ dtype=np.float64,
+ ),
+ class_id=np.array([2, 0, 1]),
+ confidence=np.array([0.9, 0.9, 0.9]),
+ data={"area": np.array([10000, 10000, 10000])},
+ )
+
+ metric = MeanAveragePrecision()
+ metric.update([small_perfect], [small_perfect])
+ metric.update([medium_pred], [medium_target])
+ metric.update([large_preds], [large_targets])
+ result = metric.compute()
+
+ # Should be close to 0.9 (slightly less than perfect due to medium object)
+    assert 0.85 < result.map50_95 < 0.98
+ assert result.medium_objects.map50_95 < 1.0 # Medium should be less than perfect
+
+
+def test_scenario_2_missed_detection():
+ """Scenario 2: GT Present, No Prediction (Missed Detection)"""
+ # Small object - area = 30*30 = 900 < 1024
+ small_detection = Detections(
+ xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.95]),
+ data={"area": np.array([900])},
+ )
+
+ # Medium object - area = 50*50 = 2500 (between 1024 and 9216) - missed
+ medium_target = Detections(
+ xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64),
+ class_id=np.array([1]),
+ data={"area": np.array([2500])},
+ )
+ no_medium_pred = Detections.empty()
+
+ # Large objects - area = 100*100 = 10000 > 9216
+ large_detections = Detections(
+ xyxy=np.array(
+ [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]],
+ dtype=np.float64,
+ ),
+ class_id=np.array([2, 0, 1]),
+ confidence=np.array([0.9, 0.9, 0.9]),
+ data={"area": np.array([10000, 10000, 10000])},
+ )
+
+ metric = MeanAveragePrecision()
+ metric.update([small_detection], [small_detection])
+ metric.update([no_medium_pred], [medium_target])
+ metric.update([large_detections], [large_detections])
+ result = metric.compute()
+
+ # Medium objects should have 0.0 mAP (missed detection)
+ assert abs(result.medium_objects.map50_95 - 0.0) < 1e-6
+
+
+def test_scenario_3_false_positive():
+ """Scenario 3: No GT, Prediction Present (False Positive)"""
+ # Small object - area = 30*30 = 900 < 1024
+ small_detection = Detections(
+ xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.95]),
+ data={"area": np.array([900])},
+ )
+
+ # Medium object - area = 50*50 = 2500 - false positive (no GT)
+ medium_pred = Detections(
+ xyxy=np.array([[12, 12, 62, 62]], dtype=np.float64),
+ class_id=np.array([1]),
+ confidence=np.array([0.9]),
+ data={"area": np.array([2500])},
+ )
+ no_medium_target = Detections.empty()
+
+ # Large objects - area = 100*100 = 10000 > 9216
+ large_detections = Detections(
+ xyxy=np.array(
+ [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]],
+ dtype=np.float64,
+ ),
+ class_id=np.array([2, 0, 1]),
+ confidence=np.array([0.9, 0.9, 0.9]),
+ data={"area": np.array([10000, 10000, 10000])},
+ )
+
+ metric = MeanAveragePrecision()
+ metric.update([small_detection], [small_detection])
+ metric.update([medium_pred], [no_medium_target])
+ metric.update([large_detections], [large_detections])
+ result = metric.compute()
+
+ # Medium objects should have -1 mAP (false positive, matching pycocotools)
+ assert result.medium_objects.map50_95 == -1
+
+
+def test_scenario_4_no_data():
+ """Scenario 4: No GT, No Prediction (Category has no data)"""
+ # Small object - area = 30*30 = 900 < 1024
+ small_detection = Detections(
+ xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.95]),
+ data={"area": np.array([900])},
+ )
+
+ # Medium object - no data at all
+ no_medium = Detections.empty()
+
+ # Large objects - area = 100*100 = 10000 > 9216 - only classes 0 and 2 (no class 1)
+ large_targets = Detections(
+ xyxy=np.array(
+ [
+ [10, 10, 110, 110],
+ [120, 120, 220, 220],
+ ],
+ dtype=np.float64,
+ ),
+ class_id=np.array([2, 0]),
+ data={"area": np.array([10000, 10000])},
+ )
+ large_preds = Detections(
+ xyxy=np.array(
+ [
+ [10, 10, 110, 110],
+ [120, 120, 220, 220],
+ ],
+ dtype=np.float64,
+ ),
+ class_id=np.array([2, 0]),
+ confidence=np.array([0.9, 0.9]),
+ data={"area": np.array([10000, 10000])},
+ )
+
+ metric = MeanAveragePrecision()
+ metric.update([small_detection], [small_detection])
+ metric.update([no_medium], [no_medium])
+ metric.update([large_preds], [large_targets])
+ result = metric.compute()
+
+ # Should NOT have negative mAP values for overall
+ assert result.map50_95 >= 0.0
+ # Medium objects should have -1 mAP (no data, matching pycocotools)
+ assert result.medium_objects.map50_95 == -1
+
+
+def test_scenario_5_only_one_class_present():
+ """Scenario 5: Only 1 of 3 Classes Present (Perfect Match)"""
+ # Only class 0 objects with perfect matches
+ detections_class_0 = [
+ Detections(
+ xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.95]),
+ ),
+ Detections(
+ xyxy=np.array([[20, 20, 230, 130]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.9]),
+ ),
+ ]
+
+ metric = MeanAveragePrecision()
+ for det in detections_class_0:
+ metric.update([det], [det])
+
+ result = metric.compute()
+
+ # Should be 1.0 mAP (perfect match for the only class present)
+ assert abs(result.map50_95 - 1.0) < 1e-6
+ assert abs(result.map50 - 1.0) < 1e-6
+ assert abs(result.map75 - 1.0) < 1e-6
+
+
+def test_mixed_classes_with_missing_detections():
+ """Test mixed scenario with some classes having no detections"""
+ # Class 0: Perfect detection
+ class_0_det = Detections(
+ xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64),
+ class_id=np.array([0]),
+ confidence=np.array([0.9]),
+ )
+
+ # Class 1: GT exists but no prediction
+ class_1_target = Detections(
+ xyxy=np.array([[60, 60, 100, 100]], dtype=np.float64),
+ class_id=np.array([1]),
+ )
+ class_1_pred = Detections.empty()
+
+ # Class 2: Prediction exists but no GT (false positive)
+ class_2_pred = Detections(
+ xyxy=np.array([[110, 110, 150, 150]], dtype=np.float64),
+ class_id=np.array([2]),
+ confidence=np.array([0.8]),
+ )
+ class_2_target = Detections.empty()
+
+ metric = MeanAveragePrecision()
+ metric.update([class_0_det], [class_0_det])
+ metric.update([class_1_pred], [class_1_target])
+ metric.update([class_2_pred], [class_2_target])
+ result = metric.compute()
+
+ # Should not have negative mAP
+ assert result.map50_95 >= 0.0
+ # Should be less than 1.0 due to missed detection and false positive
+ assert result.map50_95 < 1.0
+
+
+def test_empty_predictions_and_targets():
+ """Test completely empty predictions and targets"""
+ metric = MeanAveragePrecision()
+ metric.update([Detections.empty()], [Detections.empty()])
+ result = metric.compute()
+
+ # Should return -1 for no data (matching pycocotools behavior)
+ assert result.map50_95 == -1
+ assert result.map50 == -1
+ assert result.map75 == -1
+
+ # All object size categories should also be -1
+ assert result.small_objects.map50_95 == -1
+ assert result.medium_objects.map50_95 == -1
+ assert result.large_objects.map50_95 == -1
diff --git a/test/metrics/test_mean_average_precision_area.py b/test/metrics/test_mean_average_precision_area.py
new file mode 100644
index 000000000..832620961
--- /dev/null
+++ b/test/metrics/test_mean_average_precision_area.py
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from supervision.detection.core import Detections
+from supervision.metrics.mean_average_precision import MeanAveragePrecision
+
+
+class TestMeanAveragePrecisionArea:
+ """Test area calculation in MeanAveragePrecision."""
+
+ @pytest.mark.parametrize(
+ "xyxy, expected_areas, expected_size_maps",
+ [
+ (
+ np.array(
+ [
+ [10, 10, 40, 40], # Small: 900
+ [100, 100, 200, 150], # Medium: 5000
+ [300, 300, 500, 400], # Large: 20000
+ ],
+ dtype=np.float32,
+ ),
+ [900.0, 5000.0, 20000.0],
+ {"small": True, "medium": True, "large": True},
+ ),
+ (
+ np.array([[0, 0, 10, 10]], dtype=np.float32), # Small: 100
+ [100.0],
+ {"small": True, "medium": False, "large": False},
+ ),
+ (
+ np.array([[0, 0, 50, 50]], dtype=np.float32), # Medium: 2500
+ [2500.0],
+ {"small": False, "medium": True, "large": False},
+ ),
+ (
+ np.array([[0, 0, 100, 100]], dtype=np.float32), # Large: 10000
+ [10000.0],
+ {"small": False, "medium": False, "large": True},
+ ),
+ ],
+ )
+ def test_area_calculation_and_size_specific_map(
+ self, xyxy, expected_areas, expected_size_maps
+ ):
+ """Test area calculation and size-specific mAP functionality."""
+ gt = Detections(
+ xyxy=xyxy,
+ class_id=np.arange(len(xyxy)),
+ )
+ pred = Detections(
+ xyxy=gt.xyxy.copy(),
+ class_id=gt.class_id.copy(),
+ confidence=np.full(len(xyxy), 0.9),
+ )
+
+ map_metric = MeanAveragePrecision()
+ map_metric.update([pred], [gt])
+
+ # Test area calculation
+ prepared_targets = map_metric._prepare_targets(map_metric._targets_list)
+ areas = [ann["area"] for ann in prepared_targets["annotations"]]
+ assert np.allclose(areas, expected_areas), (
+ f"Expected {expected_areas}, got {areas}"
+ )
+
+ # Test size-specific mAP
+ result = map_metric.compute()
+
+ if expected_size_maps["small"]:
+ assert result.small_objects.map50 > 0.9, (
+ "Small objects should have high mAP"
+ )
+ else:
+ assert result.small_objects.map50 == -1.0, (
+ "Small objects should have no data"
+ )
+
+ if expected_size_maps["medium"]:
+ assert result.medium_objects.map50 > 0.9, (
+ "Medium objects should have high mAP"
+ )
+ else:
+ assert result.medium_objects.map50 == -1.0, (
+ "Medium objects should have no data"
+ )
+
+ if expected_size_maps["large"]:
+ assert result.large_objects.map50 > 0.9, (
+ "Large objects should have high mAP"
+ )
+ else:
+ assert result.large_objects.map50 == -1.0, (
+ "Large objects should have no data"
+ )
+
+ def test_area_preserved_from_data(self):
+ """Test that area from data field is preserved (COCO case)."""
+ gt = Detections(
+ xyxy=np.array(
+ [[100, 100, 200, 150]], dtype=np.float32
+ ), # Would calculate to 5000
+ class_id=np.array([0]),
+ )
+ # Override with custom area
+ gt.data = {"area": np.array([3000.0])}
+
+ pred = Detections(
+ xyxy=gt.xyxy.copy(),
+ class_id=gt.class_id.copy(),
+ confidence=np.array([0.9]),
+ )
+ pred.data = {"area": np.array([3000.0])}
+
+ map_metric = MeanAveragePrecision()
+ map_metric.update([pred], [gt])
+
+ prepared_targets = map_metric._prepare_targets(map_metric._targets_list)
+ used_area = prepared_targets["annotations"][0]["area"]
+
+ assert np.allclose(used_area, 3000.0), (
+ f"Should use provided area 3000.0, got {used_area}"
+ )
+
+ # Verify it's different from what would be calculated
+ calculated_area = (200 - 100) * (150 - 100) # 100 * 50 = 5000
+ assert not np.allclose(used_area, calculated_area), (
+ "Should use provided area, not calculated"
+ )