diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6e7fcd59e..02010285e 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,4 +1,4 @@
 # These owners will be the default owners for everything in
 # the repo. They will be requested for review when someone
 # opens a pull request.
-* @SkalskiP @onuralpszr
+* @SkalskiP @soumik12345
diff --git a/docs/changelog.md b/docs/changelog.md
index 73efcf0b1..4ebec5002 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,17 @@
 # Changelog
 
+### 0.26.1 Jul 22, 2025
+
+- Fixed [#1894](https://github.com/roboflow/supervision/pull/1894): Error in [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) where the area used for size-specific evaluation (small / medium / large) was always zero unless explicitly provided in `sv.Detections.data`.
+
+- Fixed [#1895](https://github.com/roboflow/supervision/pull/1895): `ID=0` bug in [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) where objects were getting `0.0` mAP despite perfect IoU matches, caused by incorrect annotation ID assignment.
+
+- Fixed [#1898](https://github.com/roboflow/supervision/pull/1898): Issue where [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) could return negative values when certain object size categories have no data.
+
+- Fixed [#1901](https://github.com/roboflow/supervision/pull/1901): `match_metric` support for [`sv.Detections.with_nms`](https://supervision.roboflow.com/0.26.1/detection/core/#supervision.detection.core.Detections.with_nms).
+
+- Fixed [#1906](https://github.com/roboflow/supervision/pull/1906): `border_thickness` parameter usage for [`sv.PercentageBarAnnotator`](https://supervision.roboflow.com/0.26.1/detection/annotators/#supervision.annotators.core.PercentageBarAnnotator).
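+
+A minimal usage sketch of the fixes above (boxes and thresholds are illustrative; `OverlapMetric` is imported from the module path used in this release, and the `IOS` member is assumed from the docstrings):
+
+```python
+import numpy as np
+
+import supervision as sv
+from supervision.detection.utils.iou_and_nms import OverlapMetric
+
+detections = sv.Detections(
+    xyxy=np.array([[10, 10, 60, 60], [12, 12, 62, 62]], dtype=np.float64),
+    confidence=np.array([0.9, 0.8]),
+    class_id=np.array([0, 0]),
+)
+
+# The requested overlap metric (IoU or IoS) is now passed through to NMS (#1901).
+detections = detections.with_nms(threshold=0.5, overlap_metric=OverlapMetric.IOS)
+
+# An explicitly provided border_thickness is now respected (#1906).
+annotator = sv.PercentageBarAnnotator(border_thickness=2)
+```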
+
 ### 0.26.0 Jul 16, 2025
 
 !!! failure "Removed"
@@ -153,7 +165,7 @@
 
 - Changed [#1786](https://github.com/roboflow/supervision/pull/1786): Significantly improved the speed of HSV color mapping in [`sv.HeatMapAnnotator`](https://supervision.roboflow.com/0.26.0/detection/annotators/#supervision.annotators.core.HeatMapAnnotator), achieving approximately 28x faster performance on 1920x1080 frames.
 
-- Fix [#1834](https://github.com/roboflow/supervision/pull/1834): Supervision’s [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.0/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) is now fully aligned with [pycocotools](https://github.com/ppwwyyxx/cocoapi), the official COCO evaluation tool, ensuring accurate and standardized metrics. This update enabled us to launch a new version of the [Computer Vision Model Leaderboard](https://leaderboard.roboflow.com/).
+- Fixed [#1834](https://github.com/roboflow/supervision/pull/1834): Supervision’s [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.0/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) is now fully aligned with [pycocotools](https://github.com/ppwwyyxx/cocoapi), the official COCO evaluation tool, ensuring accurate and standardized metrics. This update enabled us to launch a new version of the [Computer Vision Model Leaderboard](https://leaderboard.roboflow.com/).
 ```python
 import supervision as sv
 from supervision.metrics import MeanAveragePrecision
@@ -173,7 +185,7 @@
 # Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.629
 ```
 
-- Fix [#1767](https://github.com/roboflow/supervision/pull/1767): Fixed losing `sv.Detections.data` when detections filtering.
+- Fixed [#1767](https://github.com/roboflow/supervision/pull/1767): Fixed losing `sv.Detections.data` when filtering detections.
 
 ### 0.25.0 Nov 12, 2024
 
@@ -557,9 +569,9 @@ detections = sv.Detections.from_sam(sam_result=sam_result)
 
 - Changed [#1434](https://github.com/roboflow/supervision/pull/1434): [`InferenceSlicer`](https://supervision.roboflow.com/0.23.0/detection/tools/inference_slicer/) now features an `overlap_wh` parameter, making it easier to compute slice sizes when handling overlapping slices.
 
-- Fix [#1448](https://github.com/roboflow/supervision/pull/1448): Various annotator type issues have been resolved, supporting expanded error handling.
+- Fixed [#1448](https://github.com/roboflow/supervision/pull/1448): Various annotator type issues have been resolved, supporting expanded error handling.
 
-- Fix [#1348](https://github.com/roboflow/supervision/pull/1348): Introduced a new method for [seeking to a specific video frame](https://supervision.roboflow.com/0.23.0/utils/video/#supervision.utils.video.get_video_frames_generator), addressing cases where traditional seek methods were failing. It can be enabled with `iterative_seek=True`.
+- Fixed [#1348](https://github.com/roboflow/supervision/pull/1348): Introduced a new method for [seeking to a specific video frame](https://supervision.roboflow.com/0.23.0/utils/video/#supervision.utils.video.get_video_frames_generator), addressing cases where traditional seek methods were failing. It can be enabled with `iterative_seek=True`.
 
 ```python
 import supervision as sv
@@ -572,7 +584,7 @@ for frame in sv.get_video_frames_generator(
     ...
 ```
 
-- Fix [#1424](https://github.com/roboflow/supervision/pull/1424): `plot_image` function now clearly indicates that the size is in inches.
+- Fixed [#1424](https://github.com/roboflow/supervision/pull/1424): `plot_image` function now clearly indicates that the size is in inches.
 
 !!! failure "Removed"
 
@@ -1285,7 +1297,7 @@ array([
 
 ### 0.11.1 June 29, 2023
 
-- Fix [#165](https://github.com/roboflow/supervision/pull/165): [`as_folder_structure`](/0.11.1/dataset/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure) fails to save [`sv.ClassificationDataset`](/0.11.1/dataset/core/#classificationdataset) when it is result of inference.
+- Fixed [#165](https://github.com/roboflow/supervision/pull/165): [`as_folder_structure`](/0.11.1/dataset/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure) fails to save [`sv.ClassificationDataset`](/0.11.1/dataset/core/#classificationdataset) when it is the result of inference.
 
 ### 0.11.0 June 28, 2023
 
@@ -1331,7 +1343,7 @@ array([
 
 - Added [#162](https://github.com/roboflow/supervision/pull/162): additional `start` and `end` arguments to [`sv.get_video_frames_generator`](/0.11.0/utils/video/#get_video_frames_generator) allowing to generate frames only for a selected part of the video.
 
-- Fix [#157](https://github.com/roboflow/supervision/pull/157): incorrect loading of YOLO dataset class names from `data.yaml`.
+- Fixed [#157](https://github.com/roboflow/supervision/pull/157): incorrect loading of YOLO dataset class names from `data.yaml`.
 
### 0.10.0 June 14, 2023 diff --git a/pyproject.toml b/pyproject.toml index 1b830697a..cae78492a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.26.0" +version = "0.26.1" readme = "README.md" requires-python = ">=3.9" authors = [ diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 6c142db19..0b7d4b763 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -2239,8 +2239,11 @@ def __init__( self.position: Position = position self.color_lookup: ColorLookup = color_lookup - if border_thickness is None: - self.border_thickness = int(0.15 * self.height) + self.border_thickness = ( + border_thickness + if border_thickness is not None + else int(0.15 * self.height) + ) @ensure_cv2_image_for_annotation def annotate( diff --git a/supervision/detection/core.py b/supervision/detection/core.py index efaa366a3..ea466b895 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1939,8 +1939,8 @@ def with_nms( class_agnostic (bool): Whether to perform class-agnostic non-maximum suppression. If True, the class_id of each detection will be ignored. Defaults to False. - overlap_metric (OverlapMetric): Metric used for measuring overlap between - detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of + overlap between pairs of masks or boxes (e.g., IoU, IoS). Returns: Detections: A new Detections object containing the subset of detections @@ -2003,8 +2003,8 @@ def with_nmm( class_agnostic (bool): Whether to perform class-agnostic non-maximum merging. If True, the class_id of each detection will be ignored. Defaults to False. - overlap_metric (OverlapMetric): Metric used for measuring overlap between - detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of + overlap between pairs of masks or boxes (e.g., IoU, IoS). Returns: Detections: A new Detections object containing the subset of detections diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index ff4a44b28..aaecccb3d 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -9,7 +9,7 @@ from supervision.config import ORIENTED_BOX_COORDINATES from supervision.detection.core import Detections from supervision.detection.utils.boxes import move_boxes, move_oriented_boxes -from supervision.detection.utils.iou_and_nms import OverlapFilter +from supervision.detection.utils.iou_and_nms import OverlapFilter, OverlapMetric from supervision.detection.utils.masks import move_masks from supervision.utils.image import crop_image from supervision.utils.internal import ( @@ -75,8 +75,8 @@ class InferenceSlicer: filtering or merging overlapping detections in slices. iou_threshold (float): Intersection over Union (IoU) threshold used when filtering by overlap. - match_metric (str): Metric used for matching detections in slices. - "IOU" or "IOS". Defaults "IOU". + overlap_metric (Union[OverlapMetric, str]): Metric used for matching detections + in slices. callback (Callable): A function that performs inference on a given image slice and returns detections. thread_workers (int): Number of threads for parallel execution. 
@@ -96,7 +96,7 @@ def __init__( overlap_wh: tuple[int, int] | None = None, overlap_filter: OverlapFilter | str = OverlapFilter.NON_MAX_SUPPRESSION, iou_threshold: float = 0.5, - match_metric: str = "IOU", + overlap_metric: OverlapMetric | str = OverlapMetric.IOU, thread_workers: int = 1, ): if overlap_ratio_wh is not None: @@ -112,7 +112,7 @@ def __init__( self.slice_wh = slice_wh self.iou_threshold = iou_threshold - self.match_metric = match_metric + self.overlap_metric = OverlapMetric.from_value(overlap_metric) self.overlap_filter = OverlapFilter.from_value(overlap_filter) self.callback = callback self.thread_workers = thread_workers @@ -173,11 +173,11 @@ def callback(image_slice: np.ndarray) -> sv.Detections: return merged elif self.overlap_filter == OverlapFilter.NON_MAX_SUPPRESSION: return merged.with_nms( - threshold=self.iou_threshold, match_metric=self.match_metric + threshold=self.iou_threshold, overlap_metric=self.overlap_metric ) elif self.overlap_filter == OverlapFilter.NON_MAX_MERGE: return merged.with_nmm( - threshold=self.iou_threshold, match_metric=self.match_metric + threshold=self.iou_threshold, overlap_metric=self.overlap_metric ) else: warnings.warn( diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py index 56bdff71b..1a6f80bc5 100644 --- a/supervision/detection/utils/iou_and_nms.py +++ b/supervision/detection/utils/iou_and_nms.py @@ -164,7 +164,8 @@ def box_iou_batch( `shape = (N, 4)` where `N` is number of true objects. boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes. `shape = (M, 4)` where `M` is number of detected objects. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`. @@ -381,7 +382,8 @@ def _mask_iou_batch_split( Args: masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks. masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). Returns: np.ndarray: Pairwise IoU of masks from `masks_true` and `masks_detection`. @@ -433,7 +435,8 @@ def mask_iou_batch( Args: masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks. masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). memory_limit (int): memory limit in MB, default is 1024 * 5 MB (5GB). Returns: @@ -492,7 +495,8 @@ def mask_non_max_suppression( dimensions of each mask. iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). mask_dimension (int): The dimension to which the masks should be resized before computing IOU values. Defaults to 640. @@ -543,7 +547,8 @@ def box_non_max_suppression( or `(x_min, y_min, x_max, y_max, score, class)`. 
iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: np.ndarray: A boolean array indicating which predictions to keep after n @@ -603,7 +608,8 @@ def _group_overlapping_masks( the predictions. iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). Returns: list[list[int]]: Groups of prediction indices be merged. @@ -664,7 +670,8 @@ def mask_non_max_merge( to use for non-maximum suppression. mask_dimension (int): The dimension to which the masks should be resized before computing IOU values. Defaults to 640. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). Returns: np.ndarray: A boolean array indicating which predictions to keep after @@ -717,7 +724,8 @@ def _group_overlapping_boxes( and the confidence scores. iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: list[list[int]]: Groups of prediction indices be merged. @@ -765,7 +773,8 @@ def box_non_max_merge( detections of different classes to be merged. iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: list[list[int]]: Groups of prediction indices be merged. diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 5100f39ac..43a3116ac 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -57,7 +57,11 @@ class and IoU threshold. 
Shape: `(num_target_classes, num_iou_thresholds)` @property def map50_95(self) -> float: - return self.mAP_scores.mean() + valid_scores = self.mAP_scores[self.mAP_scores > -1] + if len(valid_scores) > 0: + return valid_scores.mean() + else: + return -1 @property def map50(self) -> float: @@ -421,6 +425,11 @@ def load_predictions(self, predictions: list[dict]) -> EvaluationDataset: if not isinstance(predictions, list): raise ValueError("results must be a list") + # Handle empty predictions + if len(predictions) == 0: + predictions_dataset.dataset["annotations"] = [] + return predictions_dataset + ids = [pred["image_id"] for pred in predictions] # Make sure the image ids from predictions exist in the current dataset @@ -909,6 +918,35 @@ def _accumulate(self): np.array(score_at_recall) ) + self.results = { + "params": self.params, + "counts": [ + num_iou_thresholds, + num_recall_thresholds, + num_categories, + num_area_ranges, + num_max_detections, + ], + "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "precision": precision, + "recall": recall, + "scores": scores, + } + + # Helper function to compute average precision while handling -1 sentinel values + def compute_average_precision(precision_slice): + """Compute average precision while handling -1 sentinel values.""" + masked = np.ma.masked_equal(precision_slice, -1) + if masked.count() == 0: + # All values are -1 (no data) + return np.full(num_iou_thresholds, -1), np.full( + (num_categories, num_iou_thresholds), -1 + ) + else: + mAP_scores = np.ma.filled(masked.mean(axis=(1, 2)), -1) + ap_per_class = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0) + return mAP_scores, ap_per_class + # Average precision over all sizes, 100 max detections area_range_idx = list(ObjectSize).index(ObjectSize.ALL) max_100_dets_idx = self.params.max_dets.index(100) @@ -917,10 +955,9 @@ def _accumulate(self): :, :, :, area_range_idx, max_100_dets_idx ] # mAP over thresholds (dimension=num_thresholds) - mAP_scores_all_sizes = average_precision_all_sizes.mean(axis=(1, 2)) - # AP per class - ap_per_class_all_sizes = average_precision_all_sizes.mean(axis=1).transpose( - 1, 0 + # Use masked array to exclude -1 values when computing mean + mAP_scores_all_sizes, ap_per_class_all_sizes = compute_average_precision( + average_precision_all_sizes ) # Average precision for SMALL objects and 100 max detections @@ -928,24 +965,27 @@ def _accumulate(self): average_precision_small = precision[ :, :, :, small_area_range_idx, max_100_dets_idx ] - mAP_scores_small = average_precision_small.mean(axis=(1, 2)) - ap_per_class_small = average_precision_small.mean(axis=1).transpose(1, 0) + mAP_scores_small, ap_per_class_small = compute_average_precision( + average_precision_small + ) # Average precision for MEDIUM objects and 100 max detections medium_area_range_idx = list(ObjectSize).index(ObjectSize.MEDIUM) average_precision_medium = precision[ :, :, :, medium_area_range_idx, max_100_dets_idx ] - mAP_scores_medium = average_precision_medium.mean(axis=(1, 2)) - ap_per_class_medium = average_precision_medium.mean(axis=1).transpose(1, 0) + mAP_scores_medium, ap_per_class_medium = compute_average_precision( + average_precision_medium + ) # Average precision for LARGE objects and 100 max detections large_area_range_idx = list(ObjectSize).index(ObjectSize.LARGE) average_precision_large = precision[ :, :, :, large_area_range_idx, max_100_dets_idx ] - mAP_scores_large = average_precision_large.mean(axis=(1, 2)) - ap_per_class_large = 
average_precision_large.mean(axis=1).transpose(1, 0) + mAP_scores_large, ap_per_class_large = compute_average_precision( + average_precision_large + ) self.results = { "params": self.params, @@ -1221,7 +1261,7 @@ def _prepare_targets(self, targets): for image_id, image_targets in enumerate(targets): if self._image_indices is not None: image_id = self._image_indices[image_id] - for target in image_targets: + for target_idx, target in enumerate(image_targets): xyxy = target[0] # or xyxy = prediction[0]; xyxy[2:4] -= xyxy[0:2] xywh = [xyxy[0], xyxy[1], xyxy[2] - xyxy[0], xyxy[3] - xyxy[1]] # Get "area" and "iscrowd" (default 0) from data @@ -1231,13 +1271,20 @@ def _prepare_targets(self, targets): category_id = self._class_mapping[target[3].item()] else: category_id = target[3].item() + + # Use area from data if available (e.g., COCO datasets) + # Otherwise use Detections.area property + area = data.get("area") if data else None + if area is None: + area = image_targets.area[target_idx] + dict_annotation = { - "area": data.get("area", 0), + "area": area, "iscrowd": data.get("iscrowd", 0), "image_id": image_id, "bbox": xywh, "category_id": category_id, - "id": len(annotations), # incrementally increase the id + "id": len(annotations) + 1, # Start IDs from 1 (0 means no match) } annotations.append(dict_annotation) # Category list diff --git a/test/metrics/test_mean_average_precision.py b/test/metrics/test_mean_average_precision.py new file mode 100644 index 000000000..f17cb1cec --- /dev/null +++ b/test/metrics/test_mean_average_precision.py @@ -0,0 +1,328 @@ +""" +Tests for Mean Average Precision ID=0 bug fix +""" + +import numpy as np + +from supervision.detection.core import Detections +from supervision.metrics.mean_average_precision import MeanAveragePrecision + + +def test_single_perfect_detection(): + """Test that single perfect detection gets 1.0 mAP (not 0.0 due to ID=0 bug)""" + # Perfect detection (identical prediction and target) + detection = Detections( + xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]), + ) + + metric = MeanAveragePrecision() + metric.update([detection], [detection]) + result = metric.compute() + + # Should be perfect 1.0 mAP, not 0.0 due to ID=0 bug + assert abs(result.map50_95 - 1.0) < 1e-6 + + +def test_multiple_perfect_detections(): + """Test that multiple perfect detections get 1.0 mAP""" + # Multiple perfect detections in one image + detections = Detections( + xyxy=np.array( + [[10, 10, 50, 50], [100, 100, 140, 140], [200, 200, 240, 240]], + dtype=np.float64, + ), + class_id=np.array([0, 0, 0]), + confidence=np.array([0.9, 0.9, 0.9]), + ) + + metric = MeanAveragePrecision() + metric.update([detections], [detections]) + result = metric.compute() + + # Should be perfect 1.0 mAP + assert abs(result.map50_95 - 1.0) < 1e-6 + + +def test_batch_updates_perfect_detections(): + """Test that batch updates with perfect detections get 1.0 mAP""" + # Single perfect detection for multiple batch updates + detection = Detections( + xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]), + ) + + metric = MeanAveragePrecision() + # Add 3 batch updates + metric.update([detection], [detection]) + metric.update([detection], [detection]) + metric.update([detection], [detection]) + result = metric.compute() + + # Should be perfect 1.0 mAP across all batches + assert abs(result.map50_95 - 1.0) < 1e-6 + + +def test_scenario_1_success_case_imperfect_match(): + """Scenario 1: 
Success Case with imperfect match""" + # Small object (class 0) - area = 30*30 = 900 < 1024 + small_perfect = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])}, + ) + + # Medium object (class 1) - area = 50*50 = 2500 (between 1024 and 9216) + medium_target = Detections( + xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64), + class_id=np.array([1]), + data={"area": np.array([2500])}, + ) + medium_pred = Detections( + xyxy=np.array([[12, 12, 60, 60]], dtype=np.float64), # Slightly off + class_id=np.array([1]), + confidence=np.array([0.9]), + data={"area": np.array([2304])}, # 48*48 + ) + + # Large objects (classes 0, 1, 2) - area = 100*100 = 10000 > 9216 + large_targets = Detections( + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), + class_id=np.array([2, 0, 1]), + data={"area": np.array([10000, 10000, 10000])}, + ) + large_preds = Detections( + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), + class_id=np.array([2, 0, 1]), + confidence=np.array([0.9, 0.9, 0.9]), + data={"area": np.array([10000, 10000, 10000])}, + ) + + metric = MeanAveragePrecision() + metric.update([small_perfect], [small_perfect]) + metric.update([medium_pred], [medium_target]) + metric.update([large_preds], [large_targets]) + result = metric.compute() + + # Should be close to 0.9 (slightly less than perfect due to medium object) + assert 0.85 < result.map50_95 < 0.98 # Adjusted upper bound + assert result.medium_objects.map50_95 < 1.0 # Medium should be less than perfect + + +def test_scenario_2_missed_detection(): + """Scenario 2: GT Present, No Prediction (Missed Detection)""" + # Small object - area = 30*30 = 900 < 1024 + small_detection = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])}, + ) + + # Medium object - area = 50*50 = 2500 (between 1024 and 9216) - missed + medium_target = Detections( + xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64), + class_id=np.array([1]), + data={"area": np.array([2500])}, + ) + no_medium_pred = Detections.empty() + + # Large objects - area = 100*100 = 10000 > 9216 + large_detections = Detections( + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), + class_id=np.array([2, 0, 1]), + confidence=np.array([0.9, 0.9, 0.9]), + data={"area": np.array([10000, 10000, 10000])}, + ) + + metric = MeanAveragePrecision() + metric.update([small_detection], [small_detection]) + metric.update([no_medium_pred], [medium_target]) + metric.update([large_detections], [large_detections]) + result = metric.compute() + + # Medium objects should have 0.0 mAP (missed detection) + assert abs(result.medium_objects.map50_95 - 0.0) < 1e-6 + + +def test_scenario_3_false_positive(): + """Scenario 3: No GT, Prediction Present (False Positive)""" + # Small object - area = 30*30 = 900 < 1024 + small_detection = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])}, + ) + + # Medium object - area = 50*50 = 2500 - false positive (no GT) + medium_pred = Detections( + xyxy=np.array([[12, 12, 62, 62]], dtype=np.float64), + class_id=np.array([1]), + confidence=np.array([0.9]), + data={"area": np.array([2500])}, + ) + no_medium_target 
= Detections.empty() + + # Large objects - area = 100*100 = 10000 > 9216 + large_detections = Detections( + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), + class_id=np.array([2, 0, 1]), + confidence=np.array([0.9, 0.9, 0.9]), + data={"area": np.array([10000, 10000, 10000])}, + ) + + metric = MeanAveragePrecision() + metric.update([small_detection], [small_detection]) + metric.update([medium_pred], [no_medium_target]) + metric.update([large_detections], [large_detections]) + result = metric.compute() + + # Medium objects should have -1 mAP (false positive, matching pycocotools) + assert result.medium_objects.map50_95 == -1 + + +def test_scenario_4_no_data(): + """Scenario 4: No GT, No Prediction (Category has no data)""" + # Small object - area = 30*30 = 900 < 1024 + small_detection = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])}, + ) + + # Medium object - no data at all + no_medium = Detections.empty() + + # Large objects - area = 100*100 = 10000 > 9216 - only classes 0 and 2 (no class 1) + large_targets = Detections( + xyxy=np.array( + [ + [10, 10, 110, 110], + [120, 120, 220, 220], + ], + dtype=np.float64, + ), + class_id=np.array([2, 0]), + data={"area": np.array([10000, 10000])}, + ) + large_preds = Detections( + xyxy=np.array( + [ + [10, 10, 110, 110], + [120, 120, 220, 220], + ], + dtype=np.float64, + ), + class_id=np.array([2, 0]), + confidence=np.array([0.9, 0.9]), + data={"area": np.array([10000, 10000])}, + ) + + metric = MeanAveragePrecision() + metric.update([small_detection], [small_detection]) + metric.update([no_medium], [no_medium]) + metric.update([large_preds], [large_targets]) + result = metric.compute() + + # Should NOT have negative mAP values for overall + assert result.map50_95 >= 0.0 + # Medium objects should have -1 mAP (no data, matching pycocotools) + assert result.medium_objects.map50_95 == -1 + + +def test_scenario_5_only_one_class_present(): + """Scenario 5: Only 1 of 3 Classes Present (Perfect Match)""" + # Only class 0 objects with perfect matches + detections_class_0 = [ + Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + ), + Detections( + xyxy=np.array([[20, 20, 230, 130]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]), + ), + ] + + metric = MeanAveragePrecision() + for det in detections_class_0: + metric.update([det], [det]) + + result = metric.compute() + + # Should be 1.0 mAP (perfect match for the only class present) + assert abs(result.map50_95 - 1.0) < 1e-6 + assert abs(result.map50 - 1.0) < 1e-6 + assert abs(result.map75 - 1.0) < 1e-6 + + +def test_mixed_classes_with_missing_detections(): + """Test mixed scenario with some classes having no detections""" + # Class 0: Perfect detection + class_0_det = Detections( + xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]), + ) + + # Class 1: GT exists but no prediction + class_1_target = Detections( + xyxy=np.array([[60, 60, 100, 100]], dtype=np.float64), + class_id=np.array([1]), + ) + class_1_pred = Detections.empty() + + # Class 2: Prediction exists but no GT (false positive) + class_2_pred = Detections( + xyxy=np.array([[110, 110, 150, 150]], dtype=np.float64), + class_id=np.array([2]), + confidence=np.array([0.8]), + ) + class_2_target = Detections.empty() + + metric = 
MeanAveragePrecision() + metric.update([class_0_det], [class_0_det]) + metric.update([class_1_pred], [class_1_target]) + metric.update([class_2_pred], [class_2_target]) + result = metric.compute() + + # Should not have negative mAP + assert result.map50_95 >= 0.0 + # Should be less than 1.0 due to missed detection and false positive + assert result.map50_95 < 1.0 + + +def test_empty_predictions_and_targets(): + """Test completely empty predictions and targets""" + metric = MeanAveragePrecision() + metric.update([Detections.empty()], [Detections.empty()]) + result = metric.compute() + + # Should return -1 for no data (matching pycocotools behavior) + assert result.map50_95 == -1 + assert result.map50 == -1 + assert result.map75 == -1 + + # All object size categories should also be -1 + assert result.small_objects.map50_95 == -1 + assert result.medium_objects.map50_95 == -1 + assert result.large_objects.map50_95 == -1 diff --git a/test/metrics/test_mean_average_precision_area.py b/test/metrics/test_mean_average_precision_area.py new file mode 100644 index 000000000..832620961 --- /dev/null +++ b/test/metrics/test_mean_average_precision_area.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import numpy as np +import pytest + +from supervision.detection.core import Detections +from supervision.metrics.mean_average_precision import MeanAveragePrecision + + +class TestMeanAveragePrecisionArea: + """Test area calculation in MeanAveragePrecision.""" + + @pytest.mark.parametrize( + "xyxy, expected_areas, expected_size_maps", + [ + ( + np.array( + [ + [10, 10, 40, 40], # Small: 900 + [100, 100, 200, 150], # Medium: 5000 + [300, 300, 500, 400], # Large: 20000 + ], + dtype=np.float32, + ), + [900.0, 5000.0, 20000.0], + {"small": True, "medium": True, "large": True}, + ), + ( + np.array([[0, 0, 10, 10]], dtype=np.float32), # Small: 100 + [100.0], + {"small": True, "medium": False, "large": False}, + ), + ( + np.array([[0, 0, 50, 50]], dtype=np.float32), # Medium: 2500 + [2500.0], + {"small": False, "medium": True, "large": False}, + ), + ( + np.array([[0, 0, 100, 100]], dtype=np.float32), # Large: 10000 + [10000.0], + {"small": False, "medium": False, "large": True}, + ), + ], + ) + def test_area_calculation_and_size_specific_map( + self, xyxy, expected_areas, expected_size_maps + ): + """Test area calculation and size-specific mAP functionality.""" + gt = Detections( + xyxy=xyxy, + class_id=np.arange(len(xyxy)), + ) + pred = Detections( + xyxy=gt.xyxy.copy(), + class_id=gt.class_id.copy(), + confidence=np.full(len(xyxy), 0.9), + ) + + map_metric = MeanAveragePrecision() + map_metric.update([pred], [gt]) + + # Test area calculation + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) + areas = [ann["area"] for ann in prepared_targets["annotations"]] + assert np.allclose(areas, expected_areas), ( + f"Expected {expected_areas}, got {areas}" + ) + + # Test size-specific mAP + result = map_metric.compute() + + if expected_size_maps["small"]: + assert result.small_objects.map50 > 0.9, ( + "Small objects should have high mAP" + ) + else: + assert result.small_objects.map50 == -1.0, ( + "Small objects should have no data" + ) + + if expected_size_maps["medium"]: + assert result.medium_objects.map50 > 0.9, ( + "Medium objects should have high mAP" + ) + else: + assert result.medium_objects.map50 == -1.0, ( + "Medium objects should have no data" + ) + + if expected_size_maps["large"]: + assert result.large_objects.map50 > 0.9, ( + "Large objects should have high mAP" + ) + else: 
+ assert result.large_objects.map50 == -1.0, ( + "Large objects should have no data" + ) + + def test_area_preserved_from_data(self): + """Test that area from data field is preserved (COCO case).""" + gt = Detections( + xyxy=np.array( + [[100, 100, 200, 150]], dtype=np.float32 + ), # Would calculate to 5000 + class_id=np.array([0]), + ) + # Override with custom area + gt.data = {"area": np.array([3000.0])} + + pred = Detections( + xyxy=gt.xyxy.copy(), + class_id=gt.class_id.copy(), + confidence=np.array([0.9]), + ) + pred.data = {"area": np.array([3000.0])} + + map_metric = MeanAveragePrecision() + map_metric.update([pred], [gt]) + + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) + used_area = prepared_targets["annotations"][0]["area"] + + assert np.allclose(used_area, 3000.0), ( + f"Should use provided area 3000.0, got {used_area}" + ) + + # Verify it's different from what would be calculated + calculated_area = (200 - 100) * (150 - 100) # 100 * 50 = 5000 + assert not np.allclose(used_area, calculated_area), ( + "Should use provided area, not calculated" + )
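+
+    # Minimal supplementary sketch (assumed behavior, names mirror the tests
+    # above): with no "area" key in `data`, `_prepare_targets` is expected to
+    # fall back to the `Detections.area` property when filling annotation areas.
+    def test_area_falls_back_to_detections_area(self):
+        """Test that area defaults to Detections.area when data has no area."""
+        gt = Detections(
+            xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32),
+            class_id=np.array([0]),
+        )
+        pred = Detections(
+            xyxy=gt.xyxy.copy(),
+            class_id=gt.class_id.copy(),
+            confidence=np.array([0.9]),
+        )
+
+        map_metric = MeanAveragePrecision()
+        map_metric.update([pred], [gt])
+
+        prepared_targets = map_metric._prepare_targets(map_metric._targets_list)
+        used_area = prepared_targets["annotations"][0]["area"]
+
+        # A 100 x 50 box yields an area of 5000, matching Detections.area
+        assert np.allclose(used_area, gt.area[0])
+        assert np.allclose(used_area, 5000.0)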