26 changes: 13 additions & 13 deletions docs/source/python/models/detection_model.md
@@ -12,15 +12,12 @@ A single input image of shape (H, W, 3) where H and W are the height and width o

### Outputs

Detection model outputs a list of detection objects (i.e `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes:
Detection model outputs a `DetectionResult` object containing the following attributes:

- `score` (float) - Confidence score of the object.
- `id` (int) - Class label of the object.
- `str_label` (str) - String label of the object.
- `xmin` (int) - X-coordinate of the top-left corner of the bounding box.
- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box.
- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box.
- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box.
- `boxes` (np.ndarray) - Bounding boxes of the detected objects, each in (x1, y1, x2, y2) format.
- `scores` (np.ndarray) - Confidence scores of the detected objects.
- `labels` (np.ndarray) - Class labels of the detected objects.
- `label_names` (list[str]) - List of class names of the detected objects.

## Example

@@ -34,11 +31,14 @@ model = SSD.create_model("model.xml")
# Forward pass
predictions = model(image)

# Iterate over the segmented objects
for pred_obj in predictions.objects:
    pred_score = pred_obj.score
    label_id = pred_obj.id
    bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax]
# Iterate over the detection results
for box, score, label, label_name in zip(
    predictions.boxes,
    predictions.scores,
    predictions.labels,
    predictions.label_names,
):
    print(f"Box: {box}, Score: {score}, Label: {label}, Label Name: {label_name}")
```
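
The snippet below is a minimal visualization sketch, not part of the model API itself: it assumes OpenCV is installed, `image` is the original BGR array, and `boxes` follow the (x1, y1, x2, y2) pixel format described above.

```python
import cv2

for box, label_name in zip(predictions.boxes, predictions.label_names):
    x1, y1, x2, y2 = box.astype(int)  # boxes are (x1, y1, x2, y2) in pixels
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(image, label_name, (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

cv2.imshow("Detections", image)
cv2.waitKey(0)
```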

```{eval-rst}
31 changes: 17 additions & 14 deletions docs/source/python/models/instance_segmentation.md
@@ -12,16 +12,13 @@ A single input image of shape (H, W, 3) where H and W are the height and width o

### Outputs

Instance segmentation model outputs a list of segmented objects (i.e `list[SegmentedObject]`) wrapped in `InstanceSegmentationResult.segmentedObjects`, each containing the following attributes:
Instance segmentation model outputs an `InstanceSegmentationResult` object containing the following attributes:

- `mask` (numpy.ndarray) - A binary mask of the object.
- `score` (float) - Confidence score of the object.
- `id` (int) - Class label of the object.
- `str_label` (str) - String label of the object.
- `xmin` (int) - X-coordinate of the top-left corner of the bounding box.
- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box.
- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box.
- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box.
- `boxes` (np.ndarray) - Bounding boxes of the detected objects, each in (x1, y1, x2, y2) format.
- `scores` (np.ndarray) - Confidence scores of the detected objects.
- `masks` (np.ndarray) - Segmentation masks of the detected objects.
- `labels` (np.ndarray) - Class labels of the detected objects.
- `label_names` (list[str]) - List of class names of the detected objects.

## Example

@@ -36,11 +33,17 @@ model = MaskRCNNModel.create_model("model.xml")
predictions = model(image)

# Iterate over the segmented objects
for pred_obj in predictions.segmentedObjects:
    pred_mask = pred_obj.mask
    pred_score = pred_obj.score
    label_id = pred_obj.id
    bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax]
for box, score, mask, label, label_name in zip(
    predictions.boxes,
    predictions.scores,
    predictions.masks,
    predictions.labels,
    predictions.label_names,
):
    print(f"Box: {box}, Score: {score}, Label: {label}, Label Name: {label_name}")
    cv2.imshow("Mask", mask)
    cv2.waitKey(0)
cv2.destroyAllWindows()
```
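
For a quick overlay of all instance masks at once, a minimal sketch (assuming `image` is the original BGR array and each mask is a binary array of the same spatial size, which holds when `postprocess_semantic_masks` is enabled):

```python
import cv2

overlay = image.copy()
for mask in predictions.masks:
    overlay[mask.astype(bool)] = (0, 0, 255)  # tint masked pixels red

blended = cv2.addWeighted(image, 0.6, overlay, 0.4, 0)
cv2.imshow("Instances", blended)
cv2.waitKey(0)
```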

```{eval-rst}
8 changes: 2 additions & 6 deletions model_api/python/model_api/models/__init__.py
@@ -16,13 +16,11 @@
    ClassificationResult,
    Contour,
    DetectedKeypoints,
    Detection,
    DetectionResult,
    ImageResultWithSoftPrediction,
    InstanceSegmentationResult,
    PredictedMask,
    SegmentedObject,
    SegmentedObjectWithRects,
    RotatedSegmentationResult,
    VisualPromptingResult,
    ZSLVisualPromptingResult,
)
@@ -90,14 +88,12 @@
"SAMImageEncoder",
"ClassificationResult",
"Prompt",
"Detection",
"DetectionResult",
"DetectedKeypoints",
"classification_models",
"detection_models",
"segmentation_models",
"SegmentedObject",
"SegmentedObjectWithRects",
"RotatedSegmentationResult",
"add_rotated_rects",
"get_contours",
]
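
A hypothetical smoke test of the public exports after this change (not part of the library's test suite): it only checks that the array-based result types are importable and that the removed per-object types are gone.

```python
from model_api.models import DetectionResult, InstanceSegmentationResult  # noqa: F401

try:
    from model_api.models import Detection  # removed by this PR
except ImportError:
    print("Detection is no longer exported, as expected")
```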
77 changes: 24 additions & 53 deletions model_api/python/model_api/models/detection_model.py
@@ -3,8 +3,10 @@
# SPDX-License-Identifier: Apache-2.0
#

import numpy as np

from .image_model import ImageModel
from .result_types import Detection
from .result_types import DetectionResult
from .types import ListValue, NumericalValue, StringValue
from .utils import load_labels

@@ -65,18 +67,15 @@ def parameters(cls):

        return parameters

    def _resize_detections(self, detections: list[Detection], meta):
    def _resize_detections(self, detection_result: DetectionResult, meta: dict):
        """Resizes detection bounding boxes according to initial image shape.

        It implements image resizing depending on the set `resize_type` (see `ImageModel` for details).
        Next, it applies bounding box clipping.

        Args:
            detections (List[Detection]): list of detections with coordinates in normalized form
            detection_result (DetectionResult): detection result with coordinates in normalized form
            meta (dict): the input metadata obtained from the `preprocess` method

        Returns:
            - list of detections with resized and clipped coordinates to fit the initial image
        """
        input_img_height, input_img_widht = meta["original_shape"][:2]
        inverted_scale_x = input_img_widht / self.w
@@ -92,63 +91,35 @@ def _resize_detections(self, detections: list[Detection], meta):
        pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
        pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2

        def _clamp_and_round(val, min_value, max_value):
            return round(max(min_value, min(max_value, val)))
        boxes = detection_result.bboxes
        boxes[:, 0::2] = (boxes[:, 0::2] * self.w - pad_left) * inverted_scale_x
        boxes[:, 1::2] = (boxes[:, 1::2] * self.h - pad_top) * inverted_scale_y
        np.round(boxes, out=boxes)
        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_widht)
        boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, input_img_height)
        detection_result.bboxes = boxes.astype(np.int32)

        for detection in detections:
            detection.xmin = _clamp_and_round(
                (detection.xmin * self.w - pad_left) * inverted_scale_x,
                0,
                input_img_widht,
            )
            detection.ymin = _clamp_and_round(
                (detection.ymin * self.h - pad_top) * inverted_scale_y,
                0,
                input_img_height,
            )
            detection.xmax = _clamp_and_round(
                (detection.xmax * self.w - pad_left) * inverted_scale_x,
                0,
                input_img_widht,
            )
            detection.ymax = _clamp_and_round(
                (detection.ymax * self.h - pad_top) * inverted_scale_y,
                0,
                input_img_height,
            )

        return detections

    def _filter_detections(self, detections: list[Detection], box_area_threshold=0.0):
    def _filter_detections(self, detection_result: DetectionResult, box_area_threshold=0.0):
        """Filters detections by confidence threshold and box size threshold

        Args:
            detections (List[Detection]): list of detections with coordinates in normalized form
            detection_result (DetectionResult): DetectionResult object with coordinates in normalized form
            box_area_threshold (float): minimal area of a bounding box to be considered

        Returns:
            - list of detections with confidence above the threshold
        """
        filtered_detections = []
        for detection in detections:
            if (
                detection.score < self.confidence_threshold
                or (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin) < box_area_threshold
            ):
                continue
            filtered_detections.append(detection)

        return filtered_detections

    def _add_label_names(self, detections: list[Detection]):
        keep = (detection_result.get_obj_sizes() > box_area_threshold) & (
            detection_result.scores > self.confidence_threshold
        )
        detection_result.bboxes = detection_result.bboxes[keep]
        detection_result.labels = detection_result.labels[keep]
        detection_result.scores = detection_result.scores[keep]

    def _add_label_names(self, detection_result: DetectionResult) -> None:
        """Adds label names to detections if they are available

        Args:
            detections (List[Detection]): list of detections with coordinates in normalized form

        Returns:
            - list of detections with label strings
            detection_result (DetectionResult): detection result to be enriched with label names
        """
        for detection in detections:
            detection.str_label = self.get_label_name(detection.id)
        return detections
        detection_result.label_names = [self.get_label_name(label_idx) for label_idx in detection_result.labels]
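
A standalone sketch of the vectorized rescaling in `_resize_detections`, with toy stand-ins for the model attributes (`self.w`, `self.h`, padding and scales), showing how the `0::2`/`1::2` slices hit the x- and y-coordinates:

```python
import numpy as np

w, h = 640, 480                            # network input size (stand-in for self.w/self.h)
pad_left, pad_top = 0, 40                  # letterbox padding
inverted_scale_x = inverted_scale_y = 2.0  # original size / resized size

# One box in normalized (x1, y1, x2, y2) form
boxes = np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32)

boxes[:, 0::2] = (boxes[:, 0::2] * w - pad_left) * inverted_scale_x  # x1, x2
boxes[:, 1::2] = (boxes[:, 1::2] * h - pad_top) * inverted_scale_y   # y1, y2
print(boxes.round().astype(np.int32))  # [[320 160 960 640]]
```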
62 changes: 30 additions & 32 deletions model_api/python/model_api/models/instance_segmentation.py
@@ -9,7 +9,7 @@
from model_api.adapters.inference_adapter import InferenceAdapter

from .image_model import ImageModel
from .result_types import InstanceSegmentationResult, SegmentedObject
from .result_types import InstanceSegmentationResult
from .types import BooleanValue, ListValue, NumericalValue, StringValue
from .utils import load_labels

@@ -176,27 +176,31 @@ def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
            out=boxes,
        )

        objects = []
        has_feature_vector_name = _feature_vector_name in self.outputs
        if has_feature_vector_name:
            if not self.labels:
                self.raise_error("Can't get number of classes because labels are empty")
            saliency_maps: list = [[] for _ in range(len(self.labels))]
        else:
            saliency_maps = []
        for box, confidence, cls, raw_mask in zip(boxes, scores, labels, masks):
            x1, y1, x2, y2 = box
            if (x2 - x1) * (y2 - y1) < 1 or (confidence <= self.confidence_threshold and not has_feature_vector_name):
                continue

            # Skip if label index is out of bounds
            if self.labels and cls >= len(self.labels):
                continue
        # Apply confidence threshold, bounding box area filter and label index filter.
        keep = (scores > self.confidence_threshold) & ((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) > 1)

        if self.labels:
            keep &= labels < len(self.labels)

        boxes = boxes[keep].astype(np.int32)
        scores = scores[keep]
        labels = labels[keep]
        masks = masks[keep]

            # Get label string
            str_label = self.labels[cls] if self.labels else f"#{cls}"
        resized_masks, label_names = [], []
        for box, label_idx, raw_mask in zip(boxes, labels, masks):
            if self.labels:
                label_names.append(self.labels[label_idx])

            raw_cls_mask = raw_mask[cls, ...] if self.is_segmentoly else raw_mask
            raw_cls_mask = raw_mask[label_idx, ...] if self.is_segmentoly else raw_mask
            if self.postprocess_semantic_masks or has_feature_vector_name:
                resized_mask = _segm_postprocess(
                    box,
@@ -205,27 +209,21 @@ def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
                )
            else:
                resized_mask = raw_cls_mask
            if confidence > self.confidence_threshold:
                output_mask = resized_mask if self.postprocess_semantic_masks else raw_cls_mask
                xmin, ymin, xmax, ymax = box.astype(int)
                objects.append(
                    SegmentedObject(
                        xmin,
                        ymin,
                        xmax,
                        ymax,
                        score=confidence,
                        id=cls,
                        str_label=str_label,
                        mask=output_mask,
                    ),
                )
            if has_feature_vector_name and confidence > self.confidence_threshold:
                saliency_maps[cls - 1].append(resized_mask)

            output_mask = resized_mask if self.postprocess_semantic_masks else raw_cls_mask
            resized_masks.append(output_mask)
            if has_feature_vector_name:
                saliency_maps[label_idx - 1].append(resized_mask)

        _masks = np.stack(resized_masks) if len(resized_masks) > 0 else np.empty((0, 16, 16), dtype=np.uint8)
        return InstanceSegmentationResult(
            objects,
            _average_and_normalize(saliency_maps),
            outputs.get(_feature_vector_name, np.ndarray(0)),
            bboxes=boxes,
            labels=labels,
            scores=scores,
            masks=_masks,
            label_names=label_names if label_names else None,
            saliency_map=_average_and_normalize(saliency_maps),
            feature_vector=outputs.get(_feature_vector_name, np.ndarray(0)),
        )


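The `keep` mask pattern above filters all parallel output arrays in lockstep; a minimal sketch with toy values (variable names mirror those in `postprocess`):

```python
import numpy as np

scores = np.array([0.9, 0.2, 0.7])
boxes = np.array([[0, 0, 10, 10], [0, 0, 0, 0], [5, 5, 20, 20]])
labels = np.array([1, 0, 2])
confidence_threshold = 0.5

# Same predicate as postprocess: confident AND non-degenerate box area
keep = (scores > confidence_threshold) & ((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) > 1)

boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
print(labels)  # [1 2]
```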
16 changes: 9 additions & 7 deletions model_api/python/model_api/models/keypoint_detection.py
@@ -10,7 +10,7 @@
import numpy as np

from .image_model import ImageModel
from .result_types import DetectedKeypoints, Detection
from .result_types import DetectedKeypoints, DetectionResult
from .types import ListValue


@@ -77,25 +77,27 @@ def __init__(self, base_model: KeypointDetectionModel) -> None:
    def predict(
        self,
        image: np.ndarray,
        detections: list[Detection],
        detection_result: DetectionResult,
    ) -> list[DetectedKeypoints]:
        """Predicts keypoints for the given image and detections.

        Args:
            image (np.ndarray): input full-size image
            detections (list[Detection]): detections located within the given image
            detection_result (DetectionResult): detections located within the given image

        Returns:
            list[DetectedKeypoints]: per detection keypoints in detection coordinates
        """
        crops = []
        for det in detections:
            crops.append(image[det.ymin : det.ymax, det.xmin : det.xmax])
        for box in detection_result.bboxes:
            x1, y1, x2, y2 = box
            crops.append(image[y1:y2, x1:x2])

        crops_results = self.predict_crops(crops)
        for i, det in enumerate(detections):
        for i, box in enumerate(detection_result.bboxes):
            x1, y1, x2, y2 = box
            crops_results[i] = DetectedKeypoints(
                crops_results[i].keypoints + np.array([det.xmin, det.ymin]),
                crops_results[i].keypoints + np.array([x1, y1]),
                crops_results[i].scores,
            )

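A hypothetical end-to-end sketch of the top-down keypoint flow (model paths are placeholders, and the pipeline class name is assumed from this module; `predict` pairs each returned `DetectedKeypoints` with the box it was cropped from):

```python
import cv2
from model_api.models import SSD, KeypointDetectionModel, TopDownKeypointDetectionPipeline

image = cv2.imread("sample.jpg")
detector = SSD.create_model("detector.xml")                            # placeholder path
keypoint_model = KeypointDetectionModel.create_model("keypoints.xml")  # placeholder path
pipeline = TopDownKeypointDetectionPipeline(keypoint_model)

detections = detector(image)  # DetectionResult with .bboxes
per_object_keypoints = pipeline.predict(image, detections)
for box, kpts in zip(detections.bboxes, per_object_keypoints):
    print(box, kpts.keypoints.shape)
```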