26 changes: 13 additions & 13 deletions docs/source/python/models/detection_model.md
@@ -12,15 +12,12 @@ A single input image of shape (H, W, 3) where H and W are the height and width o

### Outputs

Detection model outputs a list of detection objects (i.e `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes:
Detection model outputs a `DetectionResult` object containing the following attributes:

- `score` (float) - Confidence score of the object.
- `id` (int) - Class label of the object.
- `str_label` (str) - String label of the object.
- `xmin` (int) - X-coordinate of the top-left corner of the bounding box.
- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box.
- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box.
- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box.
- `boxes` (np.ndarray) - Bounding boxes of the detected objects, each in (x1, y1, x2, y2) format.
- `scores` (np.ndarray) - Confidence scores of the detected objects.
- `labels` (np.ndarray) - Class labels of the detected objects.
- `label_names` (list[str]) - List of class names of the detected objects.

## Example

@@ -34,11 +31,14 @@ model = SSD.create_model("model.xml")
# Forward pass
predictions = model(image)

# Iterate over the segmented objects
for pred_obj in predictions.objects:
    pred_score = pred_obj.score
    label_id = pred_obj.id
    bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax]
# Iterate over the detection results
for box, score, label, label_name in zip(
    predictions.boxes,
    predictions.scores,
    predictions.labels,
    predictions.label_names,
):
    print(f"Box: {box}, Score: {score}, Label: {label}, Label Name: {label_name}")
```
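
The snippet below is a minimal visualization sketch, not part of the model API itself: it assumes OpenCV is installed, `image` is the original BGR array, and `boxes` follow the (x1, y1, x2, y2) pixel format described above.

```python
import cv2

for box, label_name in zip(predictions.boxes, predictions.label_names):
    x1, y1, x2, y2 = box.astype(int)  # boxes are (x1, y1, x2, y2) in pixels
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(image, label_name, (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

cv2.imshow("Detections", image)
cv2.waitKey(0)
```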

```{eval-rst}
31 changes: 17 additions & 14 deletions docs/source/python/models/instance_segmentation.md
@@ -12,16 +12,13 @@ A single input image of shape (H, W, 3) where H and W are the height and width o

### Outputs

Instance segmentation model outputs a list of segmented objects (i.e `list[SegmentedObject]`) wrapped in `InstanceSegmentationResult.segmentedObjects`, each containing the following attributes:
Instance segmentation model outputs an `InstanceSegmentationResult` object containing the following attributes:

- `mask` (numpy.ndarray) - A binary mask of the object.
- `score` (float) - Confidence score of the object.
- `id` (int) - Class label of the object.
- `str_label` (str) - String label of the object.
- `xmin` (int) - X-coordinate of the top-left corner of the bounding box.
- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box.
- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box.
- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box.
- `boxes` (np.ndarray) - Bounding boxes of the detected objects, each in (x1, y1, x2, y2) format.
- `scores` (np.ndarray) - Confidence scores of the detected objects.
- `masks` (np.ndarray) - Segmentation masks of the detected objects.
- `labels` (np.ndarray) - Class labels of the detected objects.
- `label_names` (list[str]) - List of class names of the detected objects.

## Example

@@ -36,11 +33,17 @@ model = MaskRCNNModel.create_model("model.xml")
predictions = model(image)

# Iterate over the segmented objects
for pred_obj in predictions.segmentedObjects:
    pred_mask = pred_obj.mask
    pred_score = pred_obj.score
    label_id = pred_obj.id
    bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax]
for box, score, mask, label, label_name in zip(
    predictions.boxes,
    predictions.scores,
    predictions.masks,
    predictions.labels,
    predictions.label_names,
):
    print(f"Box: {box}, Score: {score}, Label: {label}, Label Name: {label_name}")
    cv2.imshow("Mask", mask)
    cv2.waitKey(0)
cv2.destroyAllWindows()
```
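
For a quick overlay of all instance masks at once, a minimal sketch (assuming `image` is the original BGR array and each mask is a binary array of the same spatial size, which holds when `postprocess_semantic_masks` is enabled):

```python
import cv2

overlay = image.copy()
for mask in predictions.masks:
    overlay[mask.astype(bool)] = (0, 0, 255)  # tint masked pixels red

blended = cv2.addWeighted(image, 0.6, overlay, 0.4, 0)
cv2.imshow("Instances", blended)
cv2.waitKey(0)
```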

```{eval-rst}
8 changes: 2 additions & 6 deletions model_api/python/model_api/models/__init__.py
@@ -16,13 +16,11 @@
    ClassificationResult,
    Contour,
    DetectedKeypoints,
    Detection,
    DetectionResult,
    ImageResultWithSoftPrediction,
    InstanceSegmentationResult,
    PredictedMask,
    SegmentedObject,
    SegmentedObjectWithRects,
    RotatedSegmentationResult,
    VisualPromptingResult,
    ZSLVisualPromptingResult,
)
@@ -90,14 +88,12 @@
"SAMImageEncoder",
"ClassificationResult",
"Prompt",
"Detection",
"DetectionResult",
"DetectedKeypoints",
"classification_models",
"detection_models",
"segmentation_models",
"SegmentedObject",
"SegmentedObjectWithRects",
"RotatedSegmentationResult",
"add_rotated_rects",
"get_contours",
]
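
A hypothetical smoke test of the public exports after this change (not part of the library's test suite): it only checks that the array-based result types are importable and that the removed per-object types are gone.

```python
from model_api.models import DetectionResult, InstanceSegmentationResult  # noqa: F401

try:
    from model_api.models import Detection  # removed by this PR
except ImportError:
    print("Detection is no longer exported, as expected")
```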
77 changes: 24 additions & 53 deletions model_api/python/model_api/models/detection_model.py
@@ -3,8 +3,10 @@
# SPDX-License-Identifier: Apache-2.0
#

import numpy as np

from .image_model import ImageModel
from .result_types import Detection
from .result_types import DetectionResult
from .types import ListValue, NumericalValue, StringValue
from .utils import load_labels

@@ -65,18 +67,15 @@ def parameters(cls):

        return parameters

    def _resize_detections(self, detections: list[Detection], meta):
    def _resize_detections(self, detection_result: DetectionResult, meta: dict):
        """Resizes detection bounding boxes according to initial image shape.

        It implements image resizing depending on the set `resize_type` (see `ImageModel` for details).
        Next, it applies bounding box clipping.

        Args:
            detections (List[Detection]): list of detections with coordinates in normalized form
            detection_result (DetectionResult): detection result with coordinates in normalized form
            meta (dict): the input metadata obtained from the `preprocess` method

        Returns:
            - list of detections with resized and clipped coordinates to fit the initial image
        """
        input_img_height, input_img_widht = meta["original_shape"][:2]
        inverted_scale_x = input_img_widht / self.w
@@ -92,63 +91,35 @@ def _resize_detections(self, detections: list[Detection], meta):
        pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
        pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2

        def _clamp_and_round(val, min_value, max_value):
            return round(max(min_value, min(max_value, val)))
        boxes = detection_result.bboxes
        boxes[:, 0::2] = (boxes[:, 0::2] * self.w - pad_left) * inverted_scale_x
        boxes[:, 1::2] = (boxes[:, 1::2] * self.h - pad_top) * inverted_scale_y
        np.round(boxes, out=boxes)
        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_widht)
        boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, input_img_height)
        detection_result.bboxes = boxes.astype(np.int32)

        for detection in detections:
            detection.xmin = _clamp_and_round(
                (detection.xmin * self.w - pad_left) * inverted_scale_x,
                0,
                input_img_widht,
            )
            detection.ymin = _clamp_and_round(
                (detection.ymin * self.h - pad_top) * inverted_scale_y,
                0,
                input_img_height,
            )
            detection.xmax = _clamp_and_round(
                (detection.xmax * self.w - pad_left) * inverted_scale_x,
                0,
                input_img_widht,
            )
            detection.ymax = _clamp_and_round(
                (detection.ymax * self.h - pad_top) * inverted_scale_y,
                0,
                input_img_height,
            )

        return detections

    def _filter_detections(self, detections: list[Detection], box_area_threshold=0.0):
    def _filter_detections(self, detection_result: DetectionResult, box_area_threshold=0.0):
        """Filters detections by confidence threshold and box size threshold

        Args:
            detections (List[Detection]): list of detections with coordinates in normalized form
            detection_result (DetectionResult): DetectionResult object with coordinates in normalized form
            box_area_threshold (float): minimal area of a bounding box to be considered

        Returns:
            - list of detections with confidence above the threshold
        """
        filtered_detections = []
        for detection in detections:
            if (
                detection.score < self.confidence_threshold
                or (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin) < box_area_threshold
            ):
                continue
            filtered_detections.append(detection)

        return filtered_detections

    def _add_label_names(self, detections: list[Detection]):
        keep = (detection_result.get_obj_sizes() > box_area_threshold) & (
            detection_result.scores > self.confidence_threshold
        )
        detection_result.bboxes = detection_result.bboxes[keep]
        detection_result.labels = detection_result.labels[keep]
        detection_result.scores = detection_result.scores[keep]

    def _add_label_names(self, detection_result: DetectionResult) -> None:
        """Adds label names to detections if they are available

        Args:
            detections (List[Detection]): list of detections with coordinates in normalized form

        Returns:
            - list of detections with label strings
            detection_result (DetectionResult): detection result to be enriched with label names
        """
        for detection in detections:
            detection.str_label = self.get_label_name(detection.id)
        return detections
        detection_result.label_names = [self.get_label_name(label_idx) for label_idx in detection_result.labels]
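
A standalone sketch of the vectorized rescaling in `_resize_detections`, with toy stand-ins for the model attributes (`self.w`, `self.h`, padding and scales), showing how the `0::2`/`1::2` slices hit the x- and y-coordinates:

```python
import numpy as np

w, h = 640, 480                            # network input size (stand-in for self.w/self.h)
pad_left, pad_top = 0, 40                  # letterbox padding
inverted_scale_x = inverted_scale_y = 2.0  # original size / resized size

# One box in normalized (x1, y1, x2, y2) form
boxes = np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32)

boxes[:, 0::2] = (boxes[:, 0::2] * w - pad_left) * inverted_scale_x  # x1, x2
boxes[:, 1::2] = (boxes[:, 1::2] * h - pad_top) * inverted_scale_y   # y1, y2
print(boxes.round().astype(np.int32))  # [[320 160 960 640]]
```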
62 changes: 30 additions & 32 deletions model_api/python/model_api/models/instance_segmentation.py
@@ -9,7 +9,7 @@
from model_api.adapters.inference_adapter import InferenceAdapter

from .image_model import ImageModel
from .result_types import InstanceSegmentationResult, SegmentedObject
from .result_types import InstanceSegmentationResult
from .types import BooleanValue, ListValue, NumericalValue, StringValue
from .utils import load_labels

@@ -176,27 +176,31 @@ def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
            out=boxes,
        )

        objects = []
        has_feature_vector_name = _feature_vector_name in self.outputs
        if has_feature_vector_name:
            if not self.labels:
                self.raise_error("Can't get number of classes because labels are empty")
            saliency_maps: list = [[] for _ in range(len(self.labels))]
        else:
            saliency_maps = []
        for box, confidence, cls, raw_mask in zip(boxes, scores, labels, masks):
            x1, y1, x2, y2 = box
            if (x2 - x1) * (y2 - y1) < 1 or (confidence <= self.confidence_threshold and not has_feature_vector_name):
                continue

            # Skip if label index is out of bounds
            if self.labels and cls >= len(self.labels):
                continue
        # Apply confidence threshold, bounding box area filter and label index filter.
        keep = (scores > self.confidence_threshold) & ((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) > 1)

        if self.labels:
            keep &= labels < len(self.labels)

        boxes = boxes[keep].astype(np.int32)
        scores = scores[keep]
        labels = labels[keep]
        masks = masks[keep]

            # Get label string
            str_label = self.labels[cls] if self.labels else f"#{cls}"
        resized_masks, label_names = [], []
        for box, label_idx, raw_mask in zip(boxes, labels, masks):
            if self.labels:
                label_names.append(self.labels[label_idx])

            raw_cls_mask = raw_mask[cls, ...] if self.is_segmentoly else raw_mask
            raw_cls_mask = raw_mask[label_idx, ...] if self.is_segmentoly else raw_mask
            if self.postprocess_semantic_masks or has_feature_vector_name:
                resized_mask = _segm_postprocess(
                    box,
@@ -205,27 +209,21 @@ def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
                )
            else:
                resized_mask = raw_cls_mask
            if confidence > self.confidence_threshold:
                output_mask = resized_mask if self.postprocess_semantic_masks else raw_cls_mask
                xmin, ymin, xmax, ymax = box.astype(int)
                objects.append(
                    SegmentedObject(
                        xmin,
                        ymin,
                        xmax,
                        ymax,
                        score=confidence,
                        id=cls,
                        str_label=str_label,
                        mask=output_mask,
                    ),
                )
            if has_feature_vector_name and confidence > self.confidence_threshold:
                saliency_maps[cls - 1].append(resized_mask)

            output_mask = resized_mask if self.postprocess_semantic_masks else raw_cls_mask
            resized_masks.append(output_mask)
            if has_feature_vector_name:
                saliency_maps[label_idx - 1].append(resized_mask)

        _masks = np.stack(resized_masks) if len(resized_masks) > 0 else np.empty((0, 16, 16), dtype=np.uint8)
        return InstanceSegmentationResult(
            objects,
            _average_and_normalize(saliency_maps),
            outputs.get(_feature_vector_name, np.ndarray(0)),
            bboxes=boxes,
            labels=labels,
            scores=scores,
            masks=_masks,
            label_names=label_names if label_names else None,
            saliency_map=_average_and_normalize(saliency_maps),
            feature_vector=outputs.get(_feature_vector_name, np.ndarray(0)),
        )


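The `keep` mask pattern above filters all parallel output arrays in lockstep; a minimal sketch with toy values (variable names mirror those in `postprocess`):

```python
import numpy as np

scores = np.array([0.9, 0.2, 0.7])
boxes = np.array([[0, 0, 10, 10], [0, 0, 0, 0], [5, 5, 20, 20]])
labels = np.array([1, 0, 2])
confidence_threshold = 0.5

# Same predicate as postprocess: confident AND non-degenerate box area
keep = (scores > confidence_threshold) & ((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) > 1)

boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
print(labels)  # [1 2]
```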
16 changes: 9 additions & 7 deletions model_api/python/model_api/models/keypoint_detection.py
@@ -10,7 +10,7 @@
import numpy as np

from .image_model import ImageModel
from .result_types import DetectedKeypoints, Detection
from .result_types import DetectedKeypoints, DetectionResult
from .types import ListValue


@@ -77,25 +77,27 @@ def __init__(self, base_model: KeypointDetectionModel) -> None:
    def predict(
        self,
        image: np.ndarray,
        detections: list[Detection],
        detection_result: DetectionResult,
    ) -> list[DetectedKeypoints]:
        """Predicts keypoints for the given image and detections.

        Args:
            image (np.ndarray): input full-size image
            detections (list[Detection]): detections located within the given image
            detection_result (DetectionResult): detections located within the given image

        Returns:
            list[DetectedKeypoints]: per detection keypoints in detection coordinates
        """
        crops = []
        for det in detections:
            crops.append(image[det.ymin : det.ymax, det.xmin : det.xmax])
        for box in detection_result.bboxes:
            x1, y1, x2, y2 = box
            crops.append(image[y1:y2, x1:x2])

        crops_results = self.predict_crops(crops)
        for i, det in enumerate(detections):
        for i, box in enumerate(detection_result.bboxes):
            x1, y1, x2, y2 = box
            crops_results[i] = DetectedKeypoints(
                crops_results[i].keypoints + np.array([det.xmin, det.ymin]),
                crops_results[i].keypoints + np.array([x1, y1]),
                crops_results[i].scores,
            )

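A hypothetical end-to-end sketch of the top-down keypoint flow (model paths are placeholders, and the pipeline class name is assumed from this module; `predict` pairs each returned `DetectedKeypoints` with the box it was cropped from):

```python
import cv2
from model_api.models import SSD, KeypointDetectionModel, TopDownKeypointDetectionPipeline

image = cv2.imread("sample.jpg")
detector = SSD.create_model("detector.xml")                            # placeholder path
keypoint_model = KeypointDetectionModel.create_model("keypoints.xml")  # placeholder path
pipeline = TopDownKeypointDetectionPipeline(keypoint_model)

detections = detector(image)  # DetectionResult with .bboxes
per_object_keypoints = pipeline.predict(image, detections)
for box, kpts in zip(detections.bboxes, per_object_keypoints):
    print(box, kpts.keypoints.shape)
```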