diff --git a/model_api/cpp/models/src/detection_model_ssd.cpp b/model_api/cpp/models/src/detection_model_ssd.cpp index 16cceadb..2afe9e26 100644 --- a/model_api/cpp/models/src/detection_model_ssd.cpp +++ b/model_api/cpp/models/src/detection_model_ssd.cpp @@ -34,6 +34,7 @@ namespace { constexpr char saliency_map_name[]{"saliency_map"}; constexpr char feature_vector_name[]{"feature_vector"}; +constexpr float box_area_threshold = 1.0f; struct NumAndStep { size_t detectionsNum, objectSize; @@ -83,6 +84,11 @@ std::vector filterOutXai(const std::vector& names) { std::copy_if (names.begin(), names.end(), std::back_inserter(filtered), [](const std::string& name){return name != saliency_map_name && name != feature_vector_name;}); return filtered; } + + +float clamp_and_round(float val, float min, float max) { + return std::round(std::max(min, std::min(max, val))); +} } std::string ModelSSD::ModelType = "ssd"; @@ -214,23 +220,14 @@ std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult desc.confidence = confidence; desc.labelID = labels[i]; desc.label = getLabelName(desc.labelID); - desc.x = clamp( - round((boxes[i * numAndStep.objectSize] * widthScale - padLeft) * invertedScaleX), - 0.f, - floatInputImgWidth); - desc.y = clamp( - round((boxes[i * numAndStep.objectSize + 1] * heightScale - padTop) * invertedScaleY), - 0.f, - floatInputImgHeight); - desc.width = clamp( - round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX), - 0.f, - floatInputImgWidth) - desc.x; - desc.height = clamp( - round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY), - 0.f, - floatInputImgHeight) - desc.y; - result->objects.push_back(desc); + desc.x = clamp_and_round((boxes[i * numAndStep.objectSize] * widthScale - padLeft) * invertedScaleX, 0.f, floatInputImgWidth); + desc.y = clamp_and_round((boxes[i * numAndStep.objectSize + 1] * heightScale - padTop) * invertedScaleY, 0.f, floatInputImgHeight); + desc.width = 
clamp_and_round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX, 0.f, floatInputImgWidth) - desc.x; + desc.height = clamp_and_round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY, 0.f, floatInputImgHeight) - desc.y; + + if (desc.width * desc.height >= box_area_threshold) { + result->objects.push_back(desc); + } } } diff --git a/model_api/python/model_api/models/detection_model.py b/model_api/python/model_api/models/detection_model.py index fd0a87b6..b8092133 100644 --- a/model_api/python/model_api/models/detection_model.py +++ b/model_api/python/model_api/models/detection_model.py @@ -104,25 +104,55 @@ def _resize_detections(self, detections, meta): pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2 pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2 + def _clamp_and_round(val, min_value, max_value): + return round(max(min_value, min(max_value, val))) + for detection in detections: - detection.xmin = min( - max(round((detection.xmin * self.w - pad_left) * inverted_scale_x), 0), + detection.xmin = _clamp_and_round( + (detection.xmin * self.w - pad_left) * inverted_scale_x, + 0, input_img_widht, ) - detection.ymin = min( - max(round((detection.ymin * self.h - pad_top) * inverted_scale_y), 0), + detection.ymin = _clamp_and_round( + (detection.ymin * self.h - pad_top) * inverted_scale_y, + 0, input_img_height, ) - detection.xmax = min( - max(round((detection.xmax * self.w - pad_left) * inverted_scale_x), 0), + detection.xmax = _clamp_and_round( + (detection.xmax * self.w - pad_left) * inverted_scale_x, + 0, input_img_widht, ) - detection.ymax = min( - max(round((detection.ymax * self.h - pad_top) * inverted_scale_y), 0), + detection.ymax = _clamp_and_round( + (detection.ymax * self.h - pad_top) * inverted_scale_y, + 0, input_img_height, ) + return detections + def _filter_detections(self, detections, box_area_threshold=0.0): + """Filters detections by confidence threshold 
and box size threshold + + Args: + detections (List[Detection]): list of detections; box area is computed in the units of their coordinates + box_area_threshold (float): minimal area of the bounding box for a detection to be kept + + Returns: + - list of detections whose confidence and box area are not below their thresholds + """ + filtered_detections = [] + for detection in detections: + if ( + detection.score < self.confidence_threshold + or (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin) + < box_area_threshold + ): + continue + filtered_detections.append(detection) + + return filtered_detections + def _add_label_names(self, detections): """Adds labels names to detections if they are available diff --git a/model_api/python/model_api/models/ssd.py b/model_api/python/model_api/models/ssd.py index 4f29dad5..63e8c559 100644 --- a/model_api/python/model_api/models/ssd.py +++ b/model_api/python/model_api/models/ssd.py @@ -41,6 +41,7 @@ def preprocess(self, inputs): def postprocess(self, outputs, meta): detections = self._parse_outputs(outputs) detections = self._resize_detections(detections, meta) + detections = self._filter_detections(detections, _bbox_area_threshold) detections = self._add_label_names(detections) return DetectionResult( detections, @@ -74,8 +75,7 @@ def _get_output_parser( self.raise_error("Unsupported model outputs") def _parse_outputs(self, outputs): - detections = self.output_parser(outputs) - return [d for d in detections if d.score > self.confidence_threshold] + return self.output_parser(outputs) def find_layer_by_name(name, layers): @@ -176,5 +176,6 @@ def __call__(self, outputs): return detections +_bbox_area_threshold = 1.0 _saliency_map_name = "saliency_map" _feature_vector_name = "feature_vector"