open-edge-platform · sovrasov · Oct 22, 2024 · Oct 19, 2024 · Oct 21, 2024 · Oct 21, 2024
@@ -34,6 +34,7 @@
 namespace {
 constexpr char saliency_map_name[]{"saliency_map"};
 constexpr char feature_vector_name[]{"feature_vector"};
+// Minimum box area (desc.width * desc.height) a detection needs in order to be
+// pushed into the result by ModelSSD::postprocessMultipleOutputs.
+constexpr float box_area_threshold = 1.0f;
 
 struct NumAndStep {
     size_t detectionsNum, objectSize;
@@ -83,6 +84,11 @@ std::vector<std::string> filterOutXai(const std::vector<std::string>& names) {
     std::copy_if (names.begin(), names.end(), std::back_inserter(filtered), [](const std::string& name){return name != saliency_map_name && name != feature_vector_name;});
     return filtered;
 }
+
+
+/// Clamps `val` to the range bounded by `min` and `max`, then rounds the
+/// clamped value with std::round.
+float clamp_and_round(float val, float min, float max) {
+    // Upper bound first: keep `val` only when it is strictly below `max`.
+    const float capped = (val < max) ? val : max;
+    // Then the lower bound: keep `capped` only when it is strictly above `min`.
+    const float bounded = (min < capped) ? capped : min;
+    return std::round(bounded);
+}
 }
 
 std::string ModelSSD::ModelType = "ssd";
@@ -214,23 +220,14 @@ std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult
             desc.confidence = confidence;
             desc.labelID = labels[i];
             desc.label = getLabelName(desc.labelID);
-            desc.x = clamp(
-                round((boxes[i * numAndStep.objectSize] * widthScale - padLeft) * invertedScaleX),
-                0.f,
-                floatInputImgWidth);
-            desc.y = clamp(
-                round((boxes[i * numAndStep.objectSize + 1] * heightScale - padTop) * invertedScaleY),
-                0.f,
-                floatInputImgHeight);
-            desc.width = clamp(
-                round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX),
-                0.f,
-                floatInputImgWidth) - desc.x;
-            desc.height = clamp(
-                round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY),
-                0.f,
-                floatInputImgHeight) - desc.y;
-            result->objects.push_back(desc);
+            desc.x = clamp_and_round((boxes[i * numAndStep.objectSize] * widthScale - padLeft) * invertedScaleX, 0.f, floatInputImgWidth);
+            desc.y = clamp_and_round((boxes[i * numAndStep.objectSize + 1] * heightScale - padTop) * invertedScaleY, 0.f, floatInputImgHeight);
+            desc.width = clamp_and_round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX, 0.f, floatInputImgWidth) - desc.x;
+            desc.height = clamp_and_round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY, 0.f, floatInputImgHeight) - desc.y;
+
+            if (desc.width * desc.height >= box_area_threshold) {
+                result->objects.push_back(desc);
+            }
         }
     }
 

@@ -104,25 +104,55 @@ def _resize_detections(self, detections, meta):
                 pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
                 pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2
 
+        def _clamp_and_round(val, min_value, max_value):
+            return round(max(min_value, min(max_value, val)))
+
         for detection in detections:
-            detection.xmin = min(
-                max(round((detection.xmin * self.w - pad_left) * inverted_scale_x), 0),
+            detection.xmin = _clamp_and_round(
+                (detection.xmin * self.w - pad_left) * inverted_scale_x,
+                0,
                 input_img_widht,
             )
-            detection.ymin = min(
-                max(round((detection.ymin * self.h - pad_top) * inverted_scale_y), 0),
+            detection.ymin = _clamp_and_round(
+                (detection.ymin * self.h - pad_top) * inverted_scale_y,
+                0,
                 input_img_height,
             )
-            detection.xmax = min(
-                max(round((detection.xmax * self.w - pad_left) * inverted_scale_x), 0),
+            detection.xmax = _clamp_and_round(
+                (detection.xmax * self.w - pad_left) * inverted_scale_x,
+                0,
                 input_img_widht,
             )
-            detection.ymax = min(
-                max(round((detection.ymax * self.h - pad_top) * inverted_scale_y), 0),
+            detection.ymax = _clamp_and_round(
+                (detection.ymax * self.h - pad_top) * inverted_scale_y,
+                0,
                 input_img_height,
             )
+
         return detections
 
+    def _filter_detections(self, detections, box_area_threshold=0.0):
+        """Filters detections by confidence threshold and box area threshold
+
+        A detection is dropped when its score is below ``self.confidence_threshold``,
+        when its box is inverted (``xmax < xmin`` or ``ymax < ymin``), or when its
+        area is below ``box_area_threshold``.
+
+        Args:
+            detections (List[Detection]): list of detections; the box area is computed
+                directly from ``xmin``/``ymin``/``xmax``/``ymax``, so ``box_area_threshold``
+                must be expressed in the same units as these coordinates
+            box_area_threshold (float): minimal area of the bounding box to be considered
+
+        Returns:
+            - list of detections that pass both the confidence and the box area checks
+        """
+        filtered_detections = []
+        for detection in detections:
+            if detection.score < self.confidence_threshold:
+                continue
+            box_width = detection.xmax - detection.xmin
+            box_height = detection.ymax - detection.ymin
+            # Two negative sides multiply into a positive "area", so inverted boxes
+            # must be rejected explicitly before the area comparison.
+            if box_width < 0 or box_height < 0:
+                continue
+            if box_width * box_height < box_area_threshold:
+                continue
+            filtered_detections.append(detection)
+
+        return filtered_detections
+
     def _add_label_names(self, detections):
         """Adds labels names to detections if they are available
 

@@ -41,6 +41,7 @@ def preprocess(self, inputs):
     def postprocess(self, outputs, meta):
         detections = self._parse_outputs(outputs)
         detections = self._resize_detections(detections, meta)
+        detections = self._filter_detections(detections, _bbox_area_threshold)
         detections = self._add_label_names(detections)
         return DetectionResult(
             detections,
@@ -74,8 +75,7 @@ def _get_output_parser(
         self.raise_error("Unsupported model outputs")
 
     def _parse_outputs(self, outputs):
-        detections = self.output_parser(outputs)
-        return [d for d in detections if d.score > self.confidence_threshold]
+        """Returns the result of ``self.output_parser`` for the given outputs, without any filtering."""
+        return self.output_parser(outputs)
 
 
 def find_layer_by_name(name, layers):
@@ -176,5 +176,6 @@ def __call__(self, outputs):
         return detections
 
 
+# Minimal bounding box area; postprocess passes it to _filter_detections as box_area_threshold.
+_bbox_area_threshold = 1.0
 _saliency_map_name = "saliency_map"
 _feature_vector_name = "feature_vector"