1010#include " utils/math.h"
1111#include " utils/preprocessing.h"
1212#include " utils/tensor.h"
13+ #include " utils/nms.h"
1314
1415constexpr char saliency_map_name[]{" saliency_map" };
1516constexpr char feature_vector_name[]{" feature_vector" };
@@ -99,7 +100,6 @@ Lbm filterTensors(const std::map<std::string, ov::Tensor>& infResult) {
99100}
100101
101102cv::Mat segm_postprocess (const SegmentedObject& box, const cv::Mat& unpadded, int im_h, int im_w) {
102- // Add zero border to prevent upsampling artifacts on segment borders.
103103 cv::Mat raw_cls_mask;
104104 cv::copyMakeBorder (unpadded, raw_cls_mask, 1 , 1 , 1 , 1 , cv::BORDER_CONSTANT, {0 });
105105 cv::Rect extended_box = expand_box (box, float (raw_cls_mask.cols ) / (raw_cls_mask.cols - 2 ));
@@ -137,7 +137,8 @@ void InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
137137 }
138138
139139 auto interpolation_mode = cv::INTER_LINEAR;
140- utils::RESIZE_MODE resize_mode = utils::RESIZE_FILL;
140+ utils::RESIZE_MODE resize_mode;
141+ resize_mode = utils::get_from_any_maps (" resize_type" , config, ov::AnyMap{}, resize_mode);
141142
142143 std::vector<float > scale_values;
143144 std::vector<float > mean_values;
@@ -186,7 +187,7 @@ void InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
186187 ov_model->set_rt_info (input_shape.height , " model_info" , " orig_height" );
187188}
188189
189- InstanceSegmentation InstanceSegmentation::load (const std::string& model_path) {
190+ InstanceSegmentation InstanceSegmentation::load (const std::string& model_path, const ov::AnyMap& configuration ) {
190191 auto core = ov::Core ();
191192 std::shared_ptr<ov::Model> model = core.read_model (model_path);
192193
@@ -204,15 +205,15 @@ InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) {
204205 }
205206 auto adapter = std::make_shared<OpenVINOInferenceAdapter>();
206207 adapter->loadModel (model, core, " AUTO" );
207- return InstanceSegmentation (adapter);
208+ return InstanceSegmentation (adapter, configuration );
208209}
209210
210211InstanceSegmentationResult InstanceSegmentation::infer (cv::Mat image) {
211- return pipeline. infer (image);
212+ return pipeline-> infer (image);
212213}
213214
214215std::vector<InstanceSegmentationResult> InstanceSegmentation::inferBatch (std::vector<cv::Mat> images) {
215- return pipeline. inferBatch (images);
216+ return pipeline-> inferBatch (images);
216217}
217218
218219std::map<std::string, ov::Tensor> InstanceSegmentation::preprocess (cv::Mat image) {
@@ -226,11 +227,14 @@ InstanceSegmentationResult InstanceSegmentation::postprocess(InferenceResult& in
226227 floatInputImgHeight = float (infResult.inputImageSize .height );
227228 float invertedScaleX = floatInputImgWidth / input_shape.width ,
228229 invertedScaleY = floatInputImgHeight / input_shape.height ;
230+
231+ std::cout << " got an inf result with image: " << infResult.inputImageSize << std::endl;
232+ std::cout << " resize mode: " << resize_mode << std::endl;
229233 int padLeft = 0 , padTop = 0 ;
230- auto resizeMode = utils::RESIZE_FILL;
231- if (utils::RESIZE_KEEP_ASPECT == resizeMode || utils::RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
234+ if (utils::RESIZE_KEEP_ASPECT == resize_mode || utils::RESIZE_KEEP_ASPECT_LETTERBOX == resize_mode) {
235+ std::cout << " using some other resize mode... " << std::endl;
232236 invertedScaleX = invertedScaleY = std::max (invertedScaleX, invertedScaleY);
233- if (utils::RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode ) {
237+ if (utils::RESIZE_KEEP_ASPECT_LETTERBOX == resize_mode ) {
234238 padLeft = (input_shape.width - int (std::round (floatInputImgWidth / invertedScaleX))) / 2 ;
235239 padTop = (input_shape.height - int (std::round (floatInputImgHeight / invertedScaleY))) / 2 ;
236240 }
@@ -302,6 +306,149 @@ InstanceSegmentationResult InstanceSegmentation::postprocess(InferenceResult& in
302306 return result;
303307}
304308
309+ InstanceSegmentationResult InstanceSegmentation::postprocess_tile (InstanceSegmentationResult result, const cv::Rect& coord) {
310+ for (auto & det : result.segmentedObjects ) {
311+ det.x += coord.x ;
312+ det.y += coord.y ;
313+ }
314+
315+ if (result.feature_vector ) {
316+ auto tmp_feature_vector =
317+ ov::Tensor (result.feature_vector .get_element_type (), result.feature_vector .get_shape ());
318+ result.feature_vector .copy_to (tmp_feature_vector);
319+ result.feature_vector = tmp_feature_vector;
320+ }
321+
322+ return result;
323+ }
324+
325+ InstanceSegmentationResult InstanceSegmentation::merge_tiling_results (const std::vector<InstanceSegmentationResult>& tiles_results,
326+ const cv::Size& image_size,
327+ const std::vector<cv::Rect>& tile_coords,
328+ const utils::TilingInfo& tiling_info) {
329+ size_t max_pred_number = 200 ; // TODO: Actually get this from config!
330+
331+ InstanceSegmentationResult output;
332+ std::vector<AnchorLabeled> all_detections;
333+ std::vector<std::reference_wrapper<const SegmentedObject>> all_detections_ptrs;
334+ std::vector<float > all_scores;
335+
336+ for (auto & result : tiles_results) {
337+ for (auto & det : result.segmentedObjects ) {
338+ all_detections.emplace_back (det.x , det.y , det.x + det.width , det.y + det.height , det.labelID );
339+ all_scores.push_back (det.confidence );
340+ all_detections_ptrs.push_back (det);
341+ }
342+ }
343+
344+ auto keep_idx = multiclass_nms (all_detections, all_scores, tiling_info.iou_threshold , false , max_pred_number);
345+
346+ output.segmentedObjects .reserve (keep_idx.size ());
347+ for (auto idx : keep_idx) {
348+ if (postprocess_semantic_masks) {
349+ // why does this happen again?
350+ // all_detections_ptrs[idx].get().mask = ;
351+ // SegmentedObject obj = all_detections_ptrs[idx]; //copy
352+ // std::cout << "Mask size before: " << obj.mask.size() << std::endl;
353+ // std::cout << static_cast<cv::Rect>(obj) << std::endl;
354+ // obj.mask = segm_postprocess(all_detections_ptrs[idx],
355+ // obj.mask,
356+ // image_size.height,
357+ // image_size.width);
358+ }
359+
360+ output.segmentedObjects .push_back (all_detections_ptrs[idx]);
361+ }
362+
363+ if (tiles_results.size ()) {
364+ auto first = tiles_results.front ();
365+ if (first.feature_vector ) {
366+ output.feature_vector =
367+ ov::Tensor (first.feature_vector .get_element_type (), first.feature_vector .get_shape ());
368+ }
369+ }
370+
371+ if (output.feature_vector ) {
372+ float * feature_ptr = output.feature_vector .data <float >();
373+ size_t feature_size = output.feature_vector .get_size ();
374+
375+ std::fill (feature_ptr, feature_ptr + feature_size, 0 .f );
376+
377+ for (const auto & result : tiles_results) {
378+ const float * current_feature_ptr = result.feature_vector .data <float >();
379+
380+ for (size_t i = 0 ; i < feature_size; ++i) {
381+ feature_ptr[i] += current_feature_ptr[i];
382+ }
383+ }
384+
385+ for (size_t i = 0 ; i < feature_size; ++i) {
386+ feature_ptr[i] /= tiles_results.size ();
387+ }
388+ }
389+
390+ output.saliency_map = merge_saliency_maps (tiles_results, image_size, tile_coords, tiling_info);
391+
392+ return output;
393+
394+ }
395+
396+
397+ std::vector<cv::Mat_<std::uint8_t >> InstanceSegmentation::merge_saliency_maps (const std::vector<InstanceSegmentationResult>& tiles_results,
398+ const cv::Size& image_size,
399+ const std::vector<cv::Rect>& tile_coords,
400+ const utils::TilingInfo& tiling_info ) {
401+ std::vector<std::vector<cv::Mat_<std::uint8_t >>> all_saliency_maps;
402+ all_saliency_maps.reserve (tiles_results.size ());
403+ for (const auto & result : tiles_results) {
404+ all_saliency_maps.push_back (result.saliency_map );
405+ }
406+
407+ std::vector<cv::Mat_<std::uint8_t >> image_saliency_map;
408+ if (all_saliency_maps.size ()) {
409+ image_saliency_map = all_saliency_maps[0 ];
410+ }
411+
412+ if (image_saliency_map.empty ()) {
413+ return image_saliency_map;
414+ }
415+
416+ size_t num_classes = image_saliency_map.size ();
417+ std::vector<cv::Mat_<std::uint8_t >> merged_map (num_classes);
418+ for (auto & map : merged_map) {
419+ map = cv::Mat_<std::uint8_t >(image_size, 0 );
420+ }
421+
422+ size_t start_idx = tiling_info.tile_with_full_image ? 1 : 0 ;
423+ for (size_t i = start_idx; i < all_saliency_maps.size (); ++i) {
424+ for (size_t class_idx = 0 ; class_idx < num_classes; ++class_idx) {
425+ auto current_cls_map_mat = all_saliency_maps[i][class_idx];
426+ if (current_cls_map_mat.empty ()) {
427+ continue ;
428+ }
429+ const auto & tile = tile_coords[i];
430+ cv::Mat tile_map;
431+ cv::resize (current_cls_map_mat, tile_map, tile.size ());
432+ auto tile_map_merged = cv::Mat (merged_map[class_idx], tile);
433+ cv::Mat (cv::max (tile_map, tile_map_merged)).copyTo (tile_map_merged);
434+ }
435+ }
436+
437+ for (size_t class_idx = 0 ; class_idx < num_classes; ++class_idx) {
438+ auto image_map_cls = tiling_info.tile_with_full_image ? image_saliency_map[class_idx] : cv::Mat_<std::uint8_t >();
439+ if (image_map_cls.empty ()) {
440+ if (cv::sum (merged_map[class_idx]) == cv::Scalar (0 .)) {
441+ merged_map[class_idx] = cv::Mat_<std::uint8_t >();
442+ }
443+ } else {
444+ cv::resize (image_map_cls, image_map_cls, image_size);
445+ cv::Mat (cv::max (merged_map[class_idx], image_map_cls)).copyTo (merged_map[class_idx]);
446+ }
447+ }
448+
449+ return merged_map;
450+ }
451+
305452std::vector<SegmentedObjectWithRects> InstanceSegmentation::getRotatedRectangles (
306453 const InstanceSegmentationResult& result) {
307454 std::vector<SegmentedObjectWithRects> objects_with_rects;
0 commit comments