Commit 87f9aa4

Adds instance segmentation tiling (NOT FINISHED)
The implementation does not yet produce exactly the same output as the reference. The existing maskrcnn tiling test is hard to verify, but its output differs. Tested with a new model, where the output looks reasonable, so this is left as-is with a follow-up task to fix it.
1 parent 2af4d58 commit 87f9aa4
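For context, a minimal usage sketch of the API this commit introduces: loading a model with tiling requested through the new configuration argument and running inference. This is not part of the commit; the include path, file paths, and the bool value type for the "tiling" key are assumptions, while the class and method names come from the diff below.

// Minimal sketch only -- paths and the include layout are placeholders.
#include <opencv2/opencv.hpp>
#include <openvino/openvino.hpp>

#include "tasks/instance_segmentation.h"

int main() {
    ov::AnyMap configuration{{"tiling", true}};  // request the TilingPipeline branch
    auto model = InstanceSegmentation::load("model.xml", configuration);

    cv::Mat image = cv::imread("frame.jpg");
    InstanceSegmentationResult result = model.infer(image);
    return result.segmentedObjects.empty() ? 1 : 0;
}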

File tree

5 files changed: +243 -32 lines


src/cpp/include/tasks/instance_segmentation.h

Lines changed: 48 additions & 12 deletions
@@ -16,40 +16,76 @@
 class InstanceSegmentation {
 public:
     std::shared_ptr<InferenceAdapter> adapter;
-    VisionPipeline<InstanceSegmentationResult> pipeline;
-
-    InstanceSegmentation(std::shared_ptr<InferenceAdapter> adapter) : adapter(adapter) {
-        pipeline = VisionPipeline<InstanceSegmentationResult>(
-            adapter,
-            [&](cv::Mat image) {
-                return preprocess(image);
-            },
-            [&](InferenceResult result) {
-                return postprocess(result);
-            });
+    std::unique_ptr<Pipeline<InstanceSegmentationResult>> pipeline;

+    InstanceSegmentation(std::shared_ptr<InferenceAdapter> adapter, const ov::AnyMap& configuration) : adapter(adapter) {
         auto config = adapter->getModelConfig();
+        tiling = utils::get_from_any_maps("tiling", configuration, config, tiling);
+        if (tiling) {
+            pipeline = std::make_unique<TilingPipeline<InstanceSegmentationResult>>(
+                adapter,
+                utils::get_tiling_info_from_config(config),
+                [&](cv::Mat image) {
+                    return preprocess(image);
+                },
+                [&](InferenceResult result) {
+                    return postprocess(result);
+                },
+                [&](InstanceSegmentationResult result, const cv::Rect& coord) {
+                    return postprocess_tile(result, coord);
+                },
+                [&](const std::vector<InstanceSegmentationResult>& tiles_results,
+                    const cv::Size& image_size,
+                    const std::vector<cv::Rect>& tile_coords,
+                    const utils::TilingInfo& tiling_info) {
+                    return merge_tiling_results(tiles_results, image_size, tile_coords, tiling_info);
+                });
+        } else {
+            pipeline = std::make_unique<VisionPipeline<InstanceSegmentationResult>>(
+                adapter,
+                [&](cv::Mat image) {
+                    return preprocess(image);
+                },
+                [&](InferenceResult result) {
+                    return postprocess(result);
+                });
+        }
         labels = utils::get_from_any_maps("labels", config, {}, labels);
         confidence_threshold = utils::get_from_any_maps("confidence_threshold", config, {}, confidence_threshold);
         input_shape.width = utils::get_from_any_maps("orig_width", config, {}, input_shape.width);
         input_shape.height = utils::get_from_any_maps("orig_height", config, {}, input_shape.width);
+        resize_mode = utils::get_from_any_maps("resize_type", config, {}, resize_mode);
     }

     static void serialize(std::shared_ptr<ov::Model>& ov_model);
-    static InstanceSegmentation load(const std::string& model_path);
+    static InstanceSegmentation load(const std::string& model_path, const ov::AnyMap& configuration);

     InstanceSegmentationResult infer(cv::Mat image);
     std::vector<InstanceSegmentationResult> inferBatch(std::vector<cv::Mat> image);

     std::map<std::string, ov::Tensor> preprocess(cv::Mat);
     InstanceSegmentationResult postprocess(InferenceResult& infResult);
+    InstanceSegmentationResult postprocess_tile(InstanceSegmentationResult, const cv::Rect&);
+    InstanceSegmentationResult merge_tiling_results(const std::vector<InstanceSegmentationResult>& tiles_results,
+                                                    const cv::Size& image_size,
+                                                    const std::vector<cv::Rect>& tile_coords,
+                                                    const utils::TilingInfo& tiling_info);
+    std::vector<cv::Mat_<std::uint8_t>> merge_saliency_maps(const std::vector<InstanceSegmentationResult>&,
+                                                            const cv::Size&,
+                                                            const std::vector<cv::Rect>&,
+                                                            const utils::TilingInfo&);

+
     static std::vector<SegmentedObjectWithRects> getRotatedRectangles(const InstanceSegmentationResult& result);
     static std::vector<Contour> getContours(const std::vector<SegmentedObject>& objects);

     bool postprocess_semantic_masks = true;

 private:
+
+    bool tiling;
+
+    utils::RESIZE_MODE resize_mode;
     std::vector<std::string> labels;
     std::string getLabelName(size_t labelID) {
         return labelID < labels.size() ? labels[labelID] : std::string("Label #") + std::to_string(labelID);

src/cpp/include/utils/config.h

Lines changed: 35 additions & 0 deletions
@@ -8,6 +8,15 @@
 #include <opencv2/opencv.hpp>
 #include <openvino/openvino.hpp>
 namespace utils {
+enum RESIZE_MODE {
+    RESIZE_FILL,
+    RESIZE_KEEP_ASPECT,
+    RESIZE_KEEP_ASPECT_LETTERBOX,
+    RESIZE_CROP,
+    NO_RESIZE,
+};
+
+
 template <typename Type>
 Type get_from_any_maps(const std::string& key,
                        const ov::AnyMap& top_priority,
@@ -42,6 +51,32 @@ inline bool get_from_any_maps(const std::string& key,
     return low_priority;
 }

+template<>
+inline RESIZE_MODE get_from_any_maps(const std::string& key,
+                                     const ov::AnyMap& top_priority,
+                                     const ov::AnyMap& mid_priority,
+                                     RESIZE_MODE low_priority) {
+
+    std::string resize_type = "standard";
+    resize_type = utils::get_from_any_maps("resize_type", top_priority, mid_priority, resize_type);
+    RESIZE_MODE resize = RESIZE_FILL;
+    if ("crop" == resize_type) {
+        resize = RESIZE_CROP;
+    } else if ("standard" == resize_type) {
+        resize = RESIZE_FILL;
+    } else if ("fit_to_window" == resize_type) {
+        resize = RESIZE_KEEP_ASPECT;
+    } else if ("fit_to_window_letterbox" == resize_type) {
+        resize = RESIZE_KEEP_ASPECT_LETTERBOX;
+    } else {
+        throw std::runtime_error("Unknown value for resize_type arg");
+    }
+
+    return resize;
+}
+
+
+
 inline bool model_has_embedded_processing(std::shared_ptr<ov::Model> model) {
     if (model->has_rt_info("model_info")) {
         auto model_info = model->get_rt_info<ov::AnyMap>("model_info");
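As an illustration (not part of the commit), this is how the new specialization resolves a resize string from model_info; the map contents are hypothetical. Note that the specialization always looks up the "resize_type" key, regardless of the key argument it receives.

#include <openvino/openvino.hpp>
#include "utils/config.h"

utils::RESIZE_MODE resolve_resize_mode_example() {
    // Hypothetical model_info contents; "fit_to_window_letterbox" maps to
    // RESIZE_KEEP_ASPECT_LETTERBOX via the specialization above.
    ov::AnyMap model_info{{"resize_type", std::string("fit_to_window_letterbox")}};
    return utils::get_from_any_maps("resize_type", model_info, ov::AnyMap{}, utils::RESIZE_FILL);
}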

src/cpp/include/utils/preprocessing.h

Lines changed: 1 addition & 8 deletions
@@ -7,16 +7,9 @@

 #include <opencv2/opencv.hpp>
 #include <openvino/openvino.hpp>
+#include "utils/config.h"

 namespace utils {
-enum RESIZE_MODE {
-    RESIZE_FILL,
-    RESIZE_KEEP_ASPECT,
-    RESIZE_KEEP_ASPECT_LETTERBOX,
-    RESIZE_CROP,
-    NO_RESIZE,
-};
-
 std::shared_ptr<ov::Model> embedProcessing(std::shared_ptr<ov::Model>& model,
                                            const std::string& inputName,
                                            const ov::Layout&,

src/cpp/src/tasks/instance_segmentation.cpp

Lines changed: 156 additions & 9 deletions
@@ -10,6 +10,7 @@
 #include "utils/math.h"
 #include "utils/preprocessing.h"
 #include "utils/tensor.h"
+#include "utils/nms.h"

 constexpr char saliency_map_name[]{"saliency_map"};
 constexpr char feature_vector_name[]{"feature_vector"};
@@ -99,7 +100,6 @@ Lbm filterTensors(const std::map<std::string, ov::Tensor>& infResult) {
 }

 cv::Mat segm_postprocess(const SegmentedObject& box, const cv::Mat& unpadded, int im_h, int im_w) {
-    // Add zero border to prevent upsampling artifacts on segment borders.
     cv::Mat raw_cls_mask;
     cv::copyMakeBorder(unpadded, raw_cls_mask, 1, 1, 1, 1, cv::BORDER_CONSTANT, {0});
     cv::Rect extended_box = expand_box(box, float(raw_cls_mask.cols) / (raw_cls_mask.cols - 2));
@@ -137,7 +137,8 @@ void InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
     }

     auto interpolation_mode = cv::INTER_LINEAR;
-    utils::RESIZE_MODE resize_mode = utils::RESIZE_FILL;
+    utils::RESIZE_MODE resize_mode;
+    resize_mode = utils::get_from_any_maps("resize_type", config, ov::AnyMap{}, resize_mode);

     std::vector<float> scale_values;
     std::vector<float> mean_values;
@@ -186,7 +187,7 @@ void InstanceSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
     ov_model->set_rt_info(input_shape.height, "model_info", "orig_height");
 }

-InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) {
+InstanceSegmentation InstanceSegmentation::load(const std::string& model_path, const ov::AnyMap& configuration) {
     auto core = ov::Core();
     std::shared_ptr<ov::Model> model = core.read_model(model_path);

@@ -204,15 +205,15 @@ InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) {
     }
     auto adapter = std::make_shared<OpenVINOInferenceAdapter>();
     adapter->loadModel(model, core, "AUTO");
-    return InstanceSegmentation(adapter);
+    return InstanceSegmentation(adapter, configuration);
 }

 InstanceSegmentationResult InstanceSegmentation::infer(cv::Mat image) {
-    return pipeline.infer(image);
+    return pipeline->infer(image);
 }

 std::vector<InstanceSegmentationResult> InstanceSegmentation::inferBatch(std::vector<cv::Mat> images) {
-    return pipeline.inferBatch(images);
+    return pipeline->inferBatch(images);
 }

 std::map<std::string, ov::Tensor> InstanceSegmentation::preprocess(cv::Mat image) {
@@ -226,11 +227,14 @@ InstanceSegmentationResult InstanceSegmentation::postprocess(InferenceResult& in
           floatInputImgHeight = float(infResult.inputImageSize.height);
     float invertedScaleX = floatInputImgWidth / input_shape.width,
           invertedScaleY = floatInputImgHeight / input_shape.height;
+
+    std::cout << "got an inf result with image: " << infResult.inputImageSize << std::endl;
+    std::cout << "resize mode: " << resize_mode << std::endl;
     int padLeft = 0, padTop = 0;
-    auto resizeMode = utils::RESIZE_FILL;
-    if (utils::RESIZE_KEEP_ASPECT == resizeMode || utils::RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
+    if (utils::RESIZE_KEEP_ASPECT == resize_mode || utils::RESIZE_KEEP_ASPECT_LETTERBOX == resize_mode) {
+        std::cout << "using some other resize mode..." << std::endl;
         invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY);
-        if (utils::RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
+        if (utils::RESIZE_KEEP_ASPECT_LETTERBOX == resize_mode) {
             padLeft = (input_shape.width - int(std::round(floatInputImgWidth / invertedScaleX))) / 2;
             padTop = (input_shape.height - int(std::round(floatInputImgHeight / invertedScaleY))) / 2;
         }
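A worked example of the letterbox padding math above, with illustrative numbers that are not part of the commit: a 1280x720 frame fitted into a 640x640 model input.

#include <algorithm>
#include <cmath>

void letterbox_pad_example() {
    float invertedScaleX = 1280.f / 640.f;  // 2.0
    float invertedScaleY = 720.f / 640.f;   // 1.125
    invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY);      // keep aspect: both 2.0
    int padLeft = (640 - int(std::round(1280.f / invertedScaleX))) / 2;  // (640 - 640) / 2 = 0
    int padTop = (640 - int(std::round(720.f / invertedScaleY))) / 2;    // (640 - 360) / 2 = 140
    (void)padLeft;
    (void)padTop;
}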
@@ -302,6 +306,149 @@ InstanceSegmentationResult InstanceSegmentation::postprocess(InferenceResult& in
     return result;
 }

+InstanceSegmentationResult InstanceSegmentation::postprocess_tile(InstanceSegmentationResult result, const cv::Rect& coord) {
+    for (auto& det : result.segmentedObjects) {
+        det.x += coord.x;
+        det.y += coord.y;
+    }
+
+    if (result.feature_vector) {
+        auto tmp_feature_vector =
+            ov::Tensor(result.feature_vector.get_element_type(), result.feature_vector.get_shape());
+        result.feature_vector.copy_to(tmp_feature_vector);
+        result.feature_vector = tmp_feature_vector;
+    }
+
+    return result;
+}
+
+InstanceSegmentationResult InstanceSegmentation::merge_tiling_results(const std::vector<InstanceSegmentationResult>& tiles_results,
+                                                                      const cv::Size& image_size,
+                                                                      const std::vector<cv::Rect>& tile_coords,
+                                                                      const utils::TilingInfo& tiling_info) {
+    size_t max_pred_number = 200;  // TODO: Actually get this from config!
+
+    InstanceSegmentationResult output;
+    std::vector<AnchorLabeled> all_detections;
+    std::vector<std::reference_wrapper<const SegmentedObject>> all_detections_ptrs;
+    std::vector<float> all_scores;
+
+    for (auto& result : tiles_results) {
+        for (auto& det : result.segmentedObjects) {
+            all_detections.emplace_back(det.x, det.y, det.x + det.width, det.y + det.height, det.labelID);
+            all_scores.push_back(det.confidence);
+            all_detections_ptrs.push_back(det);
+        }
+    }
+
+    auto keep_idx = multiclass_nms(all_detections, all_scores, tiling_info.iou_threshold, false, max_pred_number);
+
+    output.segmentedObjects.reserve(keep_idx.size());
+    for (auto idx : keep_idx) {
+        if (postprocess_semantic_masks) {
+            // why does this happen again?
+            // all_detections_ptrs[idx].get().mask = ;
+            // SegmentedObject obj = all_detections_ptrs[idx];  // copy
+            // std::cout << "Mask size before: " << obj.mask.size() << std::endl;
+            // std::cout << static_cast<cv::Rect>(obj) << std::endl;
+            // obj.mask = segm_postprocess(all_detections_ptrs[idx],
+            //                             obj.mask,
+            //                             image_size.height,
+            //                             image_size.width);
+        }
+
+        output.segmentedObjects.push_back(all_detections_ptrs[idx]);
+    }
+
+    if (tiles_results.size()) {
+        auto first = tiles_results.front();
+        if (first.feature_vector) {
+            output.feature_vector =
+                ov::Tensor(first.feature_vector.get_element_type(), first.feature_vector.get_shape());
+        }
+    }
+
+    if (output.feature_vector) {
+        float* feature_ptr = output.feature_vector.data<float>();
+        size_t feature_size = output.feature_vector.get_size();
+
+        std::fill(feature_ptr, feature_ptr + feature_size, 0.f);
+
+        for (const auto& result : tiles_results) {
+            const float* current_feature_ptr = result.feature_vector.data<float>();
+
+            for (size_t i = 0; i < feature_size; ++i) {
+                feature_ptr[i] += current_feature_ptr[i];
+            }
+        }
+
+        for (size_t i = 0; i < feature_size; ++i) {
+            feature_ptr[i] /= tiles_results.size();
+        }
+    }
+
+    output.saliency_map = merge_saliency_maps(tiles_results, image_size, tile_coords, tiling_info);
+
+    return output;
+
+}
+
+
+std::vector<cv::Mat_<std::uint8_t>> InstanceSegmentation::merge_saliency_maps(const std::vector<InstanceSegmentationResult>& tiles_results,
+                                                                              const cv::Size& image_size,
+                                                                              const std::vector<cv::Rect>& tile_coords,
+                                                                              const utils::TilingInfo& tiling_info) {
+    std::vector<std::vector<cv::Mat_<std::uint8_t>>> all_saliency_maps;
+    all_saliency_maps.reserve(tiles_results.size());
+    for (const auto& result : tiles_results) {
+        all_saliency_maps.push_back(result.saliency_map);
+    }
+
+    std::vector<cv::Mat_<std::uint8_t>> image_saliency_map;
+    if (all_saliency_maps.size()) {
+        image_saliency_map = all_saliency_maps[0];
+    }
+
+    if (image_saliency_map.empty()) {
+        return image_saliency_map;
+    }
+
+    size_t num_classes = image_saliency_map.size();
+    std::vector<cv::Mat_<std::uint8_t>> merged_map(num_classes);
+    for (auto& map : merged_map) {
+        map = cv::Mat_<std::uint8_t>(image_size, 0);
+    }
+
+    size_t start_idx = tiling_info.tile_with_full_image ? 1 : 0;
+    for (size_t i = start_idx; i < all_saliency_maps.size(); ++i) {
+        for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+            auto current_cls_map_mat = all_saliency_maps[i][class_idx];
+            if (current_cls_map_mat.empty()) {
+                continue;
+            }
+            const auto& tile = tile_coords[i];
+            cv::Mat tile_map;
+            cv::resize(current_cls_map_mat, tile_map, tile.size());
+            auto tile_map_merged = cv::Mat(merged_map[class_idx], tile);
+            cv::Mat(cv::max(tile_map, tile_map_merged)).copyTo(tile_map_merged);
+        }
+    }
+
+    for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+        auto image_map_cls = tiling_info.tile_with_full_image ? image_saliency_map[class_idx] : cv::Mat_<std::uint8_t>();
+        if (image_map_cls.empty()) {
+            if (cv::sum(merged_map[class_idx]) == cv::Scalar(0.)) {
+                merged_map[class_idx] = cv::Mat_<std::uint8_t>();
+            }
+        } else {
+            cv::resize(image_map_cls, image_map_cls, image_size);
+            cv::Mat(cv::max(merged_map[class_idx], image_map_cls)).copyTo(merged_map[class_idx]);
+        }
+    }
+
+    return merged_map;
+}
+
 std::vector<SegmentedObjectWithRects> InstanceSegmentation::getRotatedRectangles(
     const InstanceSegmentationResult& result) {
     std::vector<SegmentedObjectWithRects> objects_with_rects;
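For reference, a standalone sketch (not part of the commit) of the per-class, per-tile max-merge that merge_saliency_maps performs: each tile's class map is resized to the tile rectangle and combined into the full-image map with an element-wise maximum.

#include <opencv2/opencv.hpp>

// Merge one tile's saliency map for a single class into the full-image map.
// `merged` is the full-image map, `tile_map` the tile-sized map, and `tile`
// the tile's rectangle in image coordinates.
void merge_one_tile(cv::Mat_<std::uint8_t>& merged,
                    const cv::Mat_<std::uint8_t>& tile_map,
                    const cv::Rect& tile) {
    cv::Mat resized;
    cv::resize(tile_map, resized, tile.size());
    cv::Mat roi = merged(tile);                  // view into the full-image map
    cv::Mat(cv::max(resized, roi)).copyTo(roi);  // keep the stronger activation per pixel
}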
