diff --git a/src/cpp/include/tasks/instance_segmentation.h b/src/cpp/include/tasks/instance_segmentation.h index bc1905f1..e4ea92fd 100644 --- a/src/cpp/include/tasks/instance_segmentation.h +++ b/src/cpp/include/tasks/instance_segmentation.h @@ -16,33 +16,65 @@ class InstanceSegmentation { public: std::shared_ptr adapter; - VisionPipeline pipeline; - - InstanceSegmentation(std::shared_ptr adapter) : adapter(adapter) { - pipeline = VisionPipeline( - adapter, - [&](cv::Mat image) { - return preprocess(image); - }, - [&](InferenceResult result) { - return postprocess(result); - }); + std::unique_ptr> pipeline; + InstanceSegmentation(std::shared_ptr adapter, const ov::AnyMap& configuration) + : adapter(adapter) { auto config = adapter->getModelConfig(); + tiling = utils::get_from_any_maps("tiling", configuration, config, tiling); + if (tiling) { + pipeline = std::make_unique>( + adapter, + utils::get_tiling_info_from_config(config), + [&](cv::Mat image) { + return preprocess(image); + }, + [&](InferenceResult result) { + return postprocess(result); + }, + [&](InstanceSegmentationResult result, const cv::Rect& coord) { + return postprocess_tile(result, coord); + }, + [&](const std::vector& tiles_results, + const cv::Size& image_size, + const std::vector& tile_coords, + const utils::TilingInfo& tiling_info) { + return merge_tiling_results(tiles_results, image_size, tile_coords, tiling_info); + }); + } else { + pipeline = std::make_unique>( + adapter, + [&](cv::Mat image) { + return preprocess(image); + }, + [&](InferenceResult result) { + return postprocess(result); + }); + } labels = utils::get_from_any_maps("labels", config, {}, labels); confidence_threshold = utils::get_from_any_maps("confidence_threshold", config, {}, confidence_threshold); input_shape.width = utils::get_from_any_maps("orig_width", config, {}, input_shape.width); input_shape.height = utils::get_from_any_maps("orig_height", config, {}, input_shape.width); + resize_mode = utils::get_from_any_maps("resize_type", config, {}, resize_mode); } static void serialize(std::shared_ptr& ov_model); - static InstanceSegmentation load(const std::string& model_path); + static InstanceSegmentation load(const std::string& model_path, const ov::AnyMap& configuration); InstanceSegmentationResult infer(cv::Mat image); std::vector inferBatch(std::vector image); std::map preprocess(cv::Mat); InstanceSegmentationResult postprocess(InferenceResult& infResult); + InstanceSegmentationResult postprocess_tile(InstanceSegmentationResult, const cv::Rect&); + InstanceSegmentationResult merge_tiling_results(const std::vector& tiles_results, + const cv::Size& image_size, + const std::vector& tile_coords, + const utils::TilingInfo& tiling_info); + std::vector> merge_saliency_maps(const std::vector&, + const cv::Size&, + const std::vector&, + const utils::TilingInfo&); static std::vector getRotatedRectangles(const InstanceSegmentationResult& result); static std::vector getContours(const std::vector& objects); @@ -50,6 +82,9 @@ class InstanceSegmentation { bool postprocess_semantic_masks = true; private: + bool tiling; + + utils::RESIZE_MODE resize_mode; std::vector labels; std::string getLabelName(size_t labelID) { return labelID < labels.size() ? labels[labelID] : std::string("Label #") + std::to_string(labelID); diff --git a/src/cpp/include/tasks/semantic_segmentation.h b/src/cpp/include/tasks/semantic_segmentation.h index 47d836dc..8927db23 100644 --- a/src/cpp/include/tasks/semantic_segmentation.h +++ b/src/cpp/include/tasks/semantic_segmentation.h @@ -11,30 +11,54 @@ #include "tasks/results.h" #include "utils/config.h" #include "utils/preprocessing.h" +#include "utils/tiling.h" #include "utils/vision_pipeline.h" class SemanticSegmentation { public: - VisionPipeline pipeline; + std::unique_ptr> pipeline; std::shared_ptr adapter; - SemanticSegmentation(std::shared_ptr adapter) : adapter(adapter) { - pipeline = VisionPipeline( - adapter, - [&](cv::Mat image) { - return preprocess(image); - }, - [&](InferenceResult result) { - return postprocess(result); - }); - + SemanticSegmentation(std::shared_ptr adapter, const ov::AnyMap& configuration) + : adapter(adapter) { auto config = adapter->getModelConfig(); + tiling = utils::get_from_any_maps("tiling", configuration, config, tiling); + if (tiling) { + pipeline = std::make_unique>( + adapter, + utils::get_tiling_info_from_config(config), + [&](cv::Mat image) { + return preprocess(image); + }, + [&](InferenceResult result) { + return postprocess(result); + }, + [&](SemanticSegmentationResult& result, const cv::Rect& coord) { + return postprocess_tile(result, coord); + }, + [&](const std::vector& tiles_results, + const cv::Size& image_size, + const std::vector& tile_coords, + const utils::TilingInfo& tiling_info) { + return merge_tiling_results(tiles_results, image_size, tile_coords, tiling_info); + }); + } else { + pipeline = std::make_unique>( + adapter, + [&](cv::Mat image) { + return preprocess(image); + }, + [&](InferenceResult result) { + return postprocess(result); + }); + } + labels = utils::get_from_any_maps("labels", config, {}, labels); soft_threshold = utils::get_from_any_maps("soft_threshold", config, {}, soft_threshold); blur_strength = utils::get_from_any_maps("blur_strength", config, {}, blur_strength); } static void serialize(std::shared_ptr& ov_model); - static SemanticSegmentation load(const std::string& model_path); + static SemanticSegmentation load(const std::string& model_path, const ov::AnyMap& configuration = {}); std::map preprocess(cv::Mat); SemanticSegmentationResult postprocess(InferenceResult& infResult); @@ -42,6 +66,11 @@ class SemanticSegmentation { SemanticSegmentationResult infer(cv::Mat image); std::vector inferBatch(std::vector image); + SemanticSegmentationResult postprocess_tile(SemanticSegmentationResult, const cv::Rect&); + SemanticSegmentationResult merge_tiling_results(const std::vector& tiles_results, + const cv::Size& image_size, + const std::vector& tile_coords, + const utils::TilingInfo& tiling_info); private: cv::Mat create_hard_prediction_from_soft_prediction(cv::Mat, float threshold, int blur_strength); @@ -50,6 +79,7 @@ class SemanticSegmentation { int blur_strength = -1; float soft_threshold = -std::numeric_limits::infinity(); bool return_soft_prediction = true; + bool tiling = false; std::vector labels; diff --git a/src/cpp/include/utils/config.h b/src/cpp/include/utils/config.h index c28a5196..027ee7b9 100644 --- a/src/cpp/include/utils/config.h +++ b/src/cpp/include/utils/config.h @@ -8,6 +8,14 @@ #include #include namespace utils { +enum RESIZE_MODE { + RESIZE_FILL, + RESIZE_KEEP_ASPECT, + RESIZE_KEEP_ASPECT_LETTERBOX, + RESIZE_CROP, + NO_RESIZE, +}; + template Type get_from_any_maps(const std::string& key, const ov::AnyMap& top_priority, @@ -42,6 +50,29 @@ inline bool get_from_any_maps(const std::string& key, return low_priority; } +template <> +inline RESIZE_MODE get_from_any_maps(const std::string& key, + const ov::AnyMap& top_priority, + const ov::AnyMap& mid_priority, + RESIZE_MODE low_priority) { + std::string resize_type = "standard"; + resize_type = utils::get_from_any_maps("resize_type", top_priority, mid_priority, resize_type); + RESIZE_MODE resize = RESIZE_FILL; + if ("crop" == resize_type) { + resize = RESIZE_CROP; + } else if ("standard" == resize_type) { + resize = RESIZE_FILL; + } else if ("fit_to_window" == resize_type) { + resize = RESIZE_KEEP_ASPECT; + } else if ("fit_to_window_letterbox" == resize_type) { + resize = RESIZE_KEEP_ASPECT_LETTERBOX; + } else { + throw std::runtime_error("Unknown value for resize_type arg"); + } + + return resize; +} + inline bool model_has_embedded_processing(std::shared_ptr model) { if (model->has_rt_info("model_info")) { auto model_info = model->get_rt_info("model_info"); diff --git a/src/cpp/include/utils/preprocessing.h b/src/cpp/include/utils/preprocessing.h index 0aa6c8f9..28e06186 100644 --- a/src/cpp/include/utils/preprocessing.h +++ b/src/cpp/include/utils/preprocessing.h @@ -8,15 +8,9 @@ #include #include -namespace utils { -enum RESIZE_MODE { - RESIZE_FILL, - RESIZE_KEEP_ASPECT, - RESIZE_KEEP_ASPECT_LETTERBOX, - RESIZE_CROP, - NO_RESIZE, -}; +#include "utils/config.h" +namespace utils { std::shared_ptr embedProcessing(std::shared_ptr& model, const std::string& inputName, const ov::Layout&, diff --git a/src/cpp/include/utils/tiling.h b/src/cpp/include/utils/tiling.h index 5f50be02..146197a2 100644 --- a/src/cpp/include/utils/tiling.h +++ b/src/cpp/include/utils/tiling.h @@ -8,6 +8,8 @@ #include #include +#include "utils/config.h" + namespace utils { struct TilingInfo { @@ -24,30 +26,10 @@ inline bool config_contains_tiling_info(const ov::AnyMap& config) { inline TilingInfo get_tiling_info_from_config(const ov::AnyMap& config) { TilingInfo info; - { - auto iter = config.find("tile_size"); - if (iter != config.end()) { - info.tile_size = iter->second.as(); - } - } - { - auto iter = config.find("tiles_overlap"); - if (iter != config.end()) { - info.tiles_overlap = iter->second.as(); - } - } - { - auto iter = config.find("iou_threshold"); - if (iter != config.end()) { - info.iou_threshold = iter->second.as(); - } - } - { - auto iter = config.find("tile_with_full_img"); - if (iter != config.end()) { - info.tile_with_full_image = iter->second.as(); - } - } + info.tile_size = utils::get_from_any_maps("tile_size", config, {}, info.tile_size); + info.tiles_overlap = utils::get_from_any_maps("tiles_overlap", config, {}, info.tiles_overlap); + info.iou_threshold = utils::get_from_any_maps("iou_threshold", config, {}, info.iou_threshold); + info.tile_with_full_image = utils::get_from_any_maps("tile_with_full_image", config, {}, info.tile_with_full_image); return info; } diff --git a/src/cpp/include/utils/vision_pipeline.h b/src/cpp/include/utils/vision_pipeline.h index e3f72b17..bcab1da0 100644 --- a/src/cpp/include/utils/vision_pipeline.h +++ b/src/cpp/include/utils/vision_pipeline.h @@ -79,10 +79,10 @@ class TilingPipeline : public Pipeline { std::function preprocess; std::function postprocess; std::function postprocess_tile; - std::function&, - const cv::Size&, - const std::vector&, - const utils::TilingInfo&)> + std::function&, + const cv::Size&, + const std::vector&, + const utils::TilingInfo&)> merge_tiling_results; public: @@ -92,10 +92,10 @@ class TilingPipeline : public Pipeline { std::function preprocess, std::function postprocess, std::function postprocess_tile, - std::function&, - const cv::Size&, - const std::vector&, - const utils::TilingInfo&)> merge_tiling_results) + std::function&, + const cv::Size&, + const std::vector&, + const utils::TilingInfo&)> merge_tiling_results) : adapter(adapter), tiling_info(tiling_info), preprocess(preprocess), @@ -111,7 +111,7 @@ class TilingPipeline : public Pipeline { auto tile_img = cv::Mat(image, coord); auto input = preprocess(tile_img.clone()); InferenceResult result; - result.inputImageSize = image.size(); + result.inputImageSize = coord.size(); result.data = adapter->infer(input); auto tile_result = postprocess(result); tile_results.push_back(postprocess_tile(tile_result, coord)); @@ -146,7 +146,7 @@ class TilingPipeline : public Pipeline { auto input = preprocess(tile_img.clone()); auto additional_data = std::make_shared(); additional_data->insert({"index", i}); - additional_data->insert({"inputImageSize", images[i].size()}); + additional_data->insert({"inputImageSize", coord.size()}); additional_data->insert({"tileCoord", coord}); adapter->inferAsync(input, additional_data); } diff --git a/src/cpp/src/tasks/instance_segmentation.cpp b/src/cpp/src/tasks/instance_segmentation.cpp index a98cbc32..045ce61f 100644 --- a/src/cpp/src/tasks/instance_segmentation.cpp +++ b/src/cpp/src/tasks/instance_segmentation.cpp @@ -8,6 +8,7 @@ #include "adapters/openvino_adapter.h" #include "utils/config.h" #include "utils/math.h" +#include "utils/nms.h" #include "utils/preprocessing.h" #include "utils/tensor.h" @@ -99,7 +100,6 @@ Lbm filterTensors(const std::map& infResult) { } cv::Mat segm_postprocess(const SegmentedObject& box, const cv::Mat& unpadded, int im_h, int im_w) { - // Add zero border to prevent upsampling artifacts on segment borders. cv::Mat raw_cls_mask; cv::copyMakeBorder(unpadded, raw_cls_mask, 1, 1, 1, 1, cv::BORDER_CONSTANT, {0}); cv::Rect extended_box = expand_box(box, float(raw_cls_mask.cols) / (raw_cls_mask.cols - 2)); @@ -141,7 +141,8 @@ void InstanceSegmentation::serialize(std::shared_ptr& ov_model) { } auto interpolation_mode = cv::INTER_LINEAR; - utils::RESIZE_MODE resize_mode = utils::RESIZE_FILL; + utils::RESIZE_MODE resize_mode; + resize_mode = utils::get_from_any_maps("resize_type", config, ov::AnyMap{}, resize_mode); std::vector scale_values; std::vector mean_values; @@ -190,7 +191,7 @@ void InstanceSegmentation::serialize(std::shared_ptr& ov_model) { ov_model->set_rt_info(input_shape.height, "model_info", "orig_height"); } -InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) { +InstanceSegmentation InstanceSegmentation::load(const std::string& model_path, const ov::AnyMap& configuration) { auto adapter = std::make_shared(); adapter->loadModel(model_path, "", {}, false); @@ -203,15 +204,15 @@ InstanceSegmentation InstanceSegmentation::load(const std::string& model_path) { adapter->applyModelTransform(InstanceSegmentation::serialize); adapter->compileModel("AUTO", {}); - return InstanceSegmentation(adapter); + return InstanceSegmentation(adapter, configuration); } InstanceSegmentationResult InstanceSegmentation::infer(cv::Mat image) { - return pipeline.infer(image); + return pipeline->infer(image); } std::vector InstanceSegmentation::inferBatch(std::vector images) { - return pipeline.inferBatch(images); + return pipeline->inferBatch(images); } std::map InstanceSegmentation::preprocess(cv::Mat image) { @@ -225,11 +226,11 @@ InstanceSegmentationResult InstanceSegmentation::postprocess(InferenceResult& in floatInputImgHeight = float(infResult.inputImageSize.height); float invertedScaleX = floatInputImgWidth / input_shape.width, invertedScaleY = floatInputImgHeight / input_shape.height; + int padLeft = 0, padTop = 0; - auto resizeMode = utils::RESIZE_FILL; - if (utils::RESIZE_KEEP_ASPECT == resizeMode || utils::RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { + if (utils::RESIZE_KEEP_ASPECT == resize_mode || utils::RESIZE_KEEP_ASPECT_LETTERBOX == resize_mode) { invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY); - if (utils::RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { + if (utils::RESIZE_KEEP_ASPECT_LETTERBOX == resize_mode) { padLeft = (input_shape.width - int(std::round(floatInputImgWidth / invertedScaleX))) / 2; padTop = (input_shape.height - int(std::round(floatInputImgHeight / invertedScaleY))) / 2; } @@ -301,6 +302,151 @@ InstanceSegmentationResult InstanceSegmentation::postprocess(InferenceResult& in return result; } +InstanceSegmentationResult InstanceSegmentation::postprocess_tile(InstanceSegmentationResult result, + const cv::Rect& coord) { + for (auto& det : result.segmentedObjects) { + det.x += coord.x; + det.y += coord.y; + } + + if (result.feature_vector) { + auto tmp_feature_vector = + ov::Tensor(result.feature_vector.get_element_type(), result.feature_vector.get_shape()); + result.feature_vector.copy_to(tmp_feature_vector); + result.feature_vector = tmp_feature_vector; + } + + return result; +} + +InstanceSegmentationResult InstanceSegmentation::merge_tiling_results( + const std::vector& tiles_results, + const cv::Size& image_size, + const std::vector& tile_coords, + const utils::TilingInfo& tiling_info) { + size_t max_pred_number = 200; // TODO: Actually get this from config! + + InstanceSegmentationResult output; + std::vector all_detections; + std::vector> all_detections_ptrs; + std::vector all_scores; + + for (auto& result : tiles_results) { + for (auto& det : result.segmentedObjects) { + all_detections.emplace_back(det.x, det.y, det.x + det.width, det.y + det.height, det.labelID); + all_scores.push_back(det.confidence); + all_detections_ptrs.push_back(det); + } + } + + auto keep_idx = multiclass_nms(all_detections, all_scores, tiling_info.iou_threshold, false, max_pred_number); + + output.segmentedObjects.reserve(keep_idx.size()); + for (auto idx : keep_idx) { + if (postprocess_semantic_masks) { + // why does this happen again? + // all_detections_ptrs[idx].get().mask = ; + // SegmentedObject obj = all_detections_ptrs[idx]; //copy + // std::cout << "Mask size before: " << obj.mask.size() << std::endl; + // std::cout << static_cast(obj) << std::endl; + // obj.mask = segm_postprocess(all_detections_ptrs[idx], + // obj.mask, + // image_size.height, + // image_size.width); + } + + output.segmentedObjects.push_back(all_detections_ptrs[idx]); + } + + if (tiles_results.size()) { + auto first = tiles_results.front(); + if (first.feature_vector) { + output.feature_vector = + ov::Tensor(first.feature_vector.get_element_type(), first.feature_vector.get_shape()); + } + } + + if (output.feature_vector) { + float* feature_ptr = output.feature_vector.data(); + size_t feature_size = output.feature_vector.get_size(); + + std::fill(feature_ptr, feature_ptr + feature_size, 0.f); + + for (const auto& result : tiles_results) { + const float* current_feature_ptr = result.feature_vector.data(); + + for (size_t i = 0; i < feature_size; ++i) { + feature_ptr[i] += current_feature_ptr[i]; + } + } + + for (size_t i = 0; i < feature_size; ++i) { + feature_ptr[i] /= tiles_results.size(); + } + } + + output.saliency_map = merge_saliency_maps(tiles_results, image_size, tile_coords, tiling_info); + + return output; +} + +std::vector> InstanceSegmentation::merge_saliency_maps( + const std::vector& tiles_results, + const cv::Size& image_size, + const std::vector& tile_coords, + const utils::TilingInfo& tiling_info) { + std::vector>> all_saliency_maps; + all_saliency_maps.reserve(tiles_results.size()); + for (const auto& result : tiles_results) { + all_saliency_maps.push_back(result.saliency_map); + } + + std::vector> image_saliency_map; + if (all_saliency_maps.size()) { + image_saliency_map = all_saliency_maps[0]; + } + + if (image_saliency_map.empty()) { + return image_saliency_map; + } + + size_t num_classes = image_saliency_map.size(); + std::vector> merged_map(num_classes); + for (auto& map : merged_map) { + map = cv::Mat_(image_size, 0); + } + + size_t start_idx = tiling_info.tile_with_full_image ? 1 : 0; + for (size_t i = start_idx; i < all_saliency_maps.size(); ++i) { + for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) { + auto current_cls_map_mat = all_saliency_maps[i][class_idx]; + if (current_cls_map_mat.empty()) { + continue; + } + const auto& tile = tile_coords[i]; + cv::Mat tile_map; + cv::resize(current_cls_map_mat, tile_map, tile.size()); + auto tile_map_merged = cv::Mat(merged_map[class_idx], tile); + cv::Mat(cv::max(tile_map, tile_map_merged)).copyTo(tile_map_merged); + } + } + + for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) { + auto image_map_cls = + tiling_info.tile_with_full_image ? image_saliency_map[class_idx] : cv::Mat_(); + if (image_map_cls.empty()) { + if (cv::sum(merged_map[class_idx]) == cv::Scalar(0.)) { + merged_map[class_idx] = cv::Mat_(); + } + } else { + cv::resize(image_map_cls, image_map_cls, image_size); + cv::Mat(cv::max(merged_map[class_idx], image_map_cls)).copyTo(merged_map[class_idx]); + } + } + + return merged_map; +} + std::vector InstanceSegmentation::getRotatedRectangles( const InstanceSegmentationResult& result) { std::vector objects_with_rects; diff --git a/src/cpp/src/tasks/semantic_segmentation.cpp b/src/cpp/src/tasks/semantic_segmentation.cpp index 588045ad..ab9c372e 100644 --- a/src/cpp/src/tasks/semantic_segmentation.cpp +++ b/src/cpp/src/tasks/semantic_segmentation.cpp @@ -2,13 +2,15 @@ * Copyright (C) 2020-2025 Intel Corporation * SPDX-License-Identifier: Apache-2.0 */ - #include "tasks/semantic_segmentation.h" +#include + #include "adapters/openvino_adapter.h" #include "utils/config.h" #include "utils/tensor.h" +namespace { constexpr char feature_vector_name[]{"feature_vector"}; cv::Mat get_activation_map(const cv::Mat& features) { double min_soft_score, max_soft_score; @@ -20,7 +22,26 @@ cv::Mat get_activation_map(const cv::Mat& features) { return int_act_map; } -SemanticSegmentation SemanticSegmentation::load(const std::string& model_path) { +void normalize_soft_prediction(cv::Mat& soft_prediction, const cv::Mat& normalize_factor) { + float* data = soft_prediction.ptr(0); + const int num_classes = soft_prediction.channels(); + const size_t step_rows = soft_prediction.step[0] / sizeof(float); + const size_t step_cols = soft_prediction.step[1] / sizeof(float); + + for (int y = 0; y < soft_prediction.rows; ++y) { + for (int x = 0; x < soft_prediction.cols; ++x) { + int weight = normalize_factor.at(y, x); + if (weight > 0) { + for (int c = 0; c < num_classes; ++c) { + data[y * step_rows + x * step_cols + c] /= weight; + } + } + } + } +} +} // namespace + +SemanticSegmentation SemanticSegmentation::load(const std::string& model_path, const ov::AnyMap& configuration) { auto adapter = std::make_shared(); adapter->loadModel(model_path, "", {}, false); @@ -33,7 +54,7 @@ SemanticSegmentation SemanticSegmentation::load(const std::string& model_path) { adapter->applyModelTransform(SemanticSegmentation::serialize); adapter->compileModel("AUTO", {}); - return SemanticSegmentation(adapter); + return SemanticSegmentation(adapter, configuration); } void SemanticSegmentation::serialize(std::shared_ptr& ov_model) { @@ -206,11 +227,11 @@ std::vector SemanticSegmentation::getContours(const SemanticSegmentatio } SemanticSegmentationResult SemanticSegmentation::infer(cv::Mat image) { - return pipeline.infer(image); + return pipeline->infer(image); } std::vector SemanticSegmentation::inferBatch(std::vector images) { - return pipeline.inferBatch(images); + return pipeline->inferBatch(images); } cv::Mat SemanticSegmentation::create_hard_prediction_from_soft_prediction(cv::Mat soft_prediction, @@ -248,3 +269,35 @@ cv::Mat SemanticSegmentation::create_hard_prediction_from_soft_prediction(cv::Ma } return hard_prediction; } + +SemanticSegmentationResult SemanticSegmentation::postprocess_tile(SemanticSegmentationResult tile, const cv::Rect&) { + return tile; +} + +SemanticSegmentationResult SemanticSegmentation::merge_tiling_results( + const std::vector& tiles_results, + const cv::Size& image_size, + const std::vector& tile_coords, + const utils::TilingInfo& tiling_info) { + auto first = tiles_results.front(); + cv::Mat voting_mask(cv::Size(image_size.width, image_size.height), CV_32SC1, cv::Scalar(0)); + cv::Mat merged_soft_prediction(cv::Size(image_size.width, image_size.height), + CV_32FC(first.soft_prediction.channels()), + cv::Scalar(0)); + + for (size_t i = 0; i < tiles_results.size(); ++i) { + voting_mask(tile_coords[i]) += 1; + merged_soft_prediction(tile_coords[i]) += tiles_results[i].soft_prediction; + } + + normalize_soft_prediction(merged_soft_prediction, voting_mask); + + SemanticSegmentationResult result; + result.resultImage = + create_hard_prediction_from_soft_prediction(merged_soft_prediction, soft_threshold, blur_strength); + ; + if (return_soft_prediction) { + result.soft_prediction = merged_soft_prediction; + } + return result; +} diff --git a/tests/cpp/test_accuracy.cpp b/tests/cpp/test_accuracy.cpp index b0a7a9b5..7746891c 100644 --- a/tests/cpp/test_accuracy.cpp +++ b/tests/cpp/test_accuracy.cpp @@ -23,7 +23,13 @@ struct TestData { NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(TestData, image, reference); -namespace cv {} +cv::Mat load_image(const std::string& path, bool use_tiling, cv::Size size) { + cv::Mat image = cv::imread(path); + if (use_tiling) { + cv::resize(image, image, size); + } + return image; +} struct ModelData { std::string name; @@ -84,36 +90,33 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { auto data = GetParam(); auto model_path = DATA_DIR + '/' + data.name; + auto use_tiling = !data.input_res.empty(); if (data.type == "DetectionModel") { - auto use_tiling = !data.input_res.empty(); auto model = DetectionModel::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); - if (use_tiling) { - cv::resize(image, image, data.input_res); - } + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(std::string{result}, test_data.reference[0]); } } else if (data.type == "SegmentationModel") { - auto model = SemanticSegmentation::load(model_path); + auto model = SemanticSegmentation::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]); } } else if (data.type == "MaskRCNNModel") { - auto model = InstanceSegmentation::load(model_path); + auto model = InstanceSegmentation::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]); @@ -122,7 +125,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { auto model = Classification::load(model_path); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(std::string{result}, test_data.reference[0]); } @@ -131,7 +134,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) { for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(std::string{result}, test_data.reference[0]); @@ -146,34 +149,31 @@ TEST_P(ModelParameterizedTest, SerializedAccuracyTest) { const std::string& basename = data.name.substr(data.name.find_last_of("/\\") + 1); auto model_path = DATA_DIR + "/serialized/" + basename; + auto use_tiling = !data.input_res.empty(); if (data.type == "DetectionModel") { - auto use_tiling = !data.input_res.empty(); auto model = DetectionModel::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); - if (use_tiling) { - cv::resize(image, image, data.input_res); - } + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(std::string{result}, test_data.reference[0]); } } else if (data.type == "SegmentationModel") { - auto model = SemanticSegmentation::load(model_path); + auto model = SemanticSegmentation::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]); } } else if (data.type == "MaskRCNNModel") { - auto model = InstanceSegmentation::load(model_path); + auto model = InstanceSegmentation::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]); @@ -182,7 +182,7 @@ TEST_P(ModelParameterizedTest, SerializedAccuracyTest) { auto model = Classification::load(model_path); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(std::string{result}, test_data.reference[0]); @@ -191,7 +191,7 @@ TEST_P(ModelParameterizedTest, SerializedAccuracyTest) { auto model = Anomaly::load(model_path); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.infer(image); EXPECT_EQ(std::string{result}, test_data.reference[0]); @@ -207,36 +207,33 @@ TEST_P(ModelParameterizedTest, AccuracyTestBatch) { const std::string& basename = data.name.substr(data.name.find_last_of("/\\") + 1); auto model_path = DATA_DIR + "/serialized/" + basename; + auto use_tiling = !data.input_res.empty(); if (data.type == "DetectionModel") { - auto use_tiling = !data.input_res.empty(); auto model = DetectionModel::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); - if (use_tiling) { - cv::resize(image, image, data.input_res); - } + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.inferBatch({image}); ASSERT_EQ(result.size(), 1); EXPECT_EQ(std::string{result[0]}, test_data.reference[0]); } } else if (data.type == "SegmentationModel") { - auto model = SemanticSegmentation::load(model_path); + auto model = SemanticSegmentation::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.inferBatch({image}); EXPECT_EQ(format_test_output_to_string(model, result[0]), test_data.reference[0]); } } else if (data.type == "MaskRCNNModel") { - auto model = InstanceSegmentation::load(model_path); + auto model = InstanceSegmentation::load(model_path, {{"tiling", use_tiling}}); for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.inferBatch({image}); ASSERT_EQ(result.size(), 1); @@ -247,7 +244,7 @@ TEST_P(ModelParameterizedTest, AccuracyTestBatch) { for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.inferBatch({image}); ASSERT_EQ(result.size(), 1); @@ -258,7 +255,7 @@ TEST_P(ModelParameterizedTest, AccuracyTestBatch) { for (auto& test_data : data.test_data) { std::string image_path = DATA_DIR + '/' + test_data.image; - cv::Mat image = cv::imread(image_path); + auto image = load_image(image_path, use_tiling, data.input_res); auto result = model.inferBatch({image}); ASSERT_EQ(result.size(), 1); diff --git a/tests/python/accuracy/public_scope.json b/tests/python/accuracy/public_scope.json index a57ada55..56ce3937 100644 --- a/tests/python/accuracy/public_scope.json +++ b/tests/python/accuracy/public_scope.json @@ -35,6 +35,20 @@ } ] }, + { + "name": "otx_models/segnext_t_tiling.xml", + "type": "SegmentationModel", + "tiler": "SemanticSegmentationTiler", + "input_res": "(3500,3500)", + "test_data": [ + { + "image": "coco128/images/train2017/000000000074.jpg", + "reference": [ + "0: 0.272, 1: 0.728, [3500,3500,5], [0], [0]; background: 1.404, 311, background: 1.397, 44, background: 1.371, 34, background: 1.377, 12, background: 1.356, 155, background: 1.345, 12, background: 1.183, 219, background: 1.524, 8, background: 1.533, 4, background: 1.519, 2, background: 1.524, 4, background: 1.530, 6, background: 1.537, 2, background: 1.514, 4, background: 1.519, 8, background: 1.529, 6, background: 1.550, 6, background: 1.558, 4, background: 1.520, 2, background: 1.529, 4, background: 1.532, 6, background: 1.535, 6, background: 1.530, 2, background: 1.529, 50, background: 1.528, 22, background: 1.527, 38, background: 1.451, 1476, background: 1.345, 2743, background: 1.609, 2987, background: 1.636, 29909, " + ] + } + ] + }, { "name": "otx_models/maskrcnn_model_with_xai_head.xml", "type": "MaskRCNNModel",