Skip to content

Commit 4e5a9df

Browse files
committed
Implement semantic segmentation tiling
1 parent d5890e8 commit 4e5a9df

File tree

5 files changed

+149
-55
lines changed

5 files changed

+149
-55
lines changed

src/cpp/include/tasks/semantic_segmentation.h

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,37 +11,66 @@
1111
#include "tasks/results.h"
1212
#include "utils/config.h"
1313
#include "utils/preprocessing.h"
14+
#include "utils/tiling.h"
1415
#include "utils/vision_pipeline.h"
1516

1617
class SemanticSegmentation {
1718
public:
18-
VisionPipeline<SemanticSegmentationResult> pipeline;
19+
std::unique_ptr<Pipeline<SemanticSegmentationResult>> pipeline;
1920
std::shared_ptr<InferenceAdapter> adapter;
20-
SemanticSegmentation(std::shared_ptr<InferenceAdapter> adapter) : adapter(adapter) {
21-
pipeline = VisionPipeline<SemanticSegmentationResult>(
22-
adapter,
23-
[&](cv::Mat image) {
24-
return preprocess(image);
25-
},
26-
[&](InferenceResult result) {
27-
return postprocess(result);
28-
});
29-
21+
/// Builds the segmentation task on top of an inference adapter.
///
/// The "tiling" flag is resolved through utils::get_from_any_maps using both
/// `configuration` and the adapter's model config. When it is set, a
/// TilingPipeline is created; otherwise a plain VisionPipeline is used.
/// `labels`, `soft_threshold` and `blur_strength` are read from the model
/// config only.
///
/// NOTE: the pipeline callbacks hold a pointer to this object, so the
/// instance must stay at the same address for as long as `pipeline` is used.
SemanticSegmentation(std::shared_ptr<InferenceAdapter> adapter, const ov::AnyMap& configuration)
    : adapter(adapter) {
    auto config = adapter->getModelConfig();
    tiling = utils::get_from_any_maps("tiling", configuration, config, tiling);

    // Callbacks shared by both pipeline flavours.
    const auto run_preprocess = [this](cv::Mat image) {
        return preprocess(image);
    };
    const auto run_postprocess = [this](InferenceResult result) {
        return postprocess(result);
    };

    if (!tiling) {
        pipeline = std::make_unique<VisionPipeline<SemanticSegmentationResult>>(adapter,
                                                                                run_preprocess,
                                                                                run_postprocess);
    } else {
        const auto run_postprocess_tile = [this](SemanticSegmentationResult& result, const cv::Rect& coord) {
            return postprocess_tile(result, coord);
        };
        const auto run_merge = [this](const std::vector<SemanticSegmentationResult>& tiles_results,
                                      const cv::Size& image_size,
                                      const std::vector<cv::Rect>& tile_coords,
                                      const utils::TilingInfo& tiling_info) {
            return merge_tiling_results(tiles_results, image_size, tile_coords, tiling_info);
        };
        pipeline = std::make_unique<TilingPipeline<SemanticSegmentationResult>>(
            adapter,
            utils::get_tiling_info_from_config(config),
            run_preprocess,
            run_postprocess,
            run_postprocess_tile,
            run_merge);
    }

    // Remaining options come from the model config alone.
    labels = utils::get_from_any_maps("labels", config, {}, labels);
    soft_threshold = utils::get_from_any_maps("soft_threshold", config, {}, soft_threshold);
    blur_strength = utils::get_from_any_maps("blur_strength", config, {}, blur_strength);
}
3559

3660
static void serialize(std::shared_ptr<ov::Model>& ov_model);
37-
static SemanticSegmentation load(const std::string& model_path);
61+
static SemanticSegmentation load(const std::string& model_path, const ov::AnyMap& configuration = {});
3862

3963
std::map<std::string, ov::Tensor> preprocess(cv::Mat);
4064
SemanticSegmentationResult postprocess(InferenceResult& infResult);
4165
std::vector<Contour> getContours(const SemanticSegmentationResult& result);
4266

4367
SemanticSegmentationResult infer(cv::Mat image);
4468
std::vector<SemanticSegmentationResult> inferBatch(std::vector<cv::Mat> image);
69+
SemanticSegmentationResult postprocess_tile(SemanticSegmentationResult, const cv::Rect&);
70+
SemanticSegmentationResult merge_tiling_results(const std::vector<SemanticSegmentationResult>& tiles_results,
71+
const cv::Size& image_size,
72+
const std::vector<cv::Rect>& tile_coords,
73+
const utils::TilingInfo& tiling_info);
4574

4675
private:
4776
cv::Mat create_hard_prediction_from_soft_prediction(cv::Mat, float threshold, int blur_strength);
@@ -50,6 +79,7 @@ class SemanticSegmentation {
5079
int blur_strength = -1;
5180
float soft_threshold = -std::numeric_limits<float>::infinity();
5281
bool return_soft_prediction = true;
82+
bool tiling = false;
5383

5484
std::vector<std::string> labels;
5585

src/cpp/include/utils/vision_pipeline.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,10 @@ class TilingPipeline : public Pipeline<ResultType> {
7979
std::function<InferenceInput(cv::Mat)> preprocess;
8080
std::function<ResultType(InferenceResult)> postprocess;
8181
std::function<ResultType(ResultType&, const cv::Rect&)> postprocess_tile;
82-
std::function<DetectionResult(const std::vector<DetectionResult>&,
83-
const cv::Size&,
84-
const std::vector<cv::Rect>&,
85-
const utils::TilingInfo&)>
82+
std::function<ResultType(const std::vector<ResultType>&,
83+
const cv::Size&,
84+
const std::vector<cv::Rect>&,
85+
const utils::TilingInfo&)>
8686
merge_tiling_results;
8787

8888
public:
@@ -92,10 +92,10 @@ class TilingPipeline : public Pipeline<ResultType> {
9292
std::function<InferenceInput(cv::Mat)> preprocess,
9393
std::function<ResultType(InferenceResult)> postprocess,
9494
std::function<ResultType(ResultType&, const cv::Rect&)> postprocess_tile,
95-
std::function<DetectionResult(const std::vector<DetectionResult>&,
96-
const cv::Size&,
97-
const std::vector<cv::Rect>&,
98-
const utils::TilingInfo&)> merge_tiling_results)
95+
std::function<ResultType(const std::vector<ResultType>&,
96+
const cv::Size&,
97+
const std::vector<cv::Rect>&,
98+
const utils::TilingInfo&)> merge_tiling_results)
9999
: adapter(adapter),
100100
tiling_info(tiling_info),
101101
preprocess(preprocess),
@@ -111,7 +111,7 @@ class TilingPipeline : public Pipeline<ResultType> {
111111
auto tile_img = cv::Mat(image, coord);
112112
auto input = preprocess(tile_img.clone());
113113
InferenceResult result;
114-
result.inputImageSize = image.size();
114+
result.inputImageSize = coord.size();
115115
result.data = adapter->infer(input);
116116
auto tile_result = postprocess(result);
117117
tile_results.push_back(postprocess_tile(tile_result, coord));
@@ -146,7 +146,7 @@ class TilingPipeline : public Pipeline<ResultType> {
146146
auto input = preprocess(tile_img.clone());
147147
auto additional_data = std::make_shared<ov::AnyMap>();
148148
additional_data->insert({"index", i});
149-
additional_data->insert({"inputImageSize", images[i].size()});
149+
additional_data->insert({"inputImageSize", coord.size()});
150150
additional_data->insert({"tileCoord", coord});
151151
adapter->inferAsync(input, additional_data);
152152
}

src/cpp/src/tasks/semantic_segmentation.cpp

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
* Copyright (C) 2020-2025 Intel Corporation
33
* SPDX-License-Identifier: Apache-2.0
44
*/
5-
65
#include "tasks/semantic_segmentation.h"
76

7+
#include <opencv2/core.hpp>
8+
89
#include "adapters/openvino_adapter.h"
910
#include "utils/config.h"
1011
#include "utils/tensor.h"
1112

13+
namespace {
1214
constexpr char feature_vector_name[]{"feature_vector"};
1315
cv::Mat get_activation_map(const cv::Mat& features) {
1416
double min_soft_score, max_soft_score;
@@ -20,7 +22,26 @@ cv::Mat get_activation_map(const cv::Mat& features) {
2022
return int_act_map;
2123
}
2224

23-
SemanticSegmentation SemanticSegmentation::load(const std::string& model_path) {
25+
/// Divides every channel of each pixel of `soft_prediction` by the per-pixel
/// count stored in `normalize_factor`, in place.
///
/// @param soft_prediction  2-D matrix of 32-bit floats with any number of
///                         channels; modified in place. An empty matrix is a
///                         no-op.
/// @param normalize_factor Single-channel 32-bit integer matrix with the same
///                         size as `soft_prediction`. Pixels whose count is
///                         not positive are left untouched.
/// @throws cv::Exception (via CV_Assert) when the type or size preconditions
///         do not hold.
void normalize_soft_prediction(cv::Mat& soft_prediction, const cv::Mat& normalize_factor) {
    if (soft_prediction.empty()) {
        return;
    }
    // The raw pointer arithmetic below is only valid for these exact layouts,
    // so reject anything else instead of reading out of bounds.
    CV_Assert(soft_prediction.dims == 2 && soft_prediction.depth() == CV_32F);
    CV_Assert(normalize_factor.type() == CV_32SC1);
    CV_Assert(normalize_factor.size() == soft_prediction.size());

    const int num_classes = soft_prediction.channels();
    for (int y = 0; y < soft_prediction.rows; ++y) {
        // Row pointers honour the row stride, so non-continuous matrices work.
        float* scores = soft_prediction.ptr<float>(y);
        const int* weights = normalize_factor.ptr<int>(y);
        for (int x = 0; x < soft_prediction.cols; ++x) {
            const int weight = weights[x];
            if (weight <= 0) {
                continue;
            }
            float* pixel = scores + static_cast<size_t>(x) * num_classes;
            for (int c = 0; c < num_classes; ++c) {
                pixel[c] /= weight;
            }
        }
    }
}
42+
} // namespace
43+
44+
SemanticSegmentation SemanticSegmentation::load(const std::string& model_path, const ov::AnyMap& configuration) {
2445
auto core = ov::Core();
2546
std::shared_ptr<ov::Model> model = core.read_model(model_path);
2647

@@ -38,7 +59,7 @@ SemanticSegmentation SemanticSegmentation::load(const std::string& model_path) {
3859
}
3960
auto adapter = std::make_shared<OpenVINOInferenceAdapter>();
4061
adapter->loadModel(model, core, "AUTO");
41-
return SemanticSegmentation(adapter);
62+
return SemanticSegmentation(adapter, configuration);
4263
}
4364

4465
void SemanticSegmentation::serialize(std::shared_ptr<ov::Model>& ov_model) {
@@ -207,11 +228,11 @@ std::vector<Contour> SemanticSegmentation::getContours(const SemanticSegmentatio
207228
}
208229

209230
SemanticSegmentationResult SemanticSegmentation::infer(cv::Mat image) {
210-
return pipeline.infer(image);
231+
return pipeline->infer(image);
211232
}
212233

213234
std::vector<SemanticSegmentationResult> SemanticSegmentation::inferBatch(std::vector<cv::Mat> images) {
214-
return pipeline.inferBatch(images);
235+
return pipeline->inferBatch(images);
215236
}
216237

217238
cv::Mat SemanticSegmentation::create_hard_prediction_from_soft_prediction(cv::Mat soft_prediction,
@@ -249,3 +270,35 @@ cv::Mat SemanticSegmentation::create_hard_prediction_from_soft_prediction(cv::Ma
249270
}
250271
return hard_prediction;
251272
}
273+
274+
/// Per-tile hook passed to the tiling pipeline.
///
/// The tile result is handed back unchanged; the tile rectangle is not used
/// here.
SemanticSegmentationResult SemanticSegmentation::postprocess_tile(SemanticSegmentationResult tile,
                                                                  const cv::Rect& /*tile_coord*/) {
    return tile;
}
277+
278+
/// Merges per-tile soft predictions into one full-image result.
///
/// Each tile's soft prediction is accumulated into an image-sized buffer at
/// its rectangle while a voting mask counts how many tiles covered each
/// pixel. The accumulated scores are then divided by that count, and the hard
/// prediction is derived from the averaged soft prediction.
///
/// @param tiles_results Per-tile results; must not be empty.
/// @param image_size    Size of the full image the tiles were cut from.
/// @param tile_coords   Rectangle of each tile inside the full image;
///                      `tile_coords[i]` belongs to `tiles_results[i]`.
/// @param tiling_info   Currently unused by this implementation.
/// @return Result whose `resultImage` holds the merged hard prediction and,
///         when `return_soft_prediction` is set, the merged soft prediction.
/// @throws cv::Exception (via CV_Assert) if `tiles_results` is empty or there
///         are fewer coordinates than tile results.
SemanticSegmentationResult SemanticSegmentation::merge_tiling_results(
    const std::vector<SemanticSegmentationResult>& tiles_results,
    const cv::Size& image_size,
    const std::vector<cv::Rect>& tile_coords,
    const utils::TilingInfo& tiling_info) {
    // front() on an empty vector and tile_coords[i] past the end are both
    // undefined behaviour, so fail loudly instead.
    CV_Assert(!tiles_results.empty());
    CV_Assert(tile_coords.size() >= tiles_results.size());

    const int num_classes = tiles_results.front().soft_prediction.channels();
    cv::Mat voting_mask(image_size, CV_32SC1, cv::Scalar(0));
    cv::Mat merged_soft_prediction(image_size, CV_32FC(num_classes), cv::Scalar(0));

    for (size_t i = 0; i < tiles_results.size(); ++i) {
        voting_mask(tile_coords[i]) += 1;
        merged_soft_prediction(tile_coords[i]) += tiles_results[i].soft_prediction;
    }

    // Turn the per-pixel sums into averages over the tiles that covered them.
    normalize_soft_prediction(merged_soft_prediction, voting_mask);

    SemanticSegmentationResult result;
    result.resultImage =
        create_hard_prediction_from_soft_prediction(merged_soft_prediction, soft_threshold, blur_strength);
    if (return_soft_prediction) {
        result.soft_prediction = merged_soft_prediction;
    }
    return result;
}

tests/cpp/test_accuracy.cpp

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,13 @@ struct TestData {
2222

2323
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(TestData, image, reference);
2424

25-
namespace cv {}
25+
/// Reads a test image from disk and optionally resizes it for tiled models.
///
/// @param path       File to read with cv::imread.
/// @param use_tiling When true, the image is resized to `size`.
/// @param size       Target size used only when `use_tiling` is true.
/// @return The loaded (and possibly resized) image.
/// @throws cv::Exception if the image could not be read.
cv::Mat load_image(const std::string& path, bool use_tiling, cv::Size size) {
    cv::Mat image = cv::imread(path);
    // imread signals failure with an empty matrix; report the path here
    // rather than letting resize or inference fail with an unrelated message.
    if (image.empty()) {
        CV_Error(cv::Error::StsError, "Failed to read image: " + path);
    }
    if (use_tiling) {
        cv::resize(image, image, size);
    }
    return image;
}
2632

2733
struct ModelData {
2834
std::string name;
@@ -83,26 +89,23 @@ TEST_P(ModelParameterizedTest, AccuracyTest) {
8389
auto data = GetParam();
8490
auto model_path = DATA_DIR + '/' + data.name;
8591

92+
auto use_tiling = !data.input_res.empty();
8693
if (data.type == "DetectionModel") {
87-
auto use_tiling = !data.input_res.empty();
8894
auto model = DetectionModel::load(model_path, {{"tiling", use_tiling}});
8995

9096
for (auto& test_data : data.test_data) {
9197
std::string image_path = DATA_DIR + '/' + test_data.image;
92-
cv::Mat image = cv::imread(image_path);
93-
if (use_tiling) {
94-
cv::resize(image, image, data.input_res);
95-
}
98+
auto image = load_image(image_path, use_tiling, data.input_res);
9699
auto result = model.infer(image);
97100
EXPECT_EQ(std::string{result}, test_data.reference[0]);
98101
}
99102

100103
} else if (data.type == "SegmentationModel") {
101-
auto model = SemanticSegmentation::load(model_path);
104+
auto model = SemanticSegmentation::load(model_path, {{"tiling", use_tiling}});
102105

103106
for (auto& test_data : data.test_data) {
104107
std::string image_path = DATA_DIR + '/' + test_data.image;
105-
cv::Mat image = cv::imread(image_path);
108+
auto image = load_image(image_path, use_tiling, data.input_res);
106109
auto result = model.infer(image);
107110

108111
EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]);
@@ -112,7 +115,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) {
112115

113116
for (auto& test_data : data.test_data) {
114117
std::string image_path = DATA_DIR + '/' + test_data.image;
115-
cv::Mat image = cv::imread(image_path);
118+
auto image = load_image(image_path, use_tiling, data.input_res);
116119
auto result = model.infer(image);
117120

118121
EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]);
@@ -121,7 +124,7 @@ TEST_P(ModelParameterizedTest, AccuracyTest) {
121124
auto model = Classification::load(model_path);
122125
for (auto& test_data : data.test_data) {
123126
std::string image_path = DATA_DIR + '/' + test_data.image;
124-
cv::Mat image = cv::imread(image_path);
127+
auto image = load_image(image_path, use_tiling, data.input_res);
125128
auto result = model.infer(image);
126129
EXPECT_EQ(std::string{result}, test_data.reference[0]);
127130
}
@@ -135,24 +138,21 @@ TEST_P(ModelParameterizedTest, SerializedAccuracyTest) {
135138

136139
const std::string& basename = data.name.substr(data.name.find_last_of("/\\") + 1);
137140
auto model_path = DATA_DIR + "/serialized/" + basename;
141+
auto use_tiling = !data.input_res.empty();
138142
if (data.type == "DetectionModel") {
139-
auto use_tiling = !data.input_res.empty();
140143
auto model = DetectionModel::load(model_path, {{"tiling", use_tiling}});
141144
for (auto& test_data : data.test_data) {
142145
std::string image_path = DATA_DIR + '/' + test_data.image;
143-
cv::Mat image = cv::imread(image_path);
144-
if (use_tiling) {
145-
cv::resize(image, image, data.input_res);
146-
}
146+
auto image = load_image(image_path, use_tiling, data.input_res);
147147
auto result = model.infer(image);
148148
EXPECT_EQ(std::string{result}, test_data.reference[0]);
149149
}
150150
} else if (data.type == "SegmentationModel") {
151-
auto model = SemanticSegmentation::load(model_path);
151+
auto model = SemanticSegmentation::load(model_path, {{"tiling", use_tiling}});
152152

153153
for (auto& test_data : data.test_data) {
154154
std::string image_path = DATA_DIR + '/' + test_data.image;
155-
cv::Mat image = cv::imread(image_path);
155+
auto image = load_image(image_path, use_tiling, data.input_res);
156156
auto result = model.infer(image);
157157

158158
EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]);
@@ -162,7 +162,7 @@ TEST_P(ModelParameterizedTest, SerializedAccuracyTest) {
162162

163163
for (auto& test_data : data.test_data) {
164164
std::string image_path = DATA_DIR + '/' + test_data.image;
165-
cv::Mat image = cv::imread(image_path);
165+
auto image = load_image(image_path, use_tiling, data.input_res);
166166
auto result = model.infer(image);
167167

168168
EXPECT_EQ(format_test_output_to_string(model, result), test_data.reference[0]);
@@ -172,7 +172,7 @@ TEST_P(ModelParameterizedTest, SerializedAccuracyTest) {
172172

173173
for (auto& test_data : data.test_data) {
174174
std::string image_path = DATA_DIR + '/' + test_data.image;
175-
cv::Mat image = cv::imread(image_path);
175+
auto image = load_image(image_path, use_tiling, data.input_res);
176176
auto result = model.infer(image);
177177

178178
EXPECT_EQ(std::string{result}, test_data.reference[0]);
@@ -188,26 +188,23 @@ TEST_P(ModelParameterizedTest, AccuracyTestBatch) {
188188
const std::string& basename = data.name.substr(data.name.find_last_of("/\\") + 1);
189189
auto model_path = DATA_DIR + "/serialized/" + basename;
190190

191+
auto use_tiling = !data.input_res.empty();
191192
if (data.type == "DetectionModel") {
192-
auto use_tiling = !data.input_res.empty();
193193
auto model = DetectionModel::load(model_path, {{"tiling", use_tiling}});
194194

195195
for (auto& test_data : data.test_data) {
196196
std::string image_path = DATA_DIR + '/' + test_data.image;
197-
cv::Mat image = cv::imread(image_path);
198-
if (use_tiling) {
199-
cv::resize(image, image, data.input_res);
200-
}
197+
auto image = load_image(image_path, use_tiling, data.input_res);
201198
auto result = model.inferBatch({image});
202199
ASSERT_EQ(result.size(), 1);
203200
EXPECT_EQ(std::string{result[0]}, test_data.reference[0]);
204201
}
205202
} else if (data.type == "SegmentationModel") {
206-
auto model = SemanticSegmentation::load(model_path);
203+
auto model = SemanticSegmentation::load(model_path, {{"tiling", use_tiling}});
207204

208205
for (auto& test_data : data.test_data) {
209206
std::string image_path = DATA_DIR + '/' + test_data.image;
210-
cv::Mat image = cv::imread(image_path);
207+
auto image = load_image(image_path, use_tiling, data.input_res);
211208
auto result = model.inferBatch({image});
212209

213210
EXPECT_EQ(format_test_output_to_string(model, result[0]), test_data.reference[0]);
@@ -217,7 +214,7 @@ TEST_P(ModelParameterizedTest, AccuracyTestBatch) {
217214

218215
for (auto& test_data : data.test_data) {
219216
std::string image_path = DATA_DIR + '/' + test_data.image;
220-
cv::Mat image = cv::imread(image_path);
217+
auto image = load_image(image_path, use_tiling, data.input_res);
221218
auto result = model.inferBatch({image});
222219

223220
ASSERT_EQ(result.size(), 1);
@@ -228,7 +225,7 @@ TEST_P(ModelParameterizedTest, AccuracyTestBatch) {
228225

229226
for (auto& test_data : data.test_data) {
230227
std::string image_path = DATA_DIR + '/' + test_data.image;
231-
cv::Mat image = cv::imread(image_path);
228+
auto image = load_image(image_path, use_tiling, data.input_res);
232229
auto result = model.inferBatch({image});
233230

234231
ASSERT_EQ(result.size(), 1);

0 commit comments

Comments
 (0)