Skip to content

Commit b8db281

Browse files
authored
Tiling for semantic segmentation (#201)
* Add python sseg tiler * Refactor get contours * Update python tests * Update cpp tests * Fix isort * Add cpp implementation * Turn soft prediction on for sseg tiling * Add checks of the input entities in cpp sseg tiler * Fix isort
1 parent 02cede4 commit b8db281

File tree

12 files changed

+302
-25
lines changed

12 files changed

+302
-25
lines changed

model_api/cpp/models/include/models/segmentation_model.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,5 @@ class SegmentationModel : public ImageModel {
5757
float soft_threshold = -std::numeric_limits<float>::infinity();
5858
bool return_soft_prediction = true;
5959
};
60+
61+
cv::Mat create_hard_prediction_from_soft_prediction(const cv::Mat& soft_prediction, float soft_threshold, int blur_strength);

model_api/cpp/models/src/segmentation_model.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,17 @@
3636
namespace {
3737
constexpr char feature_vector_name[]{"feature_vector"};
3838

39+
// Linearly rescales `features` so that its minimum maps to 0 and its maximum
// to (approximately) 255, and returns the result converted to CV_8U.
cv::Mat get_activation_map(const cv::Mat& features) {
    double lowest = 0.0;
    double highest = 0.0;
    cv::minMaxLoc(features, &lowest, &highest);

    // The small epsilon keeps the denominator non-zero when all values are equal.
    const double scale = 255.0 / (highest - lowest + 1e-12);
    const double offset = -lowest * scale;

    cv::Mat activation_map;
    features.convertTo(activation_map, CV_8U, scale, offset);
    return activation_map;
}
48+
}
49+
3950
cv::Mat create_hard_prediction_from_soft_prediction(const cv::Mat& soft_prediction, float soft_threshold, int blur_strength) {
4051
if (soft_prediction.channels() == 1) {
4152
return soft_prediction;
@@ -70,17 +81,6 @@ cv::Mat create_hard_prediction_from_soft_prediction(const cv::Mat& soft_predicti
7081
return hard_prediction;
7182
}
7283

73-
cv::Mat get_activation_map(const cv::Mat& features) {
74-
double min_soft_score, max_soft_score;
75-
cv::minMaxLoc(features, &min_soft_score, &max_soft_score);
76-
double factor = 255.0 / (max_soft_score - min_soft_score + 1e-12);
77-
78-
cv::Mat int_act_map;
79-
features.convertTo(int_act_map, CV_8U, factor, -min_soft_score * factor);
80-
return int_act_map;
81-
}
82-
}
83-
8484
std::string SegmentationModel::ModelType = "Segmentation";
8585

8686
void SegmentationModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) {
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
// Copyright (C) 2024 Intel Corporation
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
*/
16+
17+
#pragma once
18+
#include <tilers/tiler_base.h>
19+
20+
struct ImageResult;
21+
struct ImageResultWithSoftPrediction;
22+
23+
// Tiler for semantic segmentation models. Per-tile soft predictions are
// accumulated over the full image, averaged where tiles overlap, and turned
// into a hard prediction.
class SemanticSegmentationTiler : public TilerBase {
24+
public:
25+
// Builds the tiler around `model`. `blur_strength`, `soft_threshold` and
// `return_soft_prediction` are looked up from `configuration` and from the
// model's own configuration.
SemanticSegmentationTiler(std::shared_ptr<ImageModel> model, const ov::AnyMap& configuration);
26+
// Runs tiled inference on `inputData` and returns the merged result.
virtual std::unique_ptr<ImageResultWithSoftPrediction> run(const ImageInputData& inputData);
27+
virtual ~SemanticSegmentationTiler() = default;
28+
29+
protected:
30+
// Checks that a tile result is an ImageResultWithSoftPrediction and passes it
// through unchanged; throws std::runtime_error otherwise.
virtual std::unique_ptr<ResultBase> postprocess_tile(std::unique_ptr<ResultBase>, const cv::Rect&);
31+
// Combines the per-tile results (tile results, full image size, tile rectangles)
// into a single result for the whole image.
virtual std::unique_ptr<ResultBase> merge_results(const std::vector<std::unique_ptr<ResultBase>>&, const cv::Size&, const std::vector<cv::Rect>&);
32+
33+
// Forwarded to create_hard_prediction_from_soft_prediction when merging.
int blur_strength = -1;
34+
// Forwarded to create_hard_prediction_from_soft_prediction when merging.
float soft_threshold = -std::numeric_limits<float>::infinity();
35+
// When true, the merged result also carries the averaged soft prediction.
bool return_soft_prediction = true;
36+
};
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
// Copyright (C) 2024 Intel Corporation
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
*/
16+
17+
18+
#include <vector>
19+
#include <opencv2/core.hpp>
20+
21+
#include <tilers/semantic_segmentation.h>
22+
#include <models/segmentation_model.h>
23+
#include <models/results.h>
24+
#include "utils/common.hpp"
25+
26+
namespace {
27+
// Divides every channel of each pixel in `soft_prediction` by the matching
// entry of `normalize_factor`, in place. Pixels whose factor is not positive
// are left untouched.
// The data is accessed as float / int, i.e. the code assumes a CV_32F
// prediction and a CV_32S single-channel factor map.
void normalize_soft_prediction(cv::Mat& soft_prediction, const cv::Mat& normalize_factor) {
    const int channel_count = soft_prediction.channels();
    // Distance between two neighbouring pixels, measured in floats.
    const size_t pixel_stride = soft_prediction.step[1] / sizeof(float);

    for (int row = 0; row < soft_prediction.rows; ++row) {
        float* row_data = soft_prediction.ptr<float>(row);
        for (int col = 0; col < soft_prediction.cols; ++col) {
            const int votes = normalize_factor.at<int>(row, col);
            if (votes <= 0) {
                continue;
            }
            float* pixel = row_data + col * pixel_stride;
            for (int channel = 0; channel < channel_count; ++channel) {
                pixel[channel] /= votes;
            }
        }
    }
}
44+
}
45+
46+
// Constructs the tiler and reads the segmentation post-processing settings.
//
// The model-side settings come from the "model_info" rt_info section of the
// underlying model; if reading it throws std::runtime_error, the inference
// adapter's model config is used instead. Each setting is then resolved with
// get_from_any_maps, using the member's current value as the default.
SemanticSegmentationTiler::SemanticSegmentationTiler(std::shared_ptr<ImageModel> _model, const ov::AnyMap& configuration)
    : TilerBase(_model, configuration) {
    ov::AnyMap model_info;
    try {
        model_info = model->getModel()->get_rt_info<ov::AnyMap>("model_info");
    } catch (const std::runtime_error&) {
        model_info = model->getInferenceAdapter()->getModelConfig();
    }

    blur_strength = get_from_any_maps("blur_strength", configuration, model_info, blur_strength);
    soft_threshold = get_from_any_maps("soft_threshold", configuration, model_info, soft_threshold);
    return_soft_prediction =
        get_from_any_maps("return_soft_prediction", configuration, model_info, return_soft_prediction);
}
61+
62+
std::unique_ptr<ImageResultWithSoftPrediction> SemanticSegmentationTiler::run(const ImageInputData& inputData) {
63+
auto result = this->run_impl(inputData);
64+
return std::unique_ptr<ImageResultWithSoftPrediction>(static_cast<ImageResultWithSoftPrediction*>(result.release()));
65+
}
66+
67+
// Validates a single tile result and passes it through unchanged.
//
// Throws std::runtime_error when the tile result is not an
// ImageResultWithSoftPrediction. The tile rectangle is not used here.
std::unique_ptr<ResultBase> SemanticSegmentationTiler::postprocess_tile(std::unique_ptr<ResultBase> tile_result,
                                                                        const cv::Rect&) {
    const bool has_soft_prediction = dynamic_cast<ImageResultWithSoftPrediction*>(tile_result.get()) != nullptr;
    if (has_soft_prediction) {
        return tile_result;
    }
    throw std::runtime_error("SemanticSegmentationTiler requires the underlying model to return ImageResultWithSoftPrediction");
}
74+
75+
std::unique_ptr<ResultBase> SemanticSegmentationTiler::merge_results(const std::vector<std::unique_ptr<ResultBase>>& tiles_results,
76+
const cv::Size& image_size, const std::vector<cv::Rect>& tile_coords) {
77+
if (tiles_results.empty()) {
78+
return std::unique_ptr<ResultBase>(new ImageResultWithSoftPrediction());
79+
}
80+
81+
cv::Mat voting_mask(cv::Size(image_size.width, image_size.height), CV_32SC1, cv::Scalar(0));
82+
auto* sseg_res = static_cast<ImageResultWithSoftPrediction*>(tiles_results[0].get());
83+
cv::Mat merged_soft_prediction(cv::Size(image_size.width, image_size.height), CV_32FC(sseg_res->soft_prediction.channels()), cv::Scalar(0));
84+
85+
for (size_t i = 0; i < tiles_results.size(); ++i) {
86+
auto* sseg_res = static_cast<ImageResultWithSoftPrediction*>(tiles_results[i].get());
87+
voting_mask(tile_coords[i]) += 1;
88+
merged_soft_prediction(tile_coords[i]) += sseg_res->soft_prediction;
89+
}
90+
91+
normalize_soft_prediction(merged_soft_prediction, voting_mask);
92+
93+
cv::Mat hard_prediction = create_hard_prediction_from_soft_prediction(merged_soft_prediction, soft_threshold, blur_strength);
94+
95+
std::unique_ptr<ResultBase> retVal;
96+
if (return_soft_prediction) {
97+
auto* result = new ImageResultWithSoftPrediction();
98+
retVal = std::unique_ptr<ResultBase>(result);
99+
result->soft_prediction = merged_soft_prediction;
100+
result->resultImage = hard_prediction;
101+
}
102+
else {
103+
auto* result = new ImageResult();
104+
retVal = std::unique_ptr<ResultBase>(result);
105+
result->resultImage = hard_prediction;
106+
}
107+
return retVal;
108+
}

model_api/python/model_api/models/segmentation.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,30 +165,32 @@ def postprocess(self, outputs, meta):
165165
return hard_prediction
166166

167167
def get_contours(
168-
self, hard_prediction: np.ndarray, soft_prediction: np.ndarray
168+
self,
169+
prediction: ImageResultWithSoftPrediction,
169170
) -> list:
170-
height, width = hard_prediction.shape[:2]
171-
n_layers = soft_prediction.shape[2]
171+
n_layers = prediction.soft_prediction.shape[2]
172172

173173
if n_layers == 1:
174174
raise RuntimeError("Cannot get contours from soft prediction with 1 layer")
175175
combined_contours = []
176176
for layer_index in range(1, n_layers): # ignoring background
177177
label = self.get_label_name(layer_index - 1)
178-
if len(soft_prediction.shape) == 3:
179-
current_label_soft_prediction = soft_prediction[:, :, layer_index]
178+
if len(prediction.soft_prediction.shape) == 3:
179+
current_label_soft_prediction = prediction.soft_prediction[
180+
:, :, layer_index
181+
]
180182
else:
181-
current_label_soft_prediction = soft_prediction
183+
current_label_soft_prediction = prediction.soft_prediction
182184

183-
obj_group = hard_prediction == layer_index
185+
obj_group = prediction.resultImage == layer_index
184186
label_index_map = obj_group.astype(np.uint8) * 255
185187

186188
contours, _hierarchy = cv2.findContours(
187189
label_index_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
188190
)
189191

190192
for contour in contours:
191-
mask = np.zeros(hard_prediction.shape, dtype=np.uint8)
193+
mask = np.zeros(prediction.resultImage.shape, dtype=np.uint8)
192194
cv2.drawContours(
193195
mask,
194196
np.asarray([contour]),

model_api/python/model_api/tilers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616

1717
from .detection import DetectionTiler
1818
from .instance_segmentation import InstanceSegmentationTiler
19+
from .semantic_segmentation import SemanticSegmentationTiler
1920
from .tiler import Tiler
2021

2122
__all__ = [
2223
"DetectionTiler",
2324
"InstanceSegmentationTiler",
2425
"Tiler",
26+
"SemanticSegmentationTiler",
2527
]

model_api/python/model_api/tilers/instance_segmentation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
class InstanceSegmentationTiler(DetectionTiler):
3232
"""
3333
Tiler for object instance segmentation models.
34-
This tiler expects model to output a lsit of `SegmentedObject` objects.
34+
This tiler expects model to output a list of `SegmentedObject` objects.
3535
3636
In addition, this tiler allows to use a tile classifier model,
3737
which predicts objectness score for each tile. Later, tiles can
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""
2+
Copyright (C) 2024 Intel Corporation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
from __future__ import annotations
18+
19+
from contextlib import contextmanager
20+
21+
import numpy as np
22+
from model_api.models import SegmentationModel
23+
from model_api.models.utils import ImageResultWithSoftPrediction
24+
25+
from .tiler import Tiler
26+
27+
28+
class SemanticSegmentationTiler(Tiler):
    """
    Tiler for segmentation models.

    Soft predictions of the individual tiles are summed over the full image,
    averaged where tiles overlap, and converted to an index mask via argmax.
    """

    def _postprocess_tile(
        self,
        predictions: ImageResultWithSoftPrediction,
        coord: list[int],
    ) -> dict:
        """Converts predictions to a format convenient for further merging.

        Args:
            predictions (ImageResultWithSoftPrediction): predictions from SegmentationModel
            coord (list[int]): coordinates of the tile

        Returns:
            dict: postprocessed predictions with keys "coord" and "masks"
        """
        return {
            "coord": coord,
            "masks": predictions.soft_prediction,
        }

    def _merge_results(
        self, results: list[dict], shape: tuple[int, int, int]
    ) -> ImageResultWithSoftPrediction:
        """Merge the results from all tiles.

        Args:
            results (list[dict]): list of tile predictions
            shape (tuple[int, int, int]): shape of the original image

        Returns:
            ImageResultWithSoftPrediction: merged predictions
        """
        height, width = shape[:2]
        # Size the accumulator from the actual soft prediction so it always
        # matches what the model produced; fall back to the label count only
        # when there is no tile to inspect.
        if results:
            num_classes = results[0]["masks"].shape[2]
        else:
            num_classes = len(self.model.labels)

        full_logits_mask = np.zeros((height, width, num_classes), dtype=np.float32)
        vote_mask = np.zeros((height, width), dtype=np.int32)
        for result in results:
            x1, y1, x2, y2 = result["coord"]
            mask = result["masks"]
            vote_mask[y1:y2, x1:x2] += 1
            full_logits_mask[y1:y2, x1:x2, :] += mask[: y2 - y1, : x2 - x1, :]

        # Pixels not covered by any tile have zero votes and zero logits.
        # Dividing by max(votes, 1) leaves them at zero instead of producing
        # NaN (0 / 0) and a RuntimeWarning.
        safe_votes = np.maximum(vote_mask, 1)
        full_logits_mask = full_logits_mask / safe_votes[:, :, None]
        index_mask = full_logits_mask.argmax(2)
        return ImageResultWithSoftPrediction(
            resultImage=index_mask,
            soft_prediction=full_logits_mask,
            feature_vector=np.array([]),
            saliency_map=np.array([]),
        )

    def __call__(self, inputs):
        @contextmanager
        def setup_segm_model():
            # Merging needs the soft prediction of every tile, so the flag is
            # forced on for the duration of the call and restored afterwards.
            is_segm_model = isinstance(self.model, SegmentationModel)
            return_soft_prediction_state = None
            if is_segm_model:
                return_soft_prediction_state = self.model.return_soft_prediction
                self.model.return_soft_prediction = True
            try:
                yield
            finally:
                if is_segm_model:
                    self.model.return_soft_prediction = return_soft_prediction_state

        with setup_segm_model():
            return super().__call__(inputs)

tests/cpp/accuracy/test_accuracy.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <adapters/openvino_adapter.h>
2626
#include <tilers/detection.h>
2727
#include <tilers/instance_segmentation.h>
28+
#include <tilers/semantic_segmentation.h>
2829

2930
using json = nlohmann::json;
3031

@@ -197,7 +198,18 @@ TEST_P(ModelParameterizedTest, AccuracyTest)
197198
throw std::runtime_error{"Failed to read the image"};
198199
}
199200

200-
std::unique_ptr<ImageResult> pred = model->infer(image);
201+
std::unique_ptr<ImageResult> pred;
202+
if (modelData.tiler == "SemanticSegmentationTiler") {
203+
auto tiler = SemanticSegmentationTiler(std::move(model), {});
204+
if (modelData.input_res.height > 0 && modelData.input_res.width > 0) {
205+
cv::resize(image, image, modelData.input_res);
206+
}
207+
pred = tiler.run(image);
208+
}
209+
else {
210+
pred = model->infer(image);
211+
}
212+
201213
ImageResultWithSoftPrediction* soft = dynamic_cast<ImageResultWithSoftPrediction*>(pred.get());
202214
if (soft) {
203215
const std::vector<Contour>& contours = model->getContours(*soft);

tests/python/accuracy/prepare_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ async def main():
116116
download_otx_model(client, otx_models_dir, "sam_vit_b_zsl_encoder"),
117117
download_otx_model(client, otx_models_dir, "sam_vit_b_zsl_decoder"),
118118
download_otx_model(client, otx_models_dir, "rtmpose_tiny"),
119+
download_otx_model(client, otx_models_dir, "segnext_t_tiling"),
119120
)
120121

121122

0 commit comments

Comments
 (0)