Skip to content

Commit 713268e

Browse files
committed
save all changes
1 parent 52b15d9 commit 713268e

16 files changed

+324
-207
lines changed

src/model_api/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from .ssd import SSD
3030
from .utils import (
3131
OutputTransform,
32+
ResizeMetadata,
3233
add_rotated_rects,
3334
get_contours,
3435
)
@@ -78,6 +79,7 @@
7879
"OutputTransform",
7980
"PredictedMask",
8081
"Prompt",
82+
"ResizeMetadata",
8183
"RotatedSegmentationResult",
8284
"SAMDecoder",
8385
"SAMImageEncoder",

src/model_api/models/action_classification.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def _get_inputs(self) -> list[str]:
119119
)
120120
return image_blob_names
121121

122-
def preprocess(
122+
def base_preprocess(
123123
self,
124124
inputs: np.ndarray,
125125
) -> tuple[dict[str, np.ndarray], dict[str, tuple[int, ...]]]:

src/model_api/models/anomaly.py

Lines changed: 6 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,7 @@ def __init__(
6868
super().__init__(inference_adapter, configuration, preload)
6969
self._check_io_number(1, (1, 4))
7070

71-
def preprocess(self, inputs: np.ndarray) -> list[dict]:
72-
"""Data preprocess method for Anomalib models.
73-
74-
Anomalib models typically expect inputs in [0,1] range as float32.
75-
"""
76-
original_shape = inputs.shape
77-
71+
def _resize_image(self, image: np.ndarray) -> tuple[np.ndarray, dict]:
7872
if (
7973
self._is_dynamic
8074
and getattr(self.inference_adapter, "device", "") == "NPU"
@@ -83,40 +77,12 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
8377
_, self.c, self.h, self.w = self.inference_adapter.compiled_model.inputs[0].get_shape()
8478
self._is_dynamic = False
8579

86-
if self._is_dynamic:
87-
h, w, c = inputs.shape
88-
resized_shape = (w, h, c)
80+
return super()._resize_image(image)
8981

90-
# For anomalib models, convert to float32 and normalize to [0,1] if needed
91-
if inputs.dtype == np.uint8:
92-
processed_image = inputs.astype(np.float32) / 255.0
93-
else:
94-
processed_image = inputs.astype(np.float32)
95-
96-
# Apply layout change but skip InputTransform (which might apply wrong normalization)
97-
processed_image = self._change_layout(processed_image)
98-
else:
99-
resized_shape = (self.w, self.h, self.c)
100-
# For fixed models, use standard preprocessing
101-
if self.params.embedded_processing:
102-
processed_image = inputs[None]
103-
else:
104-
# Resize image to expected model input dimensions
105-
resized_image = self.resize(inputs, (self.w, self.h))
106-
# Convert to float32 and normalize for anomalib
107-
if resized_image.dtype == np.uint8:
108-
processed_image = resized_image.astype(np.float32) / 255.0
109-
else:
110-
processed_image = resized_image.astype(np.float32)
111-
processed_image = self._change_layout(processed_image)
112-
113-
return [
114-
{self.image_blob_name: processed_image},
115-
{
116-
"original_shape": original_shape,
117-
"resized_shape": resized_shape,
118-
},
119-
]
82+
def _input_transform(self, image: np.ndarray) -> np.ndarray:
    """Convert an image to float32, scaling 8-bit data into the [0, 1] range.

    Args:
        image: Input image array of any dtype.

    Returns:
        A new float32 array. ``uint8`` input is divided by 255; every other
        dtype is only cast, with its values left untouched.
    """
    # Decide on the *source* dtype before casting; only 8-bit data is rescaled.
    needs_scaling = image.dtype == np.uint8
    # astype() copies by default, so the caller's array is never modified below.
    converted = image.astype(np.float32)
    if needs_scaling:
        # Scale in place to avoid allocating a second full-size float buffer.
        converted /= 255.0
    return converted
12086

12187
def postprocess(self, outputs: dict[str, np.ndarray], meta: dict[str, Any]) -> AnomalyResult:
12288
"""Post-processes the outputs and returns the results.

src/model_api/models/classification.py

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -60,36 +60,41 @@ def __init__(self, inference_adapter: InferenceAdapter, configuration: dict = {}
6060
self._verify_single_output()
6161

6262
self.raw_scores_name = _raw_scores_name
63+
6364
if self.params.hierarchical:
64-
self._embedded_processing = True
65-
self.out_layer_names = _get_non_xai_names(self.outputs.keys())
66-
_append_xai_names(self.outputs.keys(), self.out_layer_names)
67-
hierarchical_config = self.params.hierarchical_config
68-
if not hierarchical_config:
69-
self.raise_error("Hierarchical classification config is empty.")
70-
self.raw_scores_name = self.out_layer_names[0]
71-
self.hierarchical_info = json.loads(hierarchical_config)
72-
73-
if self.params.hierarchical_postproc == "probabilistic":
74-
self.labels_resolver = ProbabilisticLabelsResolver(
75-
self.hierarchical_info,
76-
)
77-
else:
78-
self.labels_resolver = GreedyLabelsResolver(self.hierarchical_info)
65+
self._setup_hierarchical()
66+
elif self.params.multilabel:
67+
self._setup_multilabel()
68+
else:
69+
self._setup_single_label()
7970

80-
if preload:
81-
self.load()
82-
return
71+
_append_xai_names(self.outputs.keys(), self.out_layer_names)
72+
if preload:
73+
self.load()
8374

84-
if self.params.multilabel:
85-
self._embedded_processing = True
86-
self.out_layer_names = _get_non_xai_names(self.outputs.keys())
87-
_append_xai_names(self.outputs.keys(), self.out_layer_names)
88-
self.raw_scores_name = self.out_layer_names[0]
89-
if preload:
90-
self.load()
91-
return
75+
def _setup_hierarchical(self) -> None:
    """Prepare the model for hierarchical classification.

    Marks processing as embedded, records the non-XAI output names, reports
    an error when the hierarchical config is empty, parses that config from
    JSON and selects the label resolver named by ``hierarchical_postproc``.
    """
    self._embedded_processing = True
    self.out_layer_names = _get_non_xai_names(self.outputs.keys())

    raw_config = self.params.hierarchical_config
    if not raw_config:
        # NOTE(review): presumably raise_error() raises; confirm, because an
        # empty config would otherwise reach json.loads() below.
        self.raise_error("Hierarchical classification config is empty.")

    # The first non-XAI output is used as the raw scores tensor.
    self.raw_scores_name = self.out_layer_names[0]
    self.hierarchical_info = json.loads(raw_config)

    # Anything other than "probabilistic" uses the greedy resolver.
    use_probabilistic = self.params.hierarchical_postproc == "probabilistic"
    resolver_cls = ProbabilisticLabelsResolver if use_probabilistic else GreedyLabelsResolver
    self.labels_resolver = resolver_cls(self.hierarchical_info)
89+
90+
def _setup_multilabel(self) -> None:
    """Prepare the model for multi-label classification.

    Marks processing as embedded, records the non-XAI output names and uses
    the first of them as the raw scores output.
    """
    # NOTE(review): this sets `_embedded_processing`, while the single-label
    # setup assigns `embedded_processing`; confirm which attribute is intended.
    self._embedded_processing = True
    score_outputs = _get_non_xai_names(self.outputs.keys())
    self.out_layer_names = score_outputs
    self.raw_scores_name = score_outputs[0]
9295

96+
def _setup_single_label(self) -> None:
97+
"""Configure model for single-label classification with TopK."""
9398
try:
9499
addOrFindSoftmaxAndTopkOutputs(
95100
self.inference_adapter,
@@ -114,10 +119,6 @@ def __init__(self, inference_adapter: InferenceAdapter, configuration: dict = {}
114119

115120
self.embedded_processing = True
116121

117-
_append_xai_names(self.outputs.keys(), self.out_layer_names)
118-
if preload:
119-
self.load()
120-
121122
def _load_labels(self, labels_file: str) -> list:
122123
with Path(labels_file).open() as f:
123124
labels = []

src/model_api/models/detection_model.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .image_model import ImageModel
99
from .parameters import ParameterRegistry
1010
from .result import DetectionResult
11-
from .utils import load_labels
11+
from .utils import ResizeMetadata, load_labels
1212

1313

1414
class DetectionModel(ImageModel):
@@ -58,6 +58,18 @@ def parameters(cls):
5858
)
5959
return parameters
6060

61+
def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
    """Attach resize bookkeeping to the metadata of an already prepared input.

    Args:
        dict_inputs: Model inputs; returned unchanged.
        meta: Input metadata. It must contain ``"original_shape"``, whose
            first two entries are read as height and width. It is updated in
            place with a ``"resize_info"`` entry.

    Returns:
        The same ``dict_inputs`` and ``meta`` objects that were passed in.
    """
    source_height, source_width = meta["original_shape"][:2]
    # Store the serialised form so later stages can rebuild it from the dict.
    meta["resize_info"] = ResizeMetadata.compute(
        original_width=source_width,
        original_height=source_height,
        model_width=self.w,
        model_height=self.h,
        resize_type=self.params.resize_type,
    ).to_dict()
    return dict_inputs, meta
72+
6173
def _resize_detections(self, detection_result: DetectionResult, meta: dict):
6274
"""Resizes detection bounding boxes according to initial image shape.
6375
@@ -68,26 +80,24 @@ def _resize_detections(self, detection_result: DetectionResult, meta: dict):
6880
detection_result (DetectionList): detection result with coordinates in normalized form
6981
meta (dict): the input metadata obtained from `preprocess` method
7082
"""
71-
input_img_height, input_img_widht = meta["original_shape"][:2]
72-
inverted_scale_x = input_img_widht / self.w
73-
inverted_scale_y = input_img_height / self.h
74-
pad_left = 0
75-
pad_top = 0
76-
resize_type = self.params.resize_type
77-
if resize_type == "fit_to_window" or resize_type == "fit_to_window_letterbox":
78-
inverted_scale_x = inverted_scale_y = max(
79-
inverted_scale_x,
80-
inverted_scale_y,
83+
input_img_height, input_img_width = meta["original_shape"][:2]
84+
85+
if "resize_info" in meta:
86+
resize_meta = ResizeMetadata.from_dict(meta["resize_info"])
87+
else:
88+
resize_meta = ResizeMetadata.compute(
89+
original_width=input_img_width,
90+
original_height=input_img_height,
91+
model_width=self.w,
92+
model_height=self.h,
93+
resize_type=self.params.resize_type,
8194
)
82-
if resize_type == "fit_to_window_letterbox":
83-
pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
84-
pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2
8595

8696
boxes = detection_result.bboxes
87-
boxes[:, 0::2] = (boxes[:, 0::2] * self.w - pad_left) * inverted_scale_x
88-
boxes[:, 1::2] = (boxes[:, 1::2] * self.h - pad_top) * inverted_scale_y
97+
boxes[:, 0::2] = (boxes[:, 0::2] * self.w - resize_meta.pad_left) * resize_meta.inverted_scale_x
98+
boxes[:, 1::2] = (boxes[:, 1::2] * self.h - resize_meta.pad_top) * resize_meta.inverted_scale_y
8999
np.round(boxes, out=boxes)
90-
boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_widht)
100+
boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_width)
91101
boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, input_img_height)
92102
detection_result.bboxes = boxes.astype(np.int32)
93103

src/model_api/models/image_model.py

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def _get_inputs(self) -> tuple[list[str], ...]:
148148
)
149149
return image_blob_names, image_info_blob_names
150150

151-
def preprocess(self, inputs: np.ndarray) -> list[dict]:
151+
def base_preprocess(self, inputs: np.ndarray) -> list[dict]:
152152
"""Data preprocess method
153153
154154
It performs basic preprocessing of a single image:
@@ -173,35 +173,61 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
173173
}
174174
- the input metadata, which might be used in `postprocess` method
175175
"""
176-
original_shape = inputs.shape
177-
178176
if self.params.embedded_processing:
179-
processed_image = inputs[None]
180-
if self._is_dynamic:
181-
h, w, c = inputs.shape
182-
resized_shape = (w, h, c)
183-
else:
184-
resized_shape = (self.w, self.h, self.c)
185-
elif self._is_dynamic:
177+
dict_inputs, meta = self._preprocess_embedded(inputs)
178+
dict_inputs, meta = self.preprocess(dict_inputs, meta)
179+
return [dict_inputs, meta]
180+
181+
# 1. Resize
182+
resized_image, meta = self._resize_image(inputs)
183+
184+
# 2. Transform
185+
processed_image = self._input_transform(resized_image)
186+
187+
# 3. Layout
188+
processed_image = self._change_layout(processed_image)
189+
190+
# 4. Pack
191+
dict_inputs = {self.image_blob_name: processed_image}
192+
193+
# 5. Model-specific preprocess
194+
dict_inputs, meta = self.preprocess(dict_inputs, meta)
195+
196+
return [dict_inputs, meta]
197+
198+
def _preprocess_embedded(self, inputs: np.ndarray) -> tuple[dict, dict]:
199+
original_shape = inputs.shape
200+
processed_image = inputs[None]
201+
if self._is_dynamic:
186202
h, w, c = inputs.shape
187203
resized_shape = (w, h, c)
188-
processed_image = self.input_transform(inputs)
189-
processed_image = self._change_layout(processed_image)
190204
else:
191-
# Fixed model without embedded preprocessing
192205
resized_shape = (self.w, self.h, self.c)
193206

194-
resized_image = self.resize(inputs, (self.w, self.h), pad_value=self.params.pad_value)
195-
processed_image = self.input_transform(resized_image)
196-
processed_image = self._change_layout(processed_image)
197-
198-
return [
207+
return (
199208
{self.image_blob_name: processed_image},
200209
{
201210
"original_shape": original_shape,
202211
"resized_shape": resized_shape,
203212
},
204-
]
213+
)
214+
215+
def _resize_image(self, image: np.ndarray) -> tuple[np.ndarray, dict]:
    """Resize an image to the model input size and describe the result.

    Args:
        image: Input image; its shape is recorded as ``"original_shape"``.

    Returns:
        A pair ``(image, meta)``. For a dynamic model the input image is
        returned as is and ``"resized_shape"`` is its own shape reordered to
        ``(w, h, c)``. Otherwise the image is passed through ``self.resize``
        and ``"resized_shape"`` is ``(self.w, self.h, self.c)``.
    """
    meta = {"original_shape": image.shape}

    if not self._is_dynamic:
        meta["resized_shape"] = (self.w, self.h, self.c)
        resized = self.resize(image, (self.w, self.h), pad_value=self.params.pad_value)
        return resized, meta

    # Dynamic models take the image at its native size; only the metadata is built.
    height, width, channels = image.shape
    meta["resized_shape"] = (width, height, channels)
    return image, meta
225+
226+
def _input_transform(self, image: np.ndarray) -> np.ndarray:
    """Apply the model's ``input_transform`` callable to an image.

    Kept as a separate method so subclasses can supply their own transform.
    """
    transformed = self.input_transform(image)
    return transformed
228+
229+
def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
    """Model-specific preprocessing hook; this implementation changes nothing.

    Args:
        dict_inputs: Prepared model inputs.
        meta: Input metadata.

    Returns:
        The same two objects, untouched, as a ``(dict_inputs, meta)`` tuple.
    """
    passthrough = (dict_inputs, meta)
    return passthrough
205231

206232
def _change_layout(self, image: np.ndarray) -> np.ndarray:
207233
"""Changes the input image layout to fit the layout of the model input layer.

src/model_api/models/instance_segmentation.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from .image_model import ImageModel
1212
from .parameters import ParameterRegistry
1313
from .result import InstanceSegmentationResult
14-
from .utils import load_labels
14+
from .utils import ResizeMetadata, load_labels
1515

1616

1717
class MaskRCNNModel(ImageModel):
@@ -95,8 +95,7 @@ def _get_segmentoly_outputs(self) -> dict:
9595
)
9696
return outputs
9797

98-
def preprocess(self, inputs: np.ndarray) -> list[dict]:
99-
dict_inputs, meta = super().preprocess(inputs)
98+
def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
10099
input_image_size = meta["resized_shape"][:2]
101100
if self.is_segmentoly:
102101
assert len(self.image_info_blob_names) == 1
@@ -105,7 +104,7 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
105104
dtype=np.float32,
106105
)
107106
dict_inputs[self.image_info_blob_names[0]] = input_image_info
108-
return [dict_inputs, meta]
107+
return dict_inputs, meta
109108

110109
def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
111110
if (
@@ -141,20 +140,21 @@ def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
141140
meta["original_shape"][1],
142141
meta["original_shape"][0],
143142
)
144-
invertedScaleX, invertedScaleY = (
145-
inputImgWidth / self.orig_width,
146-
inputImgHeight / self.orig_height,
143+
resize_meta = ResizeMetadata.compute(
144+
original_width=inputImgWidth,
145+
original_height=inputImgHeight,
146+
model_width=self.orig_width,
147+
model_height=self.orig_height,
148+
resize_type=self.params.resize_type,
149+
)
150+
151+
boxes -= (resize_meta.pad_left, resize_meta.pad_top, resize_meta.pad_left, resize_meta.pad_top)
152+
boxes *= (
153+
resize_meta.inverted_scale_x,
154+
resize_meta.inverted_scale_y,
155+
resize_meta.inverted_scale_x,
156+
resize_meta.inverted_scale_y,
147157
)
148-
padLeft, padTop = 0, 0
149-
resize_type = self.params.resize_type
150-
if resize_type == "fit_to_window" or resize_type == "fit_to_window_letterbox":
151-
invertedScaleX = invertedScaleY = max(invertedScaleX, invertedScaleY)
152-
if resize_type == "fit_to_window_letterbox":
153-
padLeft = (self.orig_width - round(inputImgWidth / invertedScaleX)) // 2
154-
padTop = (self.orig_height - round(inputImgHeight / invertedScaleY)) // 2
155-
156-
boxes -= (padLeft, padTop, padLeft, padTop)
157-
boxes *= (invertedScaleX, invertedScaleY, invertedScaleX, invertedScaleY)
158158
np.around(boxes, out=boxes)
159159
np.clip(
160160
boxes,

0 commit comments

Comments
 (0)