Bump inference dependencies (#4243)

sovrasov · web-flow · commit f6c321ac4cb6 · 2025-03-10T09:57:44.000+01:00
* Bump deps

* Update classification OV model

* Update detection OV model

* Update iseg OV model

* Fix unit tests

* Don't use OMZ model in unit tests

* Fix corner cases of one-sized scores in det output

* Don't check if mlc model always produces top labels, as it can be flaky

* Bump MAPI

* Update readme

* Update codeowners

* Update changelog

* Fix formatting

* Bump MAPI
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -1,4 +1,4 @@
 # See help here: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners
 
 # These owners will be the default owners for everything in the repo.
-* @samet-akcay @eugene123tw @kprokofi @sovrasov @negvet @daankrol @djdameln @ashwinvaidya17 @rajeshgangireddy @atwinand
+* @samet-akcay @eugene123tw @kprokofi @sovrasov @daankrol @djdameln @ashwinvaidya17 @rajeshgangireddy @atwinand
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,12 +8,13 @@ All notable changes to this project will be documented in this file.
 
 ### Enhancements
 
+- Bump inference dependencies
+  (<https://github.com/openvinotoolkit/training_extensions/pull/4243>)
 - Bump ModelAPI to 0.2.5.2
   (<https://github.com/openvinotoolkit/training_extensions/pull/4275>)
 
 ### Bug fixes
 
-
 - Fix auto batch size with tiling
   (<https://github.com/openvinotoolkit/training_extensions/pull/4233>)
 - Fix exportable code for tiling
diff --git a/README.md b/README.md
@@ -14,8 +14,8 @@
 <!-- markdownlint-disable MD042 -->
 
 [![python](https://img.shields.io/badge/python-3.10%2B-green)]()
-[![pytorch](https://img.shields.io/badge/pytorch-2.1.1%2B-orange)]()
-[![openvino](https://img.shields.io/badge/openvino-2024.0-purple)]()
+[![pytorch](https://img.shields.io/badge/pytorch-2.5%2B-orange)]()
+[![openvino](https://img.shields.io/badge/openvino-2025.0-purple)]()
 
 <!-- markdownlint-enable  MD042 -->
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -79,12 +79,11 @@ base = [
     "lightning==2.4.0",
     "pytorchcv==0.0.67",
     "timm==1.0.3",
-    "openvino==2024.6",
-    "openvino-dev==2024.6",
-    "openvino-model-api==0.2.5.2",
+    "openvino==2025.0",
+    "openvino-model-api==0.3.0.2",
     "onnx==1.17.0",
     "onnxconverter-common==1.14.0",
-    "nncf==2.14.1",
+    "nncf==2.15.0",
     "anomalib[core]==1.1.3",
 ]
 
diff --git a/src/otx/core/exporter/exportable_code/demo/demo_package/visualizers/visualizer.py b/src/otx/core/exporter/exportable_code/demo/demo_package/visualizers/visualizer.py
@@ -273,21 +273,24 @@ def draw(
         Returns:
             Output image with annotations.
         """
-        for detection in predictions.objects:
-            class_id = int(detection.id)
-            color = self.color_palette[class_id]
-            det_label = self.color_palette[class_id] if self.labels and len(self.labels) >= class_id else f"#{class_id}"
-            xmin, ymin, xmax, ymax = detection.xmin, detection.ymin, detection.xmax, detection.ymax
-            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
-            cv2.putText(
-                frame,
-                f"{det_label} {detection.score:.1%}",
-                (xmin, ymin - 7),
-                cv2.FONT_HERSHEY_COMPLEX,
-                0.6,
-                color,
-                1,
-            )
+        if len(predictions.bboxes.shape):
+            for i, box in enumerate(predictions.bboxes):
+                class_id = int(predictions.labels[i])
+                color = self.color_palette[class_id]
+                det_label = (
+                    self.color_palette[class_id] if self.labels and len(self.labels) >= class_id else f"#{class_id}"
+                )
+                xmin, ymin, xmax, ymax = box
+                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
+                cv2.putText(
+                    frame,
+                    f"{det_label} {predictions.scores[i]:.1%}",
+                    (xmin, ymin - 7),
+                    cv2.FONT_HERSHEY_COMPLEX,
+                    0.6,
+                    color,
+                    1,
+                )
 
         return frame
 
@@ -339,16 +342,10 @@ def draw(
             np.ndarray - The input frame with the instance segmentation results drawn on it.
         """
         result = frame.copy()
-        output_objects = predictions.segmentedObjects
-        bboxes = [[output.xmin, output.ymin, output.xmax, output.ymax] for output in output_objects]
-        scores = [output.score for output in output_objects]
-        masks = [output.mask for output in output_objects]
-        label_names = [output.str_label for output in output_objects]
-
-        result = self._overlay_masks(result, masks)
-        return self._overlay_labels(result, bboxes, label_names, scores)
+        result = self._overlay_masks(result, predictions.masks)
+        return self._overlay_labels(result, predictions.bboxes, predictions.label_names, predictions.scores)
 
-    def _overlay_masks(self, image: np.ndarray, masks: list[np.ndarray]) -> np.ndarray:
+    def _overlay_masks(self, image: np.ndarray, masks: np.ndarray) -> np.ndarray:
         segments_image = image.copy()
         aggregated_mask = np.zeros(image.shape[:2], dtype=np.uint8)
         aggregated_colored_mask = np.zeros(image.shape, dtype=np.uint8)
@@ -381,9 +378,9 @@ def _overlay_boxes(self, image: np.ndarray, boxes: list[np.ndarray], classes: li
     def _overlay_labels(
         self,
         image: np.ndarray,
-        boxes: list[np.ndarray],
+        boxes: np.ndarray,
         classes: list[str],
-        scores: list[float],
+        scores: np.ndarray,
     ) -> np.ndarray:
         template = "{}: {:.2f}" if self.show_scores else "{}"
 
diff --git a/src/otx/core/model/classification.py b/src/otx/core/model/classification.py
@@ -529,8 +529,8 @@ def _customize_outputs(
         outputs: list[ClassificationResult],
         inputs: MulticlassClsBatchDataEntity,
     ) -> MulticlassClsBatchPredEntity:
-        pred_labels = [torch.tensor(out.top_labels[0][0], dtype=torch.long, device=self.device) for out in outputs]
-        pred_scores = [torch.tensor(out.top_labels[0][2], device=self.device) for out in outputs]
+        pred_labels = [torch.tensor(out.top_labels[0].id, dtype=torch.long, device=self.device) for out in outputs]
+        pred_scores = [torch.tensor(out.top_labels[0].confidence, device=self.device) for out in outputs]
 
         if outputs and outputs[0].saliency_map.size != 0:
             # Squeeze dim 4D => 3D, (1, num_classes, H, W) => (num_classes, H, W)
@@ -605,7 +605,7 @@ def _customize_outputs(
         inputs: MultilabelClsBatchDataEntity,
     ) -> MultilabelClsBatchPredEntity:
         pred_scores = [
-            torch.tensor([top_label[2] for top_label in out.top_labels], device=self.device) for out in outputs
+            torch.tensor([top_label.confidence for top_label in out.top_labels], device=self.device) for out in outputs
         ]
 
         if outputs and outputs[0].saliency_map.size != 0:
diff --git a/src/otx/core/model/detection.py b/src/otx/core/model/detection.py
@@ -622,21 +622,16 @@ def _customize_outputs(
             log.warning(f"label_shift: {label_shift}")
 
         for i, output in enumerate(outputs):
-            output_objects = output.objects
-            if len(output_objects):
-                bbox = [[output.xmin, output.ymin, output.xmax, output.ymax] for output in output_objects]
-            else:
-                bbox = torch.empty(size=(0, 0))
             bboxes.append(
                 tv_tensors.BoundingBoxes(
-                    bbox,
+                    data=output.bboxes,
                     format="XYXY",
                     canvas_size=inputs.imgs_info[i].img_shape,
                     device=self.device,
                 ),
             )
-            scores.append(torch.tensor([output.score for output in output_objects], device=self.device))
-            labels.append(torch.tensor([output.id - label_shift for output in output_objects], device=self.device))
+            scores.append(torch.tensor(output.scores.reshape(-1), device=self.device))
+            labels.append(torch.tensor(output.labels.reshape(-1) - label_shift, device=self.device))
 
         if outputs and outputs[0].saliency_map.size > 1:
             # Squeeze dim 4D => 3D, (1, num_classes, H, W) => (num_classes, H, W)
diff --git a/src/otx/core/model/instance_segmentation.py b/src/otx/core/model/instance_segmentation.py
@@ -654,26 +654,19 @@ def _customize_outputs(
         labels = []
         masks = []
         for output in outputs:
-            output_objects = output.segmentedObjects
-            if len(output_objects):
-                bbox = [[output.xmin, output.ymin, output.xmax, output.ymax] for output in output_objects]
-            else:
-                bbox = torch.empty(size=(0, 0))
             bboxes.append(
                 tv_tensors.BoundingBoxes(
-                    bbox,
+                    data=output.bboxes,
                     format="XYXY",
                     canvas_size=inputs.imgs_info[-1].img_shape,
                     device=self.device,
                 ),
             )
             # NOTE: OTX 1.5 filter predictions with result_based_confidence_threshold,
             # but OTX 2.0 doesn't have it in configuration.
-            _masks = [output.mask for output in output_objects]
-            _masks = np.stack(_masks) if len(_masks) else []
-            scores.append(torch.tensor([output.score for output in output_objects], device=self.device))
-            masks.append(torch.tensor(_masks, device=self.device))
-            labels.append(torch.tensor([output.id - 1 for output in output_objects], device=self.device))
+            scores.append(torch.tensor(output.scores.reshape(-1), device=self.device))
+            masks.append(torch.tensor(output.masks, device=self.device))
+            labels.append(torch.tensor(output.labels.reshape(-1) - 1, device=self.device))
 
         if outputs and outputs[0].saliency_map:
             predicted_s_maps = []
diff --git a/tests/integration/api/test_geti_interaction.py b/tests/integration/api/test_geti_interaction.py
@@ -118,7 +118,6 @@ def test_export_and_infer_onnx(self):
 
             predictions = mapi_model(self.image)
             assert predictions is not None
-            assert len(predictions.top_labels) > 0
 
             exported_path.unlink(missing_ok=True)
 
@@ -148,7 +147,6 @@ def test_export_and_infer_openvino(self):
 
             predictions = mapi_model(self.image)
             assert predictions is not None
-            assert len(predictions.top_labels) > 0
 
             exported_path.unlink(missing_ok=True)
 
@@ -187,7 +185,6 @@ def test_optimize_and_infer_openvino_fp32(self):
 
         predictions = mapi_model(self.image)
         assert predictions is not None
-        assert len(predictions.top_labels) > 0
 
 
 @pytest.mark.parametrize("task", pytest.TASK_LIST)
diff --git a/tests/unit/core/data/test_tiling.py b/tests/unit/core/data/test_tiling.py
@@ -14,7 +14,7 @@
 from datumaro import Dataset as DmDataset
 from datumaro import Polygon
 from model_api.models import Model
-from model_api.models.utils import ImageResultWithSoftPrediction
+from model_api.models.result import ImageResultWithSoftPrediction
 from model_api.tilers import SemanticSegmentationTiler
 from omegaconf import OmegaConf
 from torchvision import tv_tensors
diff --git a/tests/unit/core/exporter/exportable_code/demo/demo_package/visualizers/test_visualizers.py b/tests/unit/core/exporter/exportable_code/demo/demo_package/visualizers/test_visualizers.py
@@ -5,13 +5,11 @@
 
 import numpy as np
 import pytest
-from model_api.models.utils import (
+from model_api.models.result import (
     ClassificationResult,
-    Detection,
     DetectionResult,
     ImageResultWithSoftPrediction,
     InstanceSegmentationResult,
-    SegmentedObject,
 )
 from numpy.random import PCG64, Generator
 
@@ -146,14 +144,24 @@ def visualizer(self):
 
     def test_draw_no_predictions(self, visualizer):
         frame = np.zeros((100, 100, 3), dtype=np.uint8)
-        predictions = DetectionResult([], saliency_map=None, feature_vector=None)
+        predictions = DetectionResult(
+            bboxes=np.ndarray([]),
+            labels=np.ndarray([]),
+            scores=np.ndarray([]),
+            label_names=[],
+            saliency_map=None,
+            feature_vector=None,
+        )
         output_frame = visualizer.draw(frame, predictions)
         assert np.array_equal(frame, output_frame)
 
     def test_draw_with_predictions(self, visualizer):
         frame = np.zeros((100, 100, 3), dtype=np.uint8)
         predictions = DetectionResult(
-            [Detection(10, 40, 30, 80, 0.7, 2, "Car")],
+            bboxes=np.array([[10, 10, 30, 30]]),
+            labels=np.array([0]),
+            label_names=["Car"],
+            scores=np.array([0.7]),
             saliency_map=None,
             feature_vector=None,
         )
@@ -184,28 +192,16 @@ def test_draw_multiple_objects(self, visualizer, rand_generator):
 
         # Create instance segmentation results with multiple objects
         predictions = InstanceSegmentationResult(
-            segmentedObjects=[
-                SegmentedObject(
-                    xmin=10,
-                    ymin=10,
-                    xmax=30,
-                    ymax=30,
-                    score=0.9,
-                    id=0,
-                    mask=rand_generator.integers(2, size=(100, 100), dtype=np.uint8),
-                    str_label="person",
-                ),
-                SegmentedObject(
-                    xmin=40,
-                    ymin=40,
-                    xmax=60,
-                    ymax=60,
-                    score=0.8,
-                    id=1,
-                    mask=rand_generator.integers(2, size=(100, 100), dtype=np.uint8),
-                    str_label="car",
-                ),
-            ],
+            bboxes=np.array([[10, 10, 30, 30], [40, 40, 60, 60]]),
+            labels=np.array([0, 1]),
+            masks=np.array(
+                [
+                    rand_generator.integers(2, size=(100, 100), dtype=np.uint8),
+                    rand_generator.integers(2, size=(100, 100), dtype=np.uint8),
+                ],
+            ),
+            scores=np.array([0.9, 0.8]),
+            label_names=["person", "car"],
             saliency_map=None,
             feature_vector=None,
         )
@@ -221,7 +217,13 @@ def test_draw_no_objects(self, visualizer):
         copied_frame = frame.copy()
 
         # Create instance segmentation results with no objects
-        predictions = InstanceSegmentationResult(segmentedObjects=[], saliency_map=None, feature_vector=None)
+        predictions = InstanceSegmentationResult(
+            bboxes=np.array([]),
+            labels=np.array([]),
+            masks=np.array([]),
+            saliency_map=None,
+            feature_vector=None,
+        )
 
         drawn_frame = visualizer.draw(frame, predictions)
         assert np.array_equal(drawn_frame, copied_frame)
diff --git a/tests/unit/core/model/test_base.py b/tests/unit/core/model/test_base.py
@@ -1,17 +1,21 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import tempfile
+
 import numpy as np
+import openvino as ov
 import pytest
 import torch
 from lightning import Trainer
 from lightning.pytorch.utilities.types import LRSchedulerConfig
-from model_api.models.utils import ClassificationResult
+from model_api.models.result import ClassificationResult
 from pytest_mock import MockerFixture
 
 from otx.core.data.entity.base import OTXBatchDataEntity
 from otx.core.model.base import OTXModel, OVModel
 from otx.core.schedulers.warmup_schedulers import LinearWarmupScheduler
+from tests.unit.core.utils.test_utils import get_dummy_ov_cls_model
 
 
 class MockNNModule(torch.nn.Module):
@@ -134,19 +138,22 @@ def input_batch(self) -> OTXBatchDataEntity:
 
     @pytest.fixture()
     def model(self) -> OVModel:
-        return OVModel(model_name="efficientnet-b0-pytorch", model_type="Classification")
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            ov.save_model(get_dummy_ov_cls_model(), f"{tmp_dir}/model.xml")
+            return OVModel(model_name=f"{tmp_dir}/model.xml", model_type="Classification")
 
-    def test_create_model(self) -> None:
-        OVModel(model_name="efficientnet-b0-pytorch", model_type="Classification", force_cpu=False)
+    def test_create_model(self, model) -> None:
+        pass
 
     def test_customize_inputs(self, model, input_batch) -> None:
         inputs = model._customize_inputs(input_batch)
         assert isinstance(inputs, dict)
         assert "inputs" in inputs
         assert inputs["inputs"][1].shape == np.transpose(input_batch.images[1].numpy(), (1, 2, 0)).shape
 
-    def test_forward(self, model, input_batch) -> None:
+    def test_forward(self, model, input_batch, mocker: MockerFixture) -> None:
         model._customize_outputs = lambda x, _: x
+        model.model.postprocess = mocker.Mock(return_value=ClassificationResult())
         outputs = model.forward(input_batch)
         assert isinstance(outputs, list)
         assert len(outputs) == 3
diff --git a/tests/unit/core/utils/test_utils.py b/tests/unit/core/utils/test_utils.py
diff --git a/tests/unit/engine/test_engine.py b/tests/unit/engine/test_engine.py