Merge branch 'feature/cpp_refactoring' into rhecker/semantic_segmentation_tiling

RHeckerIntel · RHeckerIntel · commit 86746408e31d · 2025-06-17T15:36:48.000+02:00
diff --git a/src/cpp/include/tasks/anomaly.h b/src/cpp/include/tasks/anomaly.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2020-2025 Intel Corporation
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include <openvino/openvino.hpp>
+
+#include "adapters/inference_adapter.h"
+#include "tasks/results.h"
+#include "utils/config.h"
+#include "utils/vision_pipeline.h"
+
+/// Anomaly task: wires an inference adapter into a VisionPipeline and turns the
+/// model output into an AnomalyResult (score, label, mask, map and optional boxes).
+class Anomaly {
+public:
+    std::shared_ptr<InferenceAdapter> adapter;
+    VisionPipeline<AnomalyResult> pipeline;
+
+    /// Builds the pipeline around `adapter` and loads the task settings from
+    /// `adapter->getModelConfig()`. Each setting is looked up by key; the current
+    /// member value is passed as the last argument (presumably used as the
+    /// fallback when the key is absent -- confirm against utils::get_from_any_maps).
+    Anomaly(std::shared_ptr<InferenceAdapter> adapter) : adapter(adapter) {
+        // NOTE(review): both callbacks call member functions and therefore capture
+        // `this`. If VisionPipeline stores them, a copied or moved Anomaly would
+        // still call into the object it was copied from -- confirm before copying.
+        pipeline = VisionPipeline<AnomalyResult>(
+            adapter,
+            [this](cv::Mat image) {
+                return preprocess(image);
+            },
+            [this](InferenceResult result) {
+                return postprocess(result);
+            });
+
+        auto config = adapter->getModelConfig();
+        image_threshold = utils::get_from_any_maps("image_threshold", config, {}, image_threshold);
+        pixel_threshold = utils::get_from_any_maps("pixel_threshold", config, {}, pixel_threshold);
+        normalization_scale = utils::get_from_any_maps("normalization_scale", config, {}, normalization_scale);
+        // The task type has its own key; it must not be read from "pixel_threshold".
+        task = utils::get_from_any_maps("task", config, {}, task);
+        labels = utils::get_from_any_maps("labels", config, {}, labels);
+        input_shape.width = utils::get_from_any_maps("orig_width", config, {}, input_shape.width);
+        input_shape.height = utils::get_from_any_maps("orig_height", config, {}, input_shape.height);
+    }
+
+    /// Embeds preprocessing into `ov_model` (modified in place).
+    static void serialize(std::shared_ptr<ov::Model>& ov_model);
+    /// Reads the model at `model_path` and returns a ready-to-use Anomaly task.
+    static Anomaly load(const std::string& model_path);
+
+    /// Runs the pipeline on one image.
+    AnomalyResult infer(cv::Mat image);
+    /// Runs the pipeline on several images; one result per input image is expected.
+    std::vector<AnomalyResult> inferBatch(std::vector<cv::Mat> images);
+
+    /// Wraps the image into the tensor map fed to the adapter.
+    std::map<std::string, ov::Tensor> preprocess(cv::Mat);
+    /// Converts a raw inference result into an AnomalyResult.
+    AnomalyResult postprocess(InferenceResult& infResult);
+
+private:
+    // Map `tensor` / `tensor` value around `threshold` into [0, 1] using normalization_scale.
+    cv::Mat normalize(cv::Mat& tensor, float threshold);
+    double normalize(double& tensor, float threshold);
+    // Bounding rectangles of the external contours found in `mask`.
+    std::vector<cv::Rect> getBoxes(cv::Mat& mask);
+
+private:
+    cv::Size input_shape;             // filled from "orig_width" / "orig_height"
+    std::vector<std::string> labels;  // index 0 is used for normal, index 1 for anomalous
+
+    float image_threshold = 0.5f;      // score above this selects labels[1]
+    float pixel_threshold = 0.5f;      // anomaly-map values at or above this are set in the mask
+    float normalization_scale = 1.0f;  // divisor used by normalize()
+    std::string task = "segmentation";  // "detection" additionally produces pred_boxes
+};
diff --git a/src/cpp/include/tasks/results.h b/src/cpp/include/tasks/results.h
@@ -193,3 +193,37 @@ struct ClassificationResult {
     ov::Tensor saliency_map, feature_vector,
         raw_scores;  // Contains "raw_scores", "saliency_map" and "feature_vector" model outputs if such exist
 };
+
+/// Result of the anomaly task for one image.
+struct AnomalyResult {
+    cv::Mat anomaly_map;               // may be empty when the model produces no map
+    std::vector<cv::Rect> pred_boxes;  // printed only when non-empty
+    std::string pred_label;
+    cv::Mat pred_mask;  // may be empty when the model produces no map
+    double pred_score = 0.0;
+
+    /// Writes a one-line summary: min/max of both maps, score, label and boxes.
+    /// The stream's format flags and precision are restored before returning so
+    /// that the std::fixed / std::setprecision used here do not leak to the caller.
+    friend std::ostream& operator<<(std::ostream& os, const AnomalyResult& prediction) {
+        // Empty maps are reported as min 0 / max 0 without calling cv::minMaxLoc.
+        double min_anomaly_map = 0.0;
+        double max_anomaly_map = 0.0;
+        if (!prediction.anomaly_map.empty()) {
+            cv::minMaxLoc(prediction.anomaly_map, &min_anomaly_map, &max_anomaly_map);
+        }
+        double min_pred_mask = 0.0;
+        double max_pred_mask = 0.0;
+        if (!prediction.pred_mask.empty()) {
+            cv::minMaxLoc(prediction.pred_mask, &min_pred_mask, &max_pred_mask);
+        }
+
+        const std::ios_base::fmtflags saved_flags = os.flags();
+        const std::streamsize saved_precision = os.precision();
+
+        os << "anomaly_map min:" << min_anomaly_map << " max:" << max_anomaly_map << ";";
+        os << "pred_score:" << std::fixed << std::setprecision(1) << prediction.pred_score << ";";
+        os << "pred_label:" << prediction.pred_label << ";";
+        os << std::fixed << std::setprecision(0) << "pred_mask min:" << min_pred_mask << " max:" << max_pred_mask
+           << ";";
+
+        if (!prediction.pred_boxes.empty()) {
+            os << "pred_boxes:";
+            for (const cv::Rect& box : prediction.pred_boxes) {
+                os << box << ",";
+            }
+        }
+
+        os.flags(saved_flags);
+        os.precision(saved_precision);
+        return os;
+    }
+
+    /// Same text as operator<<, as a string. Const so it also works on const results.
+    explicit operator std::string() const {
+        std::stringstream ss;
+        ss << *this;
+        return ss.str();
+    }
+};
diff --git a/src/cpp/src/tasks/anomaly.cpp b/src/cpp/src/tasks/anomaly.cpp
@@ -0,0 +1,157 @@
+#include "tasks/anomaly.h"
+
+#include "adapters/openvino_adapter.h"
+#include "utils/preprocessing.h"
+#include "utils/tensor.h"
+
+/// Embeds resize / channel / mean-scale preprocessing into `ov_model` (replaced in
+/// place) and stores the resulting input width and height in its "model_info" rt_info.
+void Anomaly::serialize(std::shared_ptr<ov::Model>& ov_model) {
+    auto model_input = ov_model->inputs().front();
+
+    // Use the layout attached to the input; derive one from the shape when none is set.
+    auto input_layout = ov::layout::get_layout(model_input);
+    if (input_layout.empty()) {
+        input_layout = utils::getLayoutFromShape(model_input.get_partial_shape());
+    }
+
+    const ov::Shape max_shape = model_input.get_partial_shape().get_max_shape();
+
+    // Fixed preprocessing options.
+    utils::RESIZE_MODE resize_mode = utils::RESIZE_FILL;
+    auto interpolation_mode = cv::INTER_LINEAR;
+    uint8_t pad_value = 0;
+
+    // Options that "model_info" may override.
+    bool reverse_input_channels = false;
+    std::vector<float> scale_values;
+    std::vector<float> mean_values;
+    if (ov_model->has_rt_info("model_info")) {
+        auto model_info = ov_model->get_rt_info<ov::AnyMap>("model_info");
+        reverse_input_channels =
+            utils::get_from_any_maps("reverse_input_channels", model_info, ov::AnyMap{}, reverse_input_channels);
+        scale_values = utils::get_from_any_maps("scale_values", model_info, ov::AnyMap{}, scale_values);
+        mean_values = utils::get_from_any_maps("mean_values", model_info, ov::AnyMap{}, mean_values);
+    }
+
+    // Target size is stored as {width, height}.
+    const auto width_index = ov::layout::width_idx(input_layout);
+    const auto height_index = ov::layout::height_idx(input_layout);
+    auto target_size = ov::Shape{max_shape[width_index], max_shape[height_index]};
+
+    ov_model = utils::embedProcessing(ov_model,
+                                      model_input.get_any_name(),
+                                      input_layout,
+                                      resize_mode,
+                                      interpolation_mode,
+                                      target_size,
+                                      pad_value,
+                                      reverse_input_channels,
+                                      mean_values,
+                                      scale_values);
+
+    ov_model->set_rt_info(target_size[0], "model_info", "orig_width");
+    ov_model->set_rt_info(target_size[1], "model_info", "orig_height");
+}
+
+/// Reads the model at `model_path`, embeds preprocessing unless the model already
+/// has it, loads it into an OpenVINO adapter on "AUTO" and returns the task object.
+/// @throws std::runtime_error when the model has no "model_info"/"model_type" rt_info.
+Anomaly Anomaly::load(const std::string& model_path) {
+    ov::Core core;
+    std::shared_ptr<ov::Model> model = core.read_model(model_path);
+
+    // A model without a declared type is rejected up front.
+    if (!model->has_rt_info("model_info", "model_type")) {
+        throw std::runtime_error("Incorrect or unsupported model_type");
+    }
+    std::cout << "has model type in info: " << model->get_rt_info<std::string>("model_info", "model_type")
+              << std::endl;
+
+    if (!utils::model_has_embedded_processing(model)) {
+        serialize(model);
+    } else {
+        std::cout << "model already was serialized" << std::endl;
+    }
+
+    auto openvino_adapter = std::make_shared<OpenVINOInferenceAdapter>();
+    openvino_adapter->loadModel(model, core, "AUTO");
+    return Anomaly(openvino_adapter);
+}
+
+/// Runs the vision pipeline on a single image and returns its result.
+AnomalyResult Anomaly::infer(cv::Mat image) {
+    AnomalyResult prediction = pipeline.infer(image);
+    return prediction;
+}
+
+/// Runs the vision pipeline on a batch of images and returns the pipeline's results.
+std::vector<AnomalyResult> Anomaly::inferBatch(std::vector<cv::Mat> images) {
+    std::vector<AnomalyResult> predictions = pipeline.inferBatch(images);
+    return predictions;
+}
+
+/// Builds the adapter input: the image wrapped as a tensor, keyed by the first input name.
+std::map<std::string, ov::Tensor> Anomaly::preprocess(cv::Mat image) {
+    // Copy the name out: getInputNames() may return a temporary.
+    const std::string first_input_name = adapter->getInputNames()[0];
+
+    std::map<std::string, ov::Tensor> tensors_by_name;
+    tensors_by_name.emplace(first_input_name, utils::wrapMat2Tensor(image));
+    return tensors_by_name;
+}
+
+/// Converts the first model output into an AnomalyResult.
+///
+/// A 1-D output is treated as a single image score and yields empty maps, mask and
+/// boxes. Any other output is viewed as a float anomaly map whose maximum is the
+/// image score; the map and the thresholded mask are resized to the input image size.
+///
+/// @param infResult raw inference output plus the original input image size.
+/// @return score, label, mask, map and (for the "detection" task) boxes.
+/// @throws std::runtime_error when `labels` has no entry for the predicted class.
+AnomalyResult Anomaly::postprocess(InferenceResult& infResult) {
+    auto tensorName = adapter->getOutputNames().front();
+    ov::Tensor predictions = infResult.data[tensorName];
+    const auto& inputImgSize = infResult.inputImageSize;
+
+    double pred_score = 0.0;
+    cv::Mat anomaly_map;
+    if (predictions.get_shape().size() == 1) {
+        pred_score = predictions.data<float>()[0];
+    } else {
+        const ov::Layout& layout = utils::getLayoutFromShape(predictions.get_shape());
+        const ov::Shape& predictionsShape = predictions.get_shape();
+        // Non-owning view over the tensor memory; it is replaced by owned data below.
+        anomaly_map = cv::Mat(static_cast<int>(predictionsShape[ov::layout::height_idx(layout)]),
+                              static_cast<int>(predictionsShape[ov::layout::width_idx(layout)]),
+                              CV_32FC1,
+                              predictions.data<float>());
+        // find the max predicted score
+        cv::minMaxLoc(anomaly_map, nullptr, &pred_score);
+    }
+
+    // labels[0] is the normal class, labels[1] the anomalous one.
+    const size_t label_index = pred_score > image_threshold ? 1 : 0;
+    if (label_index >= labels.size()) {
+        throw std::runtime_error("Anomaly: 'labels' in the model config has no entry for the predicted class");
+    }
+    std::string pred_label = labels[label_index];
+
+    cv::Mat pred_mask;
+    std::vector<cv::Rect> pred_boxes;
+    // Map-based outputs only: cv::resize rejects empty input, so score-only models skip this part.
+    if (!anomaly_map.empty()) {
+        const cv::Size outputSize{inputImgSize.width, inputImgSize.height};
+
+        // The comparison yields 0/255; scaling by 1/255 turns it into a 0/1 mask.
+        pred_mask = anomaly_map >= pixel_threshold;
+        pred_mask.convertTo(pred_mask, CV_8UC1, 1 / 255.);
+        cv::resize(pred_mask, pred_mask, outputSize);
+
+        anomaly_map = normalize(anomaly_map, pixel_threshold);
+        anomaly_map.convertTo(anomaly_map, CV_8UC1, 255);
+        cv::resize(anomaly_map, anomaly_map, outputSize);
+
+        if (task == "detection") {
+            pred_boxes = getBoxes(pred_mask);
+        }
+    }
+
+    pred_score = normalize(pred_score, image_threshold);
+    if (pred_label == labels[0]) {    // normal label
+        pred_score = 1 - pred_score;  // Score of normal is 1 - score of anomaly
+    }
+
+    AnomalyResult result;
+    result.anomaly_map = std::move(anomaly_map);
+    result.pred_score = pred_score;
+    result.pred_label = std::move(pred_label);
+    result.pred_mask = std::move(pred_mask);
+    result.pred_boxes = std::move(pred_boxes);
+    return result;
+}
+
+/// Element-wise: shift by `threshold`, divide by normalization_scale, add 0.5,
+/// then clamp the result to [0, 1]. Returns a new matrix.
+cv::Mat Anomaly::normalize(cv::Mat& tensor, float threshold) {
+    cv::Mat centred = ((tensor - threshold) / normalization_scale) + 0.5f;
+    cv::Mat lower_bounded = cv::max(centred, 0.f);
+    cv::Mat clamped = cv::min(lower_bounded, 1.f);
+    return clamped;
+}
+
+/// Scalar counterpart: shift by `threshold`, divide by normalization_scale,
+/// add 0.5, then clamp the result to [0, 1].
+double Anomaly::normalize(double& value, float threshold) {
+    const double centred = ((value - threshold) / normalization_scale) + 0.5f;
+    if (centred < 0.) {
+        return 0.;
+    }
+    if (1. < centred) {
+        return 1.;
+    }
+    return centred;
+}
+
+/// Returns the bounding rectangle of every external contour found in `mask`.
+std::vector<cv::Rect> Anomaly::getBoxes(cv::Mat& mask) {
+    std::vector<std::vector<cv::Point>> contours;
+    cv::findContours(mask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
+
+    std::vector<cv::Rect> boxes;
+    boxes.reserve(contours.size());  // exactly one box per contour
+    for (const auto& contour : contours) {
+        boxes.push_back(cv::boundingRect(contour));
+    }
+    return boxes;
+}
diff --git a/tests/cpp/test_accuracy.cpp b/tests/cpp/test_accuracy.cpp
@@ -7,6 +7,7 @@
 #include <thread>
 
 #include "matchers.h"
+#include "tasks/anomaly.h"
 #include "tasks/classification.h"
 #include "tasks/detection.h"
 #include "tasks/instance_segmentation.h"
@@ -126,6 +127,16 @@ TEST_P(ModelParameterizedTest, AccuracyTest) {
             std::string image_path = DATA_DIR + '/' + test_data.image;
             auto image = load_image(image_path, use_tiling, data.input_res);
             auto result = model.infer(image);
+            EXPECT_EQ(std::string{result}, test_data.reference[0]);
+        }
+    } else if (data.type == "AnomalyDetection") {
+        auto model = Anomaly::load(model_path);
+
+        for (auto& test_data : data.test_data) {
+            std::string image_path = DATA_DIR + '/' + test_data.image;
+            cv::Mat image = cv::imread(image_path);
+            auto result = model.infer(image);
+
             EXPECT_EQ(std::string{result}, test_data.reference[0]);
         }
     } else {
@@ -169,7 +180,15 @@ TEST_P(ModelParameterizedTest, SerializedAccuracyTest) {
         }
     } else if (data.type == "ClassificationModel") {
         auto model = Classification::load(model_path);
+        for (auto& test_data : data.test_data) {
+            std::string image_path = DATA_DIR + '/' + test_data.image;
+            cv::Mat image = cv::imread(image_path);
+            auto result = model.infer(image);
 
+            EXPECT_EQ(std::string{result}, test_data.reference[0]);
+        }
+    } else if (data.type == "AnomalyDetection") {
+        auto model = Anomaly::load(model_path);
         for (auto& test_data : data.test_data) {
             std::string image_path = DATA_DIR + '/' + test_data.image;
             auto image = load_image(image_path, use_tiling, data.input_res);
@@ -228,6 +247,17 @@ TEST_P(ModelParameterizedTest, AccuracyTestBatch) {
             auto image = load_image(image_path, use_tiling, data.input_res);
             auto result = model.inferBatch({image});
 
+            ASSERT_EQ(result.size(), 1);
+            EXPECT_EQ(std::string{result[0]}, test_data.reference[0]);
+        }
+    } else if (data.type == "AnomalyDetection") {
+        auto model = Anomaly::load(model_path);
+
+        for (auto& test_data : data.test_data) {
+            std::string image_path = DATA_DIR + '/' + test_data.image;
+            cv::Mat image = cv::imread(image_path);
+            auto result = model.inferBatch({image});
+
             ASSERT_EQ(result.size(), 1);
             EXPECT_EQ(std::string{result[0]}, test_data.reference[0]);
         }
diff --git a/tests/python/accuracy/public_scope.json b/tests/python/accuracy/public_scope.json
@@ -187,5 +187,29 @@
         "reference": ["0 (1): 0.849, [0], [0], [0]"]
       }
     ]
+  },
+  {
+    "name": "otx_models/anomaly_padim_bottle_mvtec.xml",
+    "type": "AnomalyDetection",
+    "test_data": [
+      {
+        "image": "coco128/images/train2017/000000000074.jpg",
+        "reference": [
+          "anomaly_map min:151 max:255;pred_score:1.0;pred_label:Anomaly;pred_mask min:1 max:1;"
+        ]
+      }
+    ]
+  },
+  {
+    "name": "otx_models/anomaly_stfpm_bottle_mvtec.xml",
+    "type": "AnomalyDetection",
+    "test_data": [
+      {
+        "image": "coco128/images/train2017/000000000074.jpg",
+        "reference": [
+          "anomaly_map min:124 max:225;pred_score:0.9;pred_label:Anomaly;pred_mask min:0 max:1;"
+        ]
+      }
+    ]
   }
 ]