From b5d22be9aadf76ccd7461210c38c828803c7de17 Mon Sep 17 00:00:00 2001 From: Ronald Hecker Date: Mon, 9 Jun 2025 11:44:49 +0200 Subject: [PATCH 01/10] Remove cpp implementation --- src/cpp/CMakeLists.txt | 106 --- .../include/adapters/inference_adapter.h | 46 - .../include/adapters/openvino_adapter.h | 52 -- src/cpp/adapters/src/openvino_adapter.cpp | 134 --- src/cpp/cmake/model_apiConfig.cmake | 7 - src/cpp/install_dependencies.sh | 16 - src/cpp/models/include/models/anomaly_model.h | 47 - src/cpp/models/include/models/base_model.h | 108 --- .../include/models/classification_model.h | 126 --- .../models/include/models/detection_model.h | 35 - .../include/models/detection_model_ext.h | 26 - .../include/models/detection_model_ssd.h | 38 - .../include/models/detection_model_yolo.h | 95 -- .../models/detection_model_yolov3_onnx.h | 31 - .../include/models/detection_model_yolox.h | 34 - src/cpp/models/include/models/input_data.h | 30 - .../include/models/instance_segmentation.h | 52 -- .../include/models/internal_model_data.h | 37 - .../include/models/keypoint_detection.h | 46 - src/cpp/models/include/models/results.h | 379 -------- .../include/models/segmentation_model.h | 54 -- src/cpp/models/src/anomaly_model.cpp | 205 ----- src/cpp/models/src/base_model.cpp | 371 -------- src/cpp/models/src/classification_model.cpp | 838 ------------------ src/cpp/models/src/detection_model.cpp | 118 --- src/cpp/models/src/detection_model_ext.cpp | 40 - src/cpp/models/src/detection_model_ssd.cpp | 351 -------- src/cpp/models/src/detection_model_yolo.cpp | 642 -------------- .../src/detection_model_yolov3_onnx.cpp | 174 ---- src/cpp/models/src/detection_model_yolox.cpp | 204 ----- src/cpp/models/src/instance_segmentation.cpp | 375 -------- src/cpp/models/src/keypoint_detection.cpp | 266 ------ src/cpp/models/src/segmentation_model.cpp | 331 ------- src/cpp/py_bindings/.gitignore | 3 - src/cpp/py_bindings/CMakeLists.txt | 39 - src/cpp/py_bindings/pyproject.toml | 32 - .../py_bindings/src/vision_api/__init__.py | 13 - .../py_bindings/src/vision_api/py_base.cpp | 24 - .../src/vision_api/py_classification.cpp | 88 -- .../py_bindings/src/vision_api/py_utils.cpp | 33 - .../py_bindings/src/vision_api/py_utils.hpp | 17 - .../src/vision_api/py_vision_api.cpp | 17 - src/cpp/tilers/include/tilers/detection.h | 30 - .../include/tilers/instance_segmentation.h | 32 - .../include/tilers/semantic_segmentation.h | 29 - src/cpp/tilers/include/tilers/tiler_base.h | 47 - src/cpp/tilers/src/detection.cpp | 225 ----- src/cpp/tilers/src/instance_segmentation.cpp | 196 ---- src/cpp/tilers/src/semantic_segmentation.cpp | 106 --- src/cpp/tilers/src/tiler_base.cpp | 114 --- src/cpp/utils/include/utils/args_helper.hpp | 62 -- .../utils/include/utils/async_infer_queue.hpp | 50 -- src/cpp/utils/include/utils/common.hpp | 58 -- src/cpp/utils/include/utils/image_utils.h | 45 - src/cpp/utils/include/utils/kuhn_munkres.hpp | 56 -- src/cpp/utils/include/utils/nms.hpp | 116 --- src/cpp/utils/include/utils/ocv_common.hpp | 200 ----- .../include/utils/performance_metrics.hpp | 88 -- src/cpp/utils/include/utils/slog.hpp | 95 -- src/cpp/utils/src/args_helper.cpp | 110 --- src/cpp/utils/src/async_infer_queue.cpp | 153 ---- src/cpp/utils/src/image_utils.cpp | 374 -------- src/cpp/utils/src/kuhn_munkres.cpp | 170 ---- src/cpp/utils/src/nms.cpp | 28 - 64 files changed, 8064 deletions(-) delete mode 100644 src/cpp/CMakeLists.txt delete mode 100644 src/cpp/adapters/include/adapters/inference_adapter.h delete mode 100644 src/cpp/adapters/include/adapters/openvino_adapter.h delete mode 100644 src/cpp/adapters/src/openvino_adapter.cpp delete mode 100644 src/cpp/cmake/model_apiConfig.cmake delete mode 100755 src/cpp/install_dependencies.sh delete mode 100644 src/cpp/models/include/models/anomaly_model.h delete mode 100644 src/cpp/models/include/models/base_model.h delete mode 100644 src/cpp/models/include/models/classification_model.h delete mode 100644 src/cpp/models/include/models/detection_model.h delete mode 100644 src/cpp/models/include/models/detection_model_ext.h delete mode 100644 src/cpp/models/include/models/detection_model_ssd.h delete mode 100644 src/cpp/models/include/models/detection_model_yolo.h delete mode 100644 src/cpp/models/include/models/detection_model_yolov3_onnx.h delete mode 100644 src/cpp/models/include/models/detection_model_yolox.h delete mode 100644 src/cpp/models/include/models/input_data.h delete mode 100644 src/cpp/models/include/models/instance_segmentation.h delete mode 100644 src/cpp/models/include/models/internal_model_data.h delete mode 100644 src/cpp/models/include/models/keypoint_detection.h delete mode 100644 src/cpp/models/include/models/results.h delete mode 100644 src/cpp/models/include/models/segmentation_model.h delete mode 100644 src/cpp/models/src/anomaly_model.cpp delete mode 100644 src/cpp/models/src/base_model.cpp delete mode 100644 src/cpp/models/src/classification_model.cpp delete mode 100644 src/cpp/models/src/detection_model.cpp delete mode 100644 src/cpp/models/src/detection_model_ext.cpp delete mode 100644 src/cpp/models/src/detection_model_ssd.cpp delete mode 100644 src/cpp/models/src/detection_model_yolo.cpp delete mode 100644 src/cpp/models/src/detection_model_yolov3_onnx.cpp delete mode 100644 src/cpp/models/src/detection_model_yolox.cpp delete mode 100644 src/cpp/models/src/instance_segmentation.cpp delete mode 100644 src/cpp/models/src/keypoint_detection.cpp delete mode 100644 src/cpp/models/src/segmentation_model.cpp delete mode 100644 src/cpp/py_bindings/.gitignore delete mode 100644 src/cpp/py_bindings/CMakeLists.txt delete mode 100644 src/cpp/py_bindings/pyproject.toml delete mode 100644 src/cpp/py_bindings/src/vision_api/__init__.py delete mode 100644 src/cpp/py_bindings/src/vision_api/py_base.cpp delete mode 100644 src/cpp/py_bindings/src/vision_api/py_classification.cpp delete mode 100644 src/cpp/py_bindings/src/vision_api/py_utils.cpp delete mode 100644 src/cpp/py_bindings/src/vision_api/py_utils.hpp delete mode 100644 src/cpp/py_bindings/src/vision_api/py_vision_api.cpp delete mode 100644 src/cpp/tilers/include/tilers/detection.h delete mode 100644 src/cpp/tilers/include/tilers/instance_segmentation.h delete mode 100644 src/cpp/tilers/include/tilers/semantic_segmentation.h delete mode 100644 src/cpp/tilers/include/tilers/tiler_base.h delete mode 100644 src/cpp/tilers/src/detection.cpp delete mode 100644 src/cpp/tilers/src/instance_segmentation.cpp delete mode 100644 src/cpp/tilers/src/semantic_segmentation.cpp delete mode 100644 src/cpp/tilers/src/tiler_base.cpp delete mode 100644 src/cpp/utils/include/utils/args_helper.hpp delete mode 100644 src/cpp/utils/include/utils/async_infer_queue.hpp delete mode 100644 src/cpp/utils/include/utils/common.hpp delete mode 100644 src/cpp/utils/include/utils/image_utils.h delete mode 100644 src/cpp/utils/include/utils/kuhn_munkres.hpp delete mode 100644 src/cpp/utils/include/utils/nms.hpp delete mode 100644 src/cpp/utils/include/utils/ocv_common.hpp delete mode 100644 src/cpp/utils/include/utils/performance_metrics.hpp delete mode 100644 src/cpp/utils/include/utils/slog.hpp delete mode 100644 src/cpp/utils/src/args_helper.cpp delete mode 100644 src/cpp/utils/src/async_infer_queue.cpp delete mode 100644 src/cpp/utils/src/image_utils.cpp delete mode 100644 src/cpp/utils/src/kuhn_munkres.cpp delete mode 100644 src/cpp/utils/src/nms.cpp diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt deleted file mode 100644 index 880f211a..00000000 --- a/src/cpp/CMakeLists.txt +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -cmake_minimum_required(VERSION 3.26) - -# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with -# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options -get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) -if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE) - message(STATUS "CMAKE_BUILD_TYPE not defined, 'Release' will be used") - # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). Otherwise project() sets its value and set() doesn't take an effect - set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...") -endif() - -set(model_api_VERSION 0.0.0) - -project(model_api - VERSION ${model_api_VERSION} - DESCRIPTION "OpenVINO Vision API" - HOMEPAGE_URL "https://github.com/openvinotoolkit/model_api/" - LANGUAGES CXX C) - -if(WIN32) - if(NOT "${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - message(FATAL_ERROR "Only 64-bit supported on Windows") - endif() - - add_definitions(-DNOMINMAX) -endif() - -find_package(OpenCV REQUIRED COMPONENTS core imgproc) - -find_package(OpenVINO REQUIRED - COMPONENTS Runtime Threading) - -include(FetchContent) -FetchContent_Declare(json GIT_REPOSITORY https://github.com/nlohmann/json.git - GIT_TAG d41ca94fa85d5119852e2f7a3f94335cc7cb0486 # PR #4709, fixes cmake deprecation warnings - ) -FetchContent_MakeAvailable(json) - -file(GLOB MODELS_SOURCES ./models/src/*.cpp) -file(GLOB MODELS_HEADERS ./models/include/models/*.h) -file(GLOB_RECURSE UTILS_HEADERS ./utils/include/*) -file(GLOB_RECURSE UTILS_SOURCES ./utils/src/*.cpp) -file(GLOB_RECURSE ADAPTERS_HEADERS ./adapters/include/*) -file(GLOB_RECURSE ADAPTERS_SOURCES ./adapters/src/*.cpp) -file(GLOB_RECURSE TILERS_HEADERS ./tilers/include/tilers/*.h) -file(GLOB_RECURSE TILERS_SOURCES ./tilers/src/*.cpp) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("models/src" FILES ${MODELS_SOURCES}) -source_group("models/include" FILES ${MODELS_HEADERS}) -source_group("utils/src" FILES ${UTILS_SOURCES}) -source_group("utils/include" FILES ${UTILS_HEADERS}) -source_group("adapters/src" FILES ${ADAPTERS_SOURCES}) -source_group("adapters/include" FILES ${ADAPTERS_HEADERS}) -source_group("tilers/src" FILES ${TILERS_SOURCES}) -source_group("tilers/include" FILES ${TILERS_HEADERS}) - -add_library(model_api STATIC ${MODELS_SOURCES} ${UTILS_SOURCES} ${ADAPTERS_SOURCES} ${TILERS_SOURCES}) -target_include_directories(model_api PUBLIC "$" "$" "$" "$" "$") -target_link_libraries(model_api PUBLIC openvino::runtime opencv_core opencv_imgproc) -target_link_libraries(model_api PRIVATE $) -set_target_properties(model_api PROPERTIES CXX_STANDARD 17) -set_target_properties(model_api PROPERTIES CXX_STANDARD_REQUIRED ON) -if(MSVC) - target_compile_options(model_api PRIVATE /wd4251 /wd4275 /wd4267 # disable some warnings - /W3 # Specify the level of warnings to be generated by the compiler - /EHsc) # Enable standard C++ stack unwinding, assume functions with extern "C" never throw -elseif(CMAKE_CXX_COMPILER_ID MATCHES "^GNU|(Apple)?Clang$") - target_compile_options(model_api PRIVATE -Wall -Wextra -Wpedantic) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") -endif() - -include(GenerateExportHeader) - -generate_export_header(model_api) -set_property(TARGET model_api PROPERTY VERSION ${model_api_VERSION}) -set_property(TARGET model_api PROPERTY SOVERSION 3) -set_property(TARGET model_api PROPERTY INTERFACE_model_api_MAJOR_VERSION 3) -set_property(TARGET model_api APPEND PROPERTY COMPATIBLE_INTERFACE_STRING model_api_MAJOR_VERSION) - -install(TARGETS model_api EXPORT model_apiTargets - LIBRARY DESTINATION lib COMPONENT Devel - ARCHIVE DESTINATION lib COMPONENT Devel - RUNTIME DESTINATION bin COMPONENT Devel - INCLUDES DESTINATION include) -install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/models/include/" "${CMAKE_CURRENT_SOURCE_DIR}/utils/include/" "${CMAKE_CURRENT_SOURCE_DIR}/adapters/include/" "${CMAKE_CURRENT_SOURCE_DIR}/tilers/include/" - DESTINATION include COMPONENT Devel) - -include(CMakePackageConfigHelpers) -write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/model_apiConfigVersion.cmake" VERSION ${model_api_VERSION} COMPATIBILITY AnyNewerVersion) - -export(EXPORT model_apiTargets FILE "${CMAKE_CURRENT_BINARY_DIR}/model_apiTargets.cmake") -configure_file(cmake/model_apiConfig.cmake "${CMAKE_CURRENT_BINARY_DIR}/model_apiConfig.cmake" COPYONLY) - -set(ConfigPackageLocation lib/cmake/model_api) -install(EXPORT model_apiTargets FILE model_apiTargets.cmake DESTINATION ${ConfigPackageLocation}) -install(FILES cmake/model_apiConfig.cmake "${CMAKE_CURRENT_BINARY_DIR}/model_apiConfigVersion.cmake" - DESTINATION ${ConfigPackageLocation} COMPONENT Devel) - -set(CPACK_PACKAGE_VERSION ${model_api_VERSION}) -include(CPack) diff --git a/src/cpp/adapters/include/adapters/inference_adapter.h b/src/cpp/adapters/include/adapters/inference_adapter.h deleted file mode 100644 index 8d28e84e..00000000 --- a/src/cpp/adapters/include/adapters/inference_adapter.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include -#include -#include -#include -#include - -struct InputData; -struct InferenceResult; - -using InferenceOutput = std::map; -using InferenceInput = std::map; -using CallbackData = std::shared_ptr; - -// The interface doesn't have implementation -class InferenceAdapter { -public: - virtual ~InferenceAdapter() = default; - - virtual InferenceOutput infer(const InferenceInput& input) = 0; - virtual void infer(const InferenceInput& input, InferenceOutput& output) = 0; - virtual void setCallback(std::function callback) = 0; - virtual void inferAsync(const InferenceInput& input, CallbackData callback_args) = 0; - virtual bool isReady() = 0; - virtual void awaitAll() = 0; - virtual void awaitAny() = 0; - virtual size_t getNumAsyncExecutors() const = 0; - virtual void loadModel(const std::shared_ptr& model, - ov::Core& core, - const std::string& device = "", - const ov::AnyMap& compilationConfig = {}, - size_t max_num_requests = 0) = 0; - virtual ov::PartialShape getInputShape(const std::string& inputName) const = 0; - virtual ov::PartialShape getOutputShape(const std::string& inputName) const = 0; - virtual ov::element::Type_t getInputDatatype(const std::string& inputName) const = 0; - virtual ov::element::Type_t getOutputDatatype(const std::string& outputName) const = 0; - virtual std::vector getInputNames() const = 0; - virtual std::vector getOutputNames() const = 0; - virtual const ov::AnyMap& getModelConfig() const = 0; -}; diff --git a/src/cpp/adapters/include/adapters/openvino_adapter.h b/src/cpp/adapters/include/adapters/openvino_adapter.h deleted file mode 100644 index 7e1db1ac..00000000 --- a/src/cpp/adapters/include/adapters/openvino_adapter.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include -#include -#include -#include -#include - -#include "adapters/inference_adapter.h" -#include "utils/async_infer_queue.hpp" - -class OpenVINOInferenceAdapter : public InferenceAdapter { -public: - OpenVINOInferenceAdapter() = default; - - virtual InferenceOutput infer(const InferenceInput& input) override; - virtual void infer(const InferenceInput& input, InferenceOutput& output) override; - virtual void inferAsync(const InferenceInput& input, const CallbackData callback_args) override; - virtual void setCallback(std::function callback); - virtual bool isReady(); - virtual void awaitAll(); - virtual void awaitAny(); - virtual void loadModel(const std::shared_ptr& model, - ov::Core& core, - const std::string& device = "", - const ov::AnyMap& compilationConfig = {}, - size_t max_num_requests = 1) override; - virtual size_t getNumAsyncExecutors() const; - virtual ov::PartialShape getInputShape(const std::string& inputName) const override; - virtual ov::PartialShape getOutputShape(const std::string& outputName) const override; - virtual ov::element::Type_t getInputDatatype(const std::string& inputName) const override; - virtual ov::element::Type_t getOutputDatatype(const std::string& outputName) const override; - virtual std::vector getInputNames() const override; - virtual std::vector getOutputNames() const override; - virtual const ov::AnyMap& getModelConfig() const override; - -protected: - void initInputsOutputs(); - -protected: - // Depends on the implementation details but we should share the model state in this class - std::vector inputNames; - std::vector outputNames; - ov::CompiledModel compiledModel; - std::unique_ptr asyncQueue; - ov::AnyMap modelConfig; // the content of model_info section of rt_info -}; diff --git a/src/cpp/adapters/src/openvino_adapter.cpp b/src/cpp/adapters/src/openvino_adapter.cpp deleted file mode 100644 index 087b6ba7..00000000 --- a/src/cpp/adapters/src/openvino_adapter.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "adapters/openvino_adapter.h" - -#include -#include -#include -#include - -void OpenVINOInferenceAdapter::loadModel(const std::shared_ptr& model, - ov::Core& core, - const std::string& device, - const ov::AnyMap& compilationConfig, - size_t max_num_requests) { - slog::info << "Loading model to the plugin" << slog::endl; - ov::AnyMap customCompilationConfig(compilationConfig); - if (max_num_requests != 1) { - if (customCompilationConfig.find("PERFORMANCE_HINT") == customCompilationConfig.end()) { - customCompilationConfig["PERFORMANCE_HINT"] = ov::hint::PerformanceMode::THROUGHPUT; - } - if (max_num_requests > 0) { - if (customCompilationConfig.find("PERFORMANCE_HINT_NUM_REQUESTS") == customCompilationConfig.end()) { - customCompilationConfig["PERFORMANCE_HINT_NUM_REQUESTS"] = ov::hint::num_requests(max_num_requests); - } - } - } else { - if (customCompilationConfig.find("PERFORMANCE_HINT") == customCompilationConfig.end()) { - customCompilationConfig["PERFORMANCE_HINT"] = ov::hint::PerformanceMode::LATENCY; - } - } - - compiledModel = core.compile_model(model, device, customCompilationConfig); - asyncQueue = std::make_unique(compiledModel, max_num_requests); - initInputsOutputs(); - - if (model->has_rt_info({"model_info"})) { - modelConfig = model->get_rt_info("model_info"); - } -} - -void OpenVINOInferenceAdapter::infer(const InferenceInput& input, InferenceOutput& output) { - auto request = asyncQueue->operator[](asyncQueue->get_idle_request_id()); - for (const auto& [name, tensor] : input) { - request.set_tensor(name, tensor); - } - for (const auto& [name, tensor] : output) { - request.set_tensor(name, tensor); - } - request.infer(); - for (const auto& name : outputNames) { - output[name] = request.get_tensor(name); - } -} - -InferenceOutput OpenVINOInferenceAdapter::infer(const InferenceInput& input) { - auto request = asyncQueue->operator[](asyncQueue->get_idle_request_id()); - // Fill input blobs - for (const auto& item : input) { - request.set_tensor(item.first, item.second); - } - - // Do inference - request.infer(); - - // Processing output blobs - InferenceOutput output; - for (const auto& item : outputNames) { - output.emplace(item, request.get_tensor(item)); - } - - return output; -} - -void OpenVINOInferenceAdapter::inferAsync(const InferenceInput& input, CallbackData callback_args) { - asyncQueue->start_async(input, callback_args); -} - -void OpenVINOInferenceAdapter::setCallback(std::function callback) { - asyncQueue->set_custom_callbacks(callback); -} - -bool OpenVINOInferenceAdapter::isReady() { - return asyncQueue->is_ready(); -} - -void OpenVINOInferenceAdapter::awaitAll() { - asyncQueue->wait_all(); -} - -void OpenVINOInferenceAdapter::awaitAny() { - asyncQueue->get_idle_request_id(); -} - -size_t OpenVINOInferenceAdapter::getNumAsyncExecutors() const { - return asyncQueue->size(); -} - -ov::PartialShape OpenVINOInferenceAdapter::getInputShape(const std::string& inputName) const { - return compiledModel.input(inputName).get_partial_shape(); -} -ov::PartialShape OpenVINOInferenceAdapter::getOutputShape(const std::string& outputName) const { - return compiledModel.output(outputName).get_partial_shape(); -} - -void OpenVINOInferenceAdapter::initInputsOutputs() { - for (const auto& input : compiledModel.inputs()) { - inputNames.push_back(input.get_any_name()); - } - - for (const auto& output : compiledModel.outputs()) { - outputNames.push_back(output.get_any_name()); - } -} -ov::element::Type_t OpenVINOInferenceAdapter::getInputDatatype(const std::string&) const { - throw std::runtime_error("Not implemented"); -} -ov::element::Type_t OpenVINOInferenceAdapter::getOutputDatatype(const std::string&) const { - throw std::runtime_error("Not implemented"); -} - -std::vector OpenVINOInferenceAdapter::getInputNames() const { - return inputNames; -} - -std::vector OpenVINOInferenceAdapter::getOutputNames() const { - return outputNames; -} - -const ov::AnyMap& OpenVINOInferenceAdapter::getModelConfig() const { - return modelConfig; -} diff --git a/src/cpp/cmake/model_apiConfig.cmake b/src/cpp/cmake/model_apiConfig.cmake deleted file mode 100644 index 570679ce..00000000 --- a/src/cpp/cmake/model_apiConfig.cmake +++ /dev/null @@ -1,7 +0,0 @@ -include(CMakeFindDependencyMacro) -find_dependency(OpenCV COMPONENTS core imgproc) -find_dependency(OpenVINO COMPONENTS Runtime) - -include("${CMAKE_CURRENT_LIST_DIR}/model_apiTargets.cmake") - -check_required_components() diff --git a/src/cpp/install_dependencies.sh b/src/cpp/install_dependencies.sh deleted file mode 100755 index caac2523..00000000 --- a/src/cpp/install_dependencies.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# Added required keys / do the update -wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - -apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - -echo "deb https://apt.repos.intel.com/openvino/2025 ubuntu22 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2025.list - -apt update - -#Install OpenCV -apt-get install libopencv-dev - -# Install OpenVINO -sudo apt install openvino-2025.0.0 diff --git a/src/cpp/models/include/models/anomaly_model.h b/src/cpp/models/include/models/anomaly_model.h deleted file mode 100644 index 1cc5be22..00000000 --- a/src/cpp/models/include/models/anomaly_model.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include "models/base_model.h" - -namespace ov { -class Model; -} // namespace ov -struct AnomalyResult; -struct ImageInputData; - -class AnomalyModel : public BaseModel { -public: - AnomalyModel(std::shared_ptr& model, const ov::AnyMap& configuration); - AnomalyModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); - - static std::unique_ptr create_model(const std::string& modelFile, - const ov::AnyMap& configuration = {}, - bool preload = true, - const std::string& device = "AUTO"); - static std::unique_ptr create_model(std::shared_ptr& adapter); - - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); - std::unique_ptr postprocess(InferenceResult& infResult) override; - - friend std::ostream& operator<<(std::ostream& os, std::unique_ptr& model); - - static std::string ModelType; - -protected: - float imageThreshold{0.5f}; - float pixelThreshold{0.5f}; - float normalizationScale{1.0f}; - std::string task = "segmentation"; - - void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); - - void prepareInputsOutputs(std::shared_ptr& model) override; - void updateModelInfo() override; - cv::Mat normalize(cv::Mat& tensor, float threshold); - double normalize(double& tensor, float threshold); - std::vector getBoxes(cv::Mat& mask); -}; diff --git a/src/cpp/models/include/models/base_model.h b/src/cpp/models/include/models/base_model.h deleted file mode 100644 index 85131805..00000000 --- a/src/cpp/models/include/models/base_model.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -#include -#include - -#include "adapters/inference_adapter.h" -#include "models/input_data.h" -#include "models/results.h" -#include "utils/args_helper.hpp" -#include "utils/image_utils.h" -#include "utils/ocv_common.hpp" - -namespace ov { -class InferRequest; -} // namespace ov -struct InputData; -struct InternalModelData; - -// ImageModel implements preprocess(), ImageModel's direct or indirect children are expected to implement prostprocess() -class BaseModel { -public: - /// Constructor - /// @param modelFile name of model to load - /// @param useAutoResize - if true, image is resized by openvino - /// @param layout - model input layout - BaseModel(const std::string& modelFile, - const std::string& resize_type, - bool useAutoResize, - const std::string& layout = ""); - - BaseModel(std::shared_ptr& model, const ov::AnyMap& configuration); - BaseModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); - - virtual std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input); - virtual std::unique_ptr postprocess(InferenceResult& infResult) = 0; - - void load(ov::Core& core, const std::string& device, size_t num_infer_requests = 1); - - std::shared_ptr prepare(); - - virtual size_t getNumAsyncExecutors() const; - virtual bool isReady(); - virtual void awaitAll(); - virtual void awaitAny(); - virtual void setCallback( - std::function, const ov::AnyMap& callback_args)> callback); - - std::shared_ptr getModel(); - std::shared_ptr getInferenceAdapter(); - - static std::vector loadLabels(const std::string& labelFilename); - std::shared_ptr embedProcessing(std::shared_ptr& model, - const std::string& inputName, - const ov::Layout&, - RESIZE_MODE resize_mode, - const cv::InterpolationFlags interpolationMode, - const ov::Shape& targetShape, - uint8_t pad_value, - bool brg2rgb, - const std::vector& mean, - const std::vector& scale, - const std::type_info& dtype = typeid(int)); - virtual void inferAsync(const ImageInputData& inputData, const ov::AnyMap& callback_args = {}); - std::unique_ptr inferImage(const ImageInputData& inputData); - std::vector> inferBatchImage(const std::vector& inputData); - -protected: - RESIZE_MODE selectResizeMode(const std::string& resize_type); - virtual void updateModelInfo(); - void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); - - std::string getLabelName(size_t labelID) { - return labelID < labels.size() ? labels[labelID] : std::string("Label #") + std::to_string(labelID); - } - - std::vector labels = {}; - bool useAutoResize = false; - bool embedded_processing = false; // flag in model_info that pre/postprocessing embedded - - size_t netInputHeight = 0; - size_t netInputWidth = 0; - cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR; - RESIZE_MODE resizeMode = RESIZE_FILL; - uint8_t pad_value = 0; - bool reverse_input_channels = false; - std::vector scale_values; - std::vector mean_values; - -protected: - virtual void prepareInputsOutputs(std::shared_ptr& model) = 0; - - InputTransform inputTransform = InputTransform(); - - std::shared_ptr model; - std::vector inputNames; - std::vector outputNames; - std::string modelFile; - std::shared_ptr inferenceAdapter; - std::map inputsLayouts; - ov::Layout getInputLayout(const ov::Output& input); - std::function, const ov::AnyMap&)> lastCallback; -}; diff --git a/src/cpp/models/include/models/classification_model.h b/src/cpp/models/include/models/classification_model.h deleted file mode 100644 index 88ac03bc..00000000 --- a/src/cpp/models/include/models/classification_model.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -#include -#include -#include -#include -#include - -#include "models/base_model.h" - -namespace ov { -class Model; -} // namespace ov -struct InferenceResult; -struct ClassificationResult; -struct ResultBase; -struct ImageInputData; - -struct HierarchicalConfig { - std::map label_to_idx; - std::vector> label_tree_edges; - std::vector> all_groups; - std::map> head_idx_to_logits_range; - std::map logit_idx_to_label; - size_t num_multiclass_heads; - size_t num_multilabel_heads; - size_t num_single_label_classes; - - HierarchicalConfig() = default; - HierarchicalConfig(const std::string&); -}; - -class SimpleLabelsGraph { -public: - SimpleLabelsGraph() = default; - SimpleLabelsGraph(const std::vector& vertices_); - void add_edge(const std::string& parent, const std::string& child); - std::vector get_children(const std::string& label) const; - std::string get_parent(const std::string& label) const; - std::vector get_ancestors(const std::string& label) const; - std::vector get_labels_in_topological_order(); - -protected: - std::vector vertices; - std::unordered_map> adj; - std::unordered_map parents_map; - bool t_sort_cache_valid = false; - std::vector topological_order_cache; - - std::vector topological_sort(); -}; - -class GreedyLabelsResolver { -public: - GreedyLabelsResolver() = default; - GreedyLabelsResolver(const HierarchicalConfig&); - - virtual std::map resolve_labels(const std::vector>& labels, - const std::vector& scores); - -protected: - std::map label_to_idx; - std::vector> label_relations; - std::vector> label_groups; - - std::string get_parent(const std::string& label); - std::vector get_predecessors(const std::string& label, const std::vector& candidates); -}; - -class ProbabilisticLabelsResolver : public GreedyLabelsResolver { -public: - ProbabilisticLabelsResolver() = default; - ProbabilisticLabelsResolver(const HierarchicalConfig&); - - virtual std::map resolve_labels(const std::vector>& labels, - const std::vector& scores); - std::unordered_map add_missing_ancestors(const std::unordered_map&) const; - std::map resolve_exclusive_labels(const std::unordered_map&) const; - void suppress_descendant_output(std::map&); - -protected: - SimpleLabelsGraph label_tree; -}; - -class ClassificationModel : public BaseModel { -public: - ClassificationModel(std::shared_ptr& model, const ov::AnyMap& configuration); - ClassificationModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); - - static std::unique_ptr create_model(const std::string& modelFile, - const ov::AnyMap& configuration = {}, - bool preload = true, - const std::string& device = "AUTO"); - static std::unique_ptr create_model(std::shared_ptr& adapter); - - std::unique_ptr postprocess(InferenceResult& infResult) override; - - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); - static std::string ModelType; - -protected: - size_t topk = 1; - bool multilabel = false; - bool hierarchical = false; - bool output_raw_scores = false; - float confidence_threshold = 0.5f; - std::string hierarchical_config; - std::string hierarchical_postproc = "greedy"; - HierarchicalConfig hierarchical_info; - std::unique_ptr resolver; - - void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); - void prepareInputsOutputs(std::shared_ptr& model) override; - void updateModelInfo() override; - std::unique_ptr get_multilabel_predictions(InferenceResult& infResult, bool add_raw_scores); - std::unique_ptr get_multiclass_predictions(InferenceResult& infResult, bool add_raw_scores); - std::unique_ptr get_hierarchical_predictions(InferenceResult& infResult, bool add_raw_scores); - ov::Tensor reorder_saliency_maps(const ov::Tensor&); -}; diff --git a/src/cpp/models/include/models/detection_model.h b/src/cpp/models/include/models/detection_model.h deleted file mode 100644 index 16ba8cf8..00000000 --- a/src/cpp/models/include/models/detection_model.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include - -#include "models/base_model.h" - -struct DetectionResult; -struct ImageInputData; -struct InferenceAdatper; - -class DetectionModel : public BaseModel { -public: - DetectionModel(std::shared_ptr& model, const ov::AnyMap& configuration); - DetectionModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); - - static std::unique_ptr create_model(const std::string& modelFile, - const ov::AnyMap& configuration = {}, - std::string model_type = "", - bool preload = true, - const std::string& device = "AUTO"); - static std::unique_ptr create_model(std::shared_ptr& adapter); - - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); - -protected: - float confidence_threshold = 0.5f; - - void updateModelInfo() override; -}; diff --git a/src/cpp/models/include/models/detection_model_ext.h b/src/cpp/models/include/models/detection_model_ext.h deleted file mode 100644 index 8c6d46c7..00000000 --- a/src/cpp/models/include/models/detection_model_ext.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -#include -#include - -#include "models/detection_model.h" - -struct DetectionResult; -struct ImageInputData; -struct InferenceAdatper; - -class DetectionModelExt : public DetectionModel { -public: - DetectionModelExt(std::shared_ptr& model, const ov::AnyMap& configuration); - DetectionModelExt(std::shared_ptr& adapter); - -protected: - void updateModelInfo() override; - float iou_threshold = 0.5f; -}; diff --git a/src/cpp/models/include/models/detection_model_ssd.h b/src/cpp/models/include/models/detection_model_ssd.h deleted file mode 100644 index acb3060f..00000000 --- a/src/cpp/models/include/models/detection_model_ssd.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -#include -#include -#include - -#include "models/detection_model.h" - -namespace ov { -class InferRequest; -class Model; -} // namespace ov -struct InferenceResult; -struct InputData; -struct InternalModelData; -struct ResultBase; - -class ModelSSD : public DetectionModel { -public: - using DetectionModel::DetectionModel; - std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input) override; - std::unique_ptr postprocess(InferenceResult& infResult) override; - static std::string ModelType; - -protected: - std::unique_ptr postprocessSingleOutput(InferenceResult& infResult); - std::unique_ptr postprocessMultipleOutputs(InferenceResult& infResult); - void prepareInputsOutputs(std::shared_ptr& model) override; - void prepareSingleOutput(std::shared_ptr& model); - void prepareMultipleOutputs(std::shared_ptr& model); - void updateModelInfo() override; -}; diff --git a/src/cpp/models/include/models/detection_model_yolo.h b/src/cpp/models/include/models/detection_model_yolo.h deleted file mode 100644 index 56055588..00000000 --- a/src/cpp/models/include/models/detection_model_yolo.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "models/detection_model_ext.h" - -struct DetectedObject; -struct InferenceResult; -struct ResultBase; - -class ModelYolo : public DetectionModelExt { -protected: - class Region { - public: - int num = 0; - size_t classes = 0; - int coords = 0; - std::vector anchors; - size_t outputWidth = 0; - size_t outputHeight = 0; - - Region(const std::shared_ptr& regionYolo); - Region(size_t classes, - int coords, - const std::vector& anchors, - const std::vector& masks, - size_t outputWidth, - size_t outputHeight); - }; - -public: - enum class YoloVersion : size_t { YOLO_V1V2 = 0, YOLO_V3, YOLO_V4, YOLO_V4_TINY, YOLOF }; - - ModelYolo(std::shared_ptr& model, const ov::AnyMap& configuration); - ModelYolo(std::shared_ptr& adapter); - - std::unique_ptr postprocess(InferenceResult& infResult) override; - -protected: - void prepareInputsOutputs(std::shared_ptr& model) override; - - void parseYOLOOutput(const std::string& output_name, - const ov::Tensor& tensor, - const unsigned long resized_im_h, - const unsigned long resized_im_w, - const unsigned long original_im_h, - const unsigned long original_im_w, - std::vector& objects); - - static int calculateEntryIndex(int entriesNum, int lcoords, size_t lclasses, int location, int entry); - static double intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2); - - std::map regions; - float iou_threshold; - bool useAdvancedPostprocessing = true; - bool isObjConf = 1; - YoloVersion yoloVersion = YoloVersion::YOLO_V3; - std::vector presetAnchors; - std::vector presetMasks; - ov::Layout yoloRegionLayout = "NCHW"; -}; - -class YOLOv5 : public DetectionModelExt { - // Reimplementation of ultralytics.YOLO - void prepareInputsOutputs(std::shared_ptr& model) override; - void updateModelInfo() override; - void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); - bool agnostic_nms = false; - -public: - YOLOv5(std::shared_ptr& model, const ov::AnyMap& configuration); - YOLOv5(std::shared_ptr& adapter); - std::unique_ptr postprocess(InferenceResult& infResult) override; - static std::string ModelType; -}; - -class YOLOv8 : public YOLOv5 { -public: - // YOLOv5 and YOLOv8 are identical in terms of inference - YOLOv8(std::shared_ptr& model, const ov::AnyMap& configuration) : YOLOv5{model, configuration} {} - YOLOv8(std::shared_ptr& adapter) : YOLOv5{adapter} {} - static std::string ModelType; -}; diff --git a/src/cpp/models/include/models/detection_model_yolov3_onnx.h b/src/cpp/models/include/models/detection_model_yolov3_onnx.h deleted file mode 100644 index 9dead24d..00000000 --- a/src/cpp/models/include/models/detection_model_yolov3_onnx.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include - -#include "models/detection_model.h" - -class ModelYoloV3ONNX : public DetectionModel { -public: - ModelYoloV3ONNX(std::shared_ptr& model, const ov::AnyMap& configuration); - ModelYoloV3ONNX(std::shared_ptr& adapter); - using DetectionModel::DetectionModel; - - std::unique_ptr postprocess(InferenceResult& infResult) override; - std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input) override; - -protected: - void prepareInputsOutputs(std::shared_ptr& model) override; - void initDefaultParameters(const ov::AnyMap& configuration); - - std::string boxesOutputName; - std::string scoresOutputName; - std::string indicesOutputName; - static const int numberOfClasses = 80; -}; diff --git a/src/cpp/models/include/models/detection_model_yolox.h b/src/cpp/models/include/models/detection_model_yolox.h deleted file mode 100644 index bc747ee5..00000000 --- a/src/cpp/models/include/models/detection_model_yolox.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include -#include -#include - -#include "models/detection_model_ext.h" - -class ModelYoloX : public DetectionModelExt { -public: - ModelYoloX(std::shared_ptr& model, const ov::AnyMap& configuration); - ModelYoloX(std::shared_ptr& adapter); - using DetectionModelExt::DetectionModelExt; - - std::unique_ptr postprocess(InferenceResult& infResult) override; - std::shared_ptr preprocess(const InputData& inputData, InferenceInput& input) override; - static std::string ModelType; - -protected: - void prepareInputsOutputs(std::shared_ptr& model) override; - void setStridesGrids(); - void initDefaultParameters(const ov::AnyMap& configuration); - void updateModelInfo() override; - - float iou_threshold; - std::vector> grids; - std::vector expandedStrides; - static const size_t numberOfClasses = 80; -}; diff --git a/src/cpp/models/include/models/input_data.h b/src/cpp/models/include/models/input_data.h deleted file mode 100644 index 6d02e287..00000000 --- a/src/cpp/models/include/models/input_data.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -struct InputData { - virtual ~InputData() {} - - template - T& asRef() { - return dynamic_cast(*this); - } - - template - const T& asRef() const { - return dynamic_cast(*this); - } -}; - -struct ImageInputData : public InputData { - cv::Mat inputImage; - - ImageInputData() {} - ImageInputData(const cv::Mat& img) { - inputImage = img; - } -}; diff --git a/src/cpp/models/include/models/instance_segmentation.h b/src/cpp/models/include/models/instance_segmentation.h deleted file mode 100644 index c6cadce7..00000000 --- a/src/cpp/models/include/models/instance_segmentation.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include -#include - -#include "models/base_model.h" - -namespace ov { -class Model; -} // namespace ov -struct InferenceResult; -struct ResultBase; -struct InstanceSegmentationResult; -struct ImageInputData; -struct SegmentedObject; - -class MaskRCNNModel : public BaseModel { -public: - MaskRCNNModel(std::shared_ptr& model, const ov::AnyMap& configuration); - MaskRCNNModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); - - static std::unique_ptr create_model(const std::string& modelFile, - const ov::AnyMap& configuration = {}, - bool preload = true, - const std::string& device = "AUTO"); - static std::unique_ptr create_model(std::shared_ptr& adapter); - - std::unique_ptr postprocess(InferenceResult& infResult) override; - - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch( - const std::vector& inputImgs); - static std::string ModelType; - bool postprocess_semantic_masks = true; - -protected: - void prepareInputsOutputs(std::shared_ptr& model) override; - void updateModelInfo() override; - void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); - std::string getLabelName(size_t labelID) { - return labelID < labels.size() ? labels[labelID] : std::string("Label #") + std::to_string(labelID); - } - - float confidence_threshold = 0.5f; -}; - -cv::Mat segm_postprocess(const SegmentedObject& box, const cv::Mat& unpadded, int im_h, int im_w); diff --git a/src/cpp/models/include/models/internal_model_data.h b/src/cpp/models/include/models/internal_model_data.h deleted file mode 100644 index 2f1b84f7..00000000 --- a/src/cpp/models/include/models/internal_model_data.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -struct InternalModelData { - virtual ~InternalModelData() {} - - template - T& asRef() { - return dynamic_cast(*this); - } - - template - const T& asRef() const { - return dynamic_cast(*this); - } -}; - -struct InternalImageModelData : public InternalModelData { - InternalImageModelData(int width, int height) : inputImgWidth(width), inputImgHeight(height) {} - - int inputImgWidth; - int inputImgHeight; -}; - -struct InternalScaleData : public InternalImageModelData { - InternalScaleData(int width, int height, float scaleX, float scaleY) - : InternalImageModelData(width, height), - scaleX(scaleX), - scaleY(scaleY) {} - - float scaleX; - float scaleY; -}; diff --git a/src/cpp/models/include/models/keypoint_detection.h b/src/cpp/models/include/models/keypoint_detection.h deleted file mode 100644 index 15d21cba..00000000 --- a/src/cpp/models/include/models/keypoint_detection.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2020-2025 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include -#include - -#include "models/base_model.h" - -namespace ov { -class Model; -} // namespace ov -struct InferenceResult; -struct ResultBase; -struct KeypointDetectionResult; -struct ImageInputData; - -class KeypointDetectionModel : public BaseModel { -public: - KeypointDetectionModel(std::shared_ptr& model, const ov::AnyMap& configuration); - KeypointDetectionModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); - - static std::unique_ptr create_model(const std::string& modelFile, - const ov::AnyMap& configuration = {}, - bool preload = true, - const std::string& device = "AUTO"); - static std::unique_ptr create_model(std::shared_ptr& adapter); - - std::unique_ptr postprocess(InferenceResult& infResult) override; - - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch( - const std::vector& inputImgs); - - static std::string ModelType; - -protected: - bool apply_softmax = true; - - void prepareInputsOutputs(std::shared_ptr& model) override; - void updateModelInfo() override; - void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); -}; diff --git a/src/cpp/models/include/models/results.h b/src/cpp/models/include/models/results.h deleted file mode 100644 index 1a648723..00000000 --- a/src/cpp/models/include/models/results.h +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include - -#include "internal_model_data.h" - -struct MetaData; -struct ResultBase { - ResultBase(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : frameId(frameId), - metaData(metaData) {} - virtual ~ResultBase() {} - - int64_t frameId; - - std::shared_ptr metaData; - bool IsEmpty() { - return frameId < 0; - } - - template - T& asRef() { - return dynamic_cast(*this); - } - - template - const T& asRef() const { - return dynamic_cast(*this); - } -}; - -struct AnomalyResult : public ResultBase { - AnomalyResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ResultBase(frameId, metaData) {} - cv::Mat anomaly_map; - std::vector pred_boxes; - std::string pred_label; - cv::Mat pred_mask; - double pred_score; - - friend std::ostream& operator<<(std::ostream& os, const AnomalyResult& prediction) { - double min_anomaly_map, max_anomaly_map; - cv::minMaxLoc(prediction.anomaly_map, &min_anomaly_map, &max_anomaly_map); - double min_pred_mask, max_pred_mask; - cv::minMaxLoc(prediction.pred_mask, &min_pred_mask, &max_pred_mask); - os << "anomaly_map min:" << min_anomaly_map << " max:" << max_anomaly_map << ";"; - os << "pred_score:" << std::fixed << std::setprecision(1) << prediction.pred_score << ";"; - os << "pred_label:" << prediction.pred_label << ";"; - os << std::fixed << std::setprecision(0) << "pred_mask min:" << min_pred_mask << " max:" << max_pred_mask - << ";"; - - if (!prediction.pred_boxes.empty()) { - os << "pred_boxes:"; - for (const cv::Rect& box : prediction.pred_boxes) { - os << box << ","; - } - } - - return os; - } - explicit operator std::string() { - std::stringstream ss; - ss << *this; - return ss.str(); - } -}; - -struct InferenceResult : public ResultBase { - std::shared_ptr internalModelData; - std::map outputsData; - - /// Returns the first output tensor - /// This function is a useful addition to direct access to outputs list as many models have only one output - /// @returns first output tensor - ov::Tensor getFirstOutputTensor() { - if (outputsData.empty()) { - throw std::out_of_range("Outputs map is empty."); - } - return outputsData.begin()->second; - } - - /// Returns true if object contains no valid data - /// @returns true if object contains no valid data - bool IsEmpty() { - return outputsData.empty(); - } -}; - -struct ClassificationResult : public ResultBase { - ClassificationResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ResultBase(frameId, metaData) {} - - friend std::ostream& operator<<(std::ostream& os, const ClassificationResult& prediction) { - for (const ClassificationResult::Classification& classification : prediction.topLabels) { - os << classification << ", "; - } - try { - os << prediction.saliency_map.get_shape() << ", "; - } catch (ov::Exception&) { - os << "[0], "; - } - try { - os << prediction.feature_vector.get_shape() << ", "; - } catch (ov::Exception&) { - os << "[0], "; - } - try { - os << prediction.raw_scores.get_shape(); - } catch (ov::Exception&) { - os << "[0]"; - } - return os; - } - - explicit operator std::string() { - std::stringstream ss; - ss << *this; - return ss.str(); - } - - struct Classification { - size_t id; - std::string label; - float score; - - Classification(size_t id, const std::string& label, float score) : id(id), label(label), score(score) {} - - friend std::ostream& operator<<(std::ostream& os, const Classification& prediction) { - return os << prediction.id << " (" << prediction.label << "): " << std::fixed << std::setprecision(3) - << prediction.score; - } - }; - - std::vector topLabels; - ov::Tensor saliency_map, feature_vector, - raw_scores; // Contains "raw_scores", "saliency_map" and "feature_vector" model outputs if such exist -}; - -struct DetectedObject : public cv::Rect2f { - size_t labelID; - std::string label; - float confidence; - - friend std::ostream& operator<<(std::ostream& os, const DetectedObject& detection) { - return os << int(detection.x) << ", " << int(detection.y) << ", " << int(detection.x + detection.width) << ", " - << int(detection.y + detection.height) << ", " << detection.labelID << " (" << detection.label - << "): " << std::fixed << std::setprecision(3) << detection.confidence; - } -}; - -struct DetectionResult : public ResultBase { - DetectionResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ResultBase(frameId, metaData) {} - std::vector objects; - ov::Tensor saliency_map, feature_vector; // Contan "saliency_map" and "feature_vector" model outputs if such exist - - friend std::ostream& operator<<(std::ostream& os, const DetectionResult& prediction) { - for (const DetectedObject& obj : prediction.objects) { - os << obj << "; "; - } - try { - os << prediction.saliency_map.get_shape() << "; "; - } catch (ov::Exception&) { - os << "[0]; "; - } - try { - os << prediction.feature_vector.get_shape(); - } catch (ov::Exception&) { - os << "[0]"; - } - return os; - } - - explicit operator std::string() { - std::stringstream ss; - ss << *this; - return ss.str(); - } -}; - -struct RetinaFaceDetectionResult : public DetectionResult { - RetinaFaceDetectionResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : DetectionResult(frameId, metaData) {} - std::vector landmarks; -}; - -struct SegmentedObject : DetectedObject { - cv::Mat mask; - - friend std::ostream& operator<<(std::ostream& os, const SegmentedObject& prediction) { - return os << static_cast(prediction) << ", " << cv::countNonZero(prediction.mask > 0.5); - } -}; - -struct SegmentedObjectWithRects : SegmentedObject { - cv::RotatedRect rotated_rect; - - SegmentedObjectWithRects(const SegmentedObject& segmented_object) : SegmentedObject(segmented_object) {} - - friend std::ostream& operator<<(std::ostream& os, const SegmentedObjectWithRects& prediction) { - os << static_cast(prediction) << std::fixed << std::setprecision(3); - auto rect = prediction.rotated_rect; - os << ", RotatedRect: " << rect.center.x << ' ' << rect.center.y << ' ' << rect.size.width << ' ' - << rect.size.height << ' ' << rect.angle; - return os; - } -}; - -static inline std::vector add_rotated_rects(std::vector segmented_objects) { - std::vector objects_with_rects; - objects_with_rects.reserve(segmented_objects.size()); - for (const SegmentedObject& segmented_object : segmented_objects) { - objects_with_rects.push_back(SegmentedObjectWithRects{segmented_object}); - cv::Mat mask; - segmented_object.mask.convertTo(mask, CV_8UC1); - std::vector> contours; - cv::findContours(mask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); - - std::vector contour = {}; - for (size_t i = 0; i < contours.size(); i++) { - contour.insert(contour.end(), contours[i].begin(), contours[i].end()); - } - if (contour.size() > 0) { - std::vector hull; - cv::convexHull(contour, hull); - objects_with_rects.back().rotated_rect = cv::minAreaRect(hull); - } - } - return objects_with_rects; -} - -struct InstanceSegmentationResult : ResultBase { - InstanceSegmentationResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ResultBase(frameId, metaData) {} - std::vector segmentedObjects; - // Contains per class saliency_maps and "feature_vector" model output if feature_vector exists - std::vector> saliency_map; - ov::Tensor feature_vector; -}; - -struct ImageResult : public ResultBase { - ImageResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ResultBase(frameId, metaData) {} - cv::Mat resultImage; - friend std::ostream& operator<<(std::ostream& os, const ImageResult& prediction) { - cv::Mat predicted_mask[] = {prediction.resultImage}; - int nimages = 1; - int* channels = nullptr; - cv::Mat mask; - cv::Mat outHist; - int dims = 1; - int histSize[] = {256}; - float range[] = {0, 256}; - const float* ranges[] = {range}; - cv::calcHist(predicted_mask, nimages, channels, mask, outHist, dims, histSize, ranges); - - os << std::fixed << std::setprecision(3); - for (int i = 0; i < range[1]; ++i) { - const float count = outHist.at(i); - if (count > 0) { - os << i << ": " << count / prediction.resultImage.total() << ", "; - } - } - return os; - } - explicit operator std::string() { - std::stringstream ss; - ss << *this; - return ss.str(); - } -}; - -struct ImageResultWithSoftPrediction : public ImageResult { - ImageResultWithSoftPrediction(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ImageResult(frameId, metaData) {} - cv::Mat soft_prediction; - // Contain per class saliency_maps and "feature_vector" model output if feature_vector exists - cv::Mat saliency_map; // Requires return_soft_prediction==true - ov::Tensor feature_vector; - friend std::ostream& operator<<(std::ostream& os, const ImageResultWithSoftPrediction& prediction) { - os << static_cast(prediction) << '['; - for (int i = 0; i < prediction.soft_prediction.dims; ++i) { - os << prediction.soft_prediction.size[i] << ','; - } - os << prediction.soft_prediction.channels() << "], ["; - if (prediction.saliency_map.data) { - for (int i = 0; i < prediction.saliency_map.dims; ++i) { - os << prediction.saliency_map.size[i] << ','; - } - os << prediction.saliency_map.channels() << "], "; - } else { - os << "0], "; - } - try { - os << prediction.feature_vector.get_shape(); - } catch (ov::Exception&) { - os << "[0]"; - } - return os; - } -}; - -struct Contour { - std::string label; - float probability; - std::vector shape; - - friend std::ostream& operator<<(std::ostream& os, const Contour& contour) { - return os << contour.label << ": " << std::fixed << std::setprecision(3) << contour.probability << ", " - << contour.shape.size(); - } -}; - -static inline std::vector getContours(const std::vector& segmentedObjects) { - std::vector combined_contours; - std::vector> contours; - for (const SegmentedObject& obj : segmentedObjects) { - cv::findContours(obj.mask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_NONE); - // Assuming one contour output for findContours. Based on OTX this is a safe - // assumption - if (contours.size() != 1) { - throw std::runtime_error("findContours() must have returned only one contour"); - } - combined_contours.push_back({obj.label, obj.confidence, contours[0]}); - } - return combined_contours; -} - -struct HumanPose { - std::vector keypoints; - float score; -}; - -struct HumanPoseResult : public ResultBase { - HumanPoseResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ResultBase(frameId, metaData) {} - std::vector poses; -}; - -struct DetectedKeypoints { - std::vector keypoints; - std::vector scores; - - friend std::ostream& operator<<(std::ostream& os, const DetectedKeypoints& prediction) { - float kp_x_sum = 0.f; - for (const cv::Point2f& keypoint : prediction.keypoints) { - kp_x_sum += keypoint.x; - } - float scores_sum = std::accumulate(prediction.scores.begin(), prediction.scores.end(), 0.f); - - os << "keypoints: (" << prediction.keypoints.size() << ", 2), keypoints_x_sum: "; - os << std::fixed << std::setprecision(3) << kp_x_sum << ", scores: (" << prediction.scores.size() << ",) " - << std::fixed << std::setprecision(3) << scores_sum; - return os; - } - - explicit operator std::string() { - std::stringstream ss; - ss << *this; - return ss.str(); - } -}; - -struct KeypointDetectionResult : public ResultBase { - KeypointDetectionResult(int64_t frameId = -1, const std::shared_ptr& metaData = nullptr) - : ResultBase(frameId, metaData) {} - std::vector poses; -}; diff --git a/src/cpp/models/include/models/segmentation_model.h b/src/cpp/models/include/models/segmentation_model.h deleted file mode 100644 index 922828f7..00000000 --- a/src/cpp/models/include/models/segmentation_model.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include -#include -#include - -#include "models/base_model.h" - -namespace ov { -class Model; -} // namespace ov -struct InferenceResult; -struct ResultBase; -struct ImageResult; -struct ImageResultWithSoftPrediction; -struct ImageInputData; -struct Contour; - -class SegmentationModel : public BaseModel { -public: - SegmentationModel(std::shared_ptr& model, const ov::AnyMap& configuration); - SegmentationModel(std::shared_ptr& adapter, const ov::AnyMap& configuration = {}); - - static std::unique_ptr create_model(const std::string& modelFile, - const ov::AnyMap& configuration = {}, - bool preload = true, - const std::string& device = "AUTO"); - static std::unique_ptr create_model(std::shared_ptr& adapter); - - std::unique_ptr postprocess(InferenceResult& infResult) override; - - virtual std::unique_ptr infer(const ImageInputData& inputData); - virtual std::vector> inferBatch(const std::vector& inputImgs); - - static std::string ModelType; - std::vector getContours(const ImageResultWithSoftPrediction& imageResult); - -protected: - void prepareInputsOutputs(std::shared_ptr& model) override; - void updateModelInfo() override; - void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority); - - int blur_strength = -1; - float soft_threshold = -std::numeric_limits::infinity(); - bool return_soft_prediction = true; -}; - -cv::Mat create_hard_prediction_from_soft_prediction(const cv::Mat& soft_prediction, - float soft_threshold, - int blur_strength); diff --git a/src/cpp/models/src/anomaly_model.cpp b/src/cpp/models/src/anomaly_model.cpp deleted file mode 100644 index eeccf08a..00000000 --- a/src/cpp/models/src/anomaly_model.cpp +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/anomaly_model.h" - -#include -#include -#include -#include - -#include "models/base_model.h" -#include "models/input_data.h" -#include "models/internal_model_data.h" -#include "models/results.h" -#include "utils/slog.hpp" - -std::string AnomalyModel::ModelType = "AnomalyDetection"; - -/// @brief Initializes the model from the given configuration -/// @param top_priority Uses this as the primary source for setting the parameters -/// @param mid_priority Fallback source for setting the parameters -void AnomalyModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) { - imageThreshold = get_from_any_maps("image_threshold", top_priority, mid_priority, imageThreshold); - pixelThreshold = get_from_any_maps("pixel_threshold", top_priority, mid_priority, pixelThreshold); - normalizationScale = get_from_any_maps("normalization_scale", top_priority, mid_priority, normalizationScale); - task = get_from_any_maps("task", top_priority, mid_priority, task); -} - -AnomalyModel::AnomalyModel(std::shared_ptr& model, const ov::AnyMap& configuration) - : BaseModel(model, configuration) { - init_from_config(configuration, model->get_rt_info("model_info")); -} - -AnomalyModel::AnomalyModel(std::shared_ptr& adapter, const ov::AnyMap& configuration) - : BaseModel(adapter, configuration) { - init_from_config(configuration, adapter->getModelConfig()); -} - -std::unique_ptr AnomalyModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - - return std::unique_ptr(static_cast(result.release())); -} - -std::vector> AnomalyModel::inferBatch(const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> anoResults; - anoResults.reserve(results.size()); - for (auto& result : results) { - anoResults.emplace_back(static_cast(result.release())); - } - return anoResults; -} - -std::unique_ptr AnomalyModel::postprocess(InferenceResult& infResult) { - ov::Tensor predictions = infResult.outputsData[outputNames[0]]; - const auto& inputImgSize = infResult.internalModelData->asRef(); - - double pred_score; - std::string pred_label; - cv::Mat anomaly_map; - cv::Mat pred_mask; - std::vector pred_boxes; - if (predictions.get_shape().size() == 1) { - pred_score = predictions.data()[0]; - } else { - const ov::Layout& layout = getLayoutFromShape(predictions.get_shape()); - const ov::Shape& predictionsShape = predictions.get_shape(); - anomaly_map = cv::Mat(static_cast(predictionsShape[ov::layout::height_idx(layout)]), - static_cast(predictionsShape[ov::layout::width_idx(layout)]), - CV_32FC1, - predictions.data()); - // find the max predicted score - cv::minMaxLoc(anomaly_map, NULL, &pred_score); - } - pred_label = labels[pred_score > imageThreshold ? 1 : 0]; - - pred_mask = anomaly_map >= pixelThreshold; - pred_mask.convertTo(pred_mask, CV_8UC1, 1 / 255.); - cv::resize(pred_mask, pred_mask, cv::Size{inputImgSize.inputImgWidth, inputImgSize.inputImgHeight}); - anomaly_map = normalize(anomaly_map, pixelThreshold); - anomaly_map.convertTo(anomaly_map, CV_8UC1, 255); - - pred_score = normalize(pred_score, imageThreshold); - if (pred_label == labels[0]) { // normal label - pred_score = 1 - pred_score; // Score of normal is 1 - score of anomaly - } - - if (!anomaly_map.empty()) { - cv::resize(anomaly_map, anomaly_map, cv::Size{inputImgSize.inputImgWidth, inputImgSize.inputImgHeight}); - } - if (task == "detection") { - pred_boxes = getBoxes(pred_mask); - } - - AnomalyResult* result = new AnomalyResult(infResult.frameId, infResult.metaData); - result->anomaly_map = std::move(anomaly_map); - result->pred_score = pred_score; - result->pred_label = std::move(pred_label); - result->pred_mask = std::move(pred_mask); - result->pred_boxes = std::move(pred_boxes); - return std::unique_ptr(result); -} - -cv::Mat AnomalyModel::normalize(cv::Mat& tensor, float threshold) { - cv::Mat normalized = ((tensor - threshold) / normalizationScale) + 0.5f; - normalized = cv::min(cv::max(normalized, 0.f), 1.f); - return normalized; -} - -/// @brief Normalize the value to be in the range [0, 1]. Centered around 0.5 and scaled by the normalization scale. -/// @param value Unbounded value to be normalized. -/// @param threshold This is the value that is subtracted from the input value before normalization. -/// @return value between 0 and 1. -double AnomalyModel::normalize(double& value, float threshold) { - double normalized = ((value - threshold) / normalizationScale) + 0.5f; - return std::min(std::max(normalized, 0.), 1.); -} - -std::vector AnomalyModel::getBoxes(cv::Mat& mask) { - std::vector boxes; - std::vector> contours; - cv::findContours(mask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); - for (auto& contour : contours) { - std::vector box; - cv::Rect rect = cv::boundingRect(contour); - boxes.push_back(rect); - } - return boxes; -} - -std::unique_ptr AnomalyModel::create_model(const std::string& modelFile, - const ov::AnyMap& configuration, - bool preload, - const std::string& device) { - auto core = ov::Core(); - std::shared_ptr model = core.read_model(modelFile); - - std::unique_ptr anomalyModel{new AnomalyModel(model, configuration)}; - - anomalyModel->prepare(); - if (preload) { - anomalyModel->load(core, device); - } - return anomalyModel; -} - -std::unique_ptr AnomalyModel::create_model(std::shared_ptr& adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto model_type_iter = configuration.find("model_type"); - std::string model_type = AnomalyModel::ModelType; - if (model_type_iter != configuration.end()) { - model_type = model_type_iter->second.as(); - } - - if (model_type != AnomalyModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided: " + model_type); - } - std::unique_ptr anomalyModel{new AnomalyModel(adapter)}; - return anomalyModel; -} - -std::ostream& operator<<(std::ostream& os, std::unique_ptr& model) { - os << "AnomalyModel: " << model->task << ", Image threshold: " << model->imageThreshold - << ", Pixel threshold: " << model->pixelThreshold << ", Normalization scale: " << model->normalizationScale - << std::endl; - return os; -} - -void AnomalyModel::prepareInputsOutputs(std::shared_ptr& model) { - const auto& input = model->input(); - inputNames.push_back(input.get_any_name()); - - const ov::Shape& inputShape = input.get_partial_shape().get_max_shape(); - const ov::Layout& inputLayout = getInputLayout(input); - - if (!embedded_processing) { - model = BaseModel::embedProcessing( - model, - inputNames[0], - inputLayout, - resizeMode, - interpolationMode, - ov::Shape{inputShape[ov::layout::width_idx(inputLayout)], inputShape[ov::layout::height_idx(inputLayout)]}, - pad_value, - reverse_input_channels, - mean_values, - scale_values); - embedded_processing = true; - } - outputNames.push_back(model->output().get_any_name()); -} - -void AnomalyModel::updateModelInfo() { - BaseModel::updateModelInfo(); - - model->set_rt_info(AnomalyModel::ModelType, "model_info", "model_type"); - model->set_rt_info(task, "model_info", "task"); - model->set_rt_info(imageThreshold, "model_info", "image_threshold"); - model->set_rt_info(pixelThreshold, "model_info", "pixel_threshold"); - model->set_rt_info(normalizationScale, "model_info", "normalization_scale"); - model->set_rt_info(task, "model_info", "task"); -} diff --git a/src/cpp/models/src/base_model.cpp b/src/cpp/models/src/base_model.cpp deleted file mode 100644 index 6fd83d73..00000000 --- a/src/cpp/models/src/base_model.cpp +++ /dev/null @@ -1,371 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/base_model.h" - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "adapters/openvino_adapter.h" -#include "models/input_data.h" -#include "models/internal_model_data.h" -#include "models/results.h" -#include "utils/common.hpp" - -namespace { -class TmpCallbackSetter { -public: - BaseModel* model; - std::function, const ov::AnyMap&)> last_callback; - TmpCallbackSetter(BaseModel* model_, - std::function, const ov::AnyMap&)> tmp_callback, - std::function, const ov::AnyMap&)> last_callback_) - : model(model_), - last_callback(last_callback_) { - model->setCallback(tmp_callback); - } - ~TmpCallbackSetter() { - if (last_callback) { - model->setCallback(last_callback); - } else { - model->setCallback([](std::unique_ptr, const ov::AnyMap&) {}); - } - } -}; -} // namespace - -BaseModel::BaseModel(const std::string& modelFile, - const std::string& resize_type, - bool useAutoResize, - const std::string& layout) - : useAutoResize(useAutoResize), - resizeMode(selectResizeMode(resize_type)), - modelFile(modelFile), - inputsLayouts(parseLayoutString(layout)) { - auto core = ov::Core(); - model = core.read_model(modelFile); -} - -void BaseModel::load(ov::Core& core, const std::string& device, size_t num_infer_requests) { - if (!inferenceAdapter) { - inferenceAdapter = std::make_shared(); - } - - // Update model_info erased by pre/postprocessing - updateModelInfo(); - - inferenceAdapter->loadModel(model, core, device, {}, num_infer_requests); -} - -std::shared_ptr BaseModel::prepare() { - prepareInputsOutputs(model); - logBasicModelInfo(model); - ov::set_batch(model, 1); - - return model; -} - -ov::Layout BaseModel::getInputLayout(const ov::Output& input) { - ov::Layout layout = ov::layout::get_layout(input); - if (layout.empty()) { - if (inputsLayouts.empty()) { - layout = getLayoutFromShape(input.get_partial_shape()); - slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '" - << input.get_any_name() << "' will be used." << slog::endl; - } else if (inputsLayouts.size() == 1) { - layout = inputsLayouts.begin()->second; - } else { - layout = inputsLayouts[input.get_any_name()]; - } - } - - return layout; -} - -size_t BaseModel::getNumAsyncExecutors() const { - return inferenceAdapter->getNumAsyncExecutors(); -} - -bool BaseModel::isReady() { - return inferenceAdapter->isReady(); -} -void BaseModel::awaitAll() { - inferenceAdapter->awaitAll(); -} -void BaseModel::awaitAny() { - inferenceAdapter->awaitAny(); -} - -void BaseModel::setCallback( - std::function, const ov::AnyMap& callback_args)> callback) { - lastCallback = callback; - inferenceAdapter->setCallback([this, callback](ov::InferRequest request, CallbackData args) { - InferenceResult result; - - InferenceOutput output; - for (const auto& item : this->getInferenceAdapter()->getOutputNames()) { - output.emplace(item, request.get_tensor(item)); - } - - result.outputsData = output; - auto model_data_iter = args->find("internalModelData"); - if (model_data_iter != args->end()) { - result.internalModelData = std::move(model_data_iter->second.as>()); - } - auto retVal = this->postprocess(result); - *retVal = static_cast(result); - callback(std::move(retVal), args ? *args : ov::AnyMap()); - }); -} - -std::shared_ptr BaseModel::getModel() { - if (!model) { - throw std::runtime_error(std::string("ov::Model is not accessible for the current model adapter: ") + - typeid(inferenceAdapter).name()); - } - - updateModelInfo(); - return model; -} - -std::shared_ptr BaseModel::getInferenceAdapter() { - if (!inferenceAdapter) { - throw std::runtime_error(std::string("Model wasn't loaded")); - } - - return inferenceAdapter; -} - -RESIZE_MODE BaseModel::selectResizeMode(const std::string& resize_type) { - RESIZE_MODE resize = RESIZE_FILL; - if ("crop" == resize_type) { - resize = RESIZE_CROP; - } else if ("standard" == resize_type) { - resize = RESIZE_FILL; - } else if ("fit_to_window" == resize_type) { - resize = RESIZE_KEEP_ASPECT; - } else if ("fit_to_window_letterbox" == resize_type) { - resize = RESIZE_KEEP_ASPECT_LETTERBOX; - } else { - throw std::runtime_error("Unknown value for resize_type arg"); - } - - return resize; -} - -void BaseModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) { - useAutoResize = get_from_any_maps("auto_resize", top_priority, mid_priority, useAutoResize); - - std::string resize_type = "standard"; - resize_type = get_from_any_maps("resize_type", top_priority, mid_priority, resize_type); - resizeMode = selectResizeMode(resize_type); - - labels = get_from_any_maps("labels", top_priority, mid_priority, labels); - embedded_processing = get_from_any_maps("embedded_processing", top_priority, mid_priority, embedded_processing); - netInputWidth = get_from_any_maps("orig_width", top_priority, mid_priority, netInputWidth); - netInputHeight = get_from_any_maps("orig_height", top_priority, mid_priority, netInputHeight); - int pad_value_int = 0; - pad_value_int = get_from_any_maps("pad_value", top_priority, mid_priority, pad_value_int); - if (0 > pad_value_int || 255 < pad_value_int) { - throw std::runtime_error("pad_value must be in range [0, 255]"); - } - pad_value = static_cast(pad_value_int); - reverse_input_channels = - get_from_any_maps("reverse_input_channels", top_priority, mid_priority, reverse_input_channels); - scale_values = get_from_any_maps("scale_values", top_priority, mid_priority, scale_values); - mean_values = get_from_any_maps("mean_values", top_priority, mid_priority, mean_values); -} - -BaseModel::BaseModel(std::shared_ptr& model, const ov::AnyMap& configuration) : model(model) { - auto layout_iter = configuration.find("layout"); - std::string layout = ""; - - if (layout_iter != configuration.end()) { - layout = layout_iter->second.as(); - } else { - if (model->has_rt_info("model_info", "layout")) { - layout = model->get_rt_info("model_info", "layout"); - } - } - inputsLayouts = parseLayoutString(layout); - init_from_config(configuration, - model->has_rt_info("model_info") ? model->get_rt_info("model_info") : ov::AnyMap{}); -} - -BaseModel::BaseModel(std::shared_ptr& adapter, const ov::AnyMap& configuration) - : inferenceAdapter(adapter) { - const ov::AnyMap& adapter_configuration = adapter->getModelConfig(); - - std::string layout = ""; - layout = get_from_any_maps("layout", configuration, adapter_configuration, layout); - inputsLayouts = parseLayoutString(layout); - - inputNames = adapter->getInputNames(); - outputNames = adapter->getOutputNames(); - - init_from_config(configuration, adapter->getModelConfig()); -} - -std::unique_ptr BaseModel::inferImage(const ImageInputData& inputData) { - InferenceInput inputs; - InferenceResult result; - auto internalModelData = this->preprocess(inputData, inputs); - - result.outputsData = inferenceAdapter->infer(inputs); - result.internalModelData = std::move(internalModelData); - - auto retVal = this->postprocess(result); - *retVal = static_cast(result); - return retVal; -} - -std::vector> BaseModel::inferBatchImage(const std::vector& inputImgs) { - std::vector> inputData; - inputData.reserve(inputImgs.size()); - for (const auto& img : inputImgs) { - inputData.push_back(img); - } - auto results = std::vector>(inputData.size()); - auto setter = TmpCallbackSetter( - this, - [&](std::unique_ptr result, const ov::AnyMap& callback_args) { - size_t id = callback_args.find("id")->second.as(); - results[id] = std::move(result); - }, - lastCallback); - size_t req_id = 0; - for (const auto& data : inputData) { - inferAsync(data, {{"id", req_id++}}); - } - awaitAll(); - return results; -} - -void BaseModel::inferAsync(const ImageInputData& inputData, const ov::AnyMap& callback_args) { - InferenceInput inputs; - auto internalModelData = this->preprocess(inputData, inputs); - auto callback_args_ptr = std::make_shared(callback_args); - (*callback_args_ptr)["internalModelData"] = std::move(internalModelData); - inferenceAdapter->inferAsync(inputs, callback_args_ptr); -} - -void BaseModel::updateModelInfo() { - if (!model) { - throw std::runtime_error("The ov::Model object is not accessible"); - } - - if (!inputsLayouts.empty()) { - auto layouts = formatLayouts(inputsLayouts); - model->set_rt_info(layouts, "model_info", "layout"); - } - - model->set_rt_info(useAutoResize, "model_info", "auto_resize"); - model->set_rt_info(formatResizeMode(resizeMode), "model_info", "resize_type"); - - if (!labels.empty()) { - model->set_rt_info(labels, "model_info", "labels"); - } - - model->set_rt_info(embedded_processing, "model_info", "embedded_processing"); - model->set_rt_info(netInputWidth, "model_info", "orig_width"); - model->set_rt_info(netInputHeight, "model_info", "orig_height"); -} - -std::shared_ptr BaseModel::embedProcessing(std::shared_ptr& model, - const std::string& inputName, - const ov::Layout& layout, - const RESIZE_MODE resize_mode, - const cv::InterpolationFlags interpolationMode, - const ov::Shape& targetShape, - uint8_t pad_value, - bool brg2rgb, - const std::vector& mean, - const std::vector& scale, - const std::type_info& dtype) { - ov::preprocess::PrePostProcessor ppp(model); - - // Change the input type to the 8-bit image - if (dtype == typeid(int)) { - ppp.input(inputName).tensor().set_element_type(ov::element::u8); - } - - ppp.input(inputName).tensor().set_layout(ov::Layout("NHWC")).set_color_format(ov::preprocess::ColorFormat::BGR); - - if (resize_mode != NO_RESIZE) { - ppp.input(inputName).tensor().set_spatial_dynamic_shape(); - // Doing resize in u8 is more efficient than FP32 but can lead to slightly different results - ppp.input(inputName).preprocess().custom( - createResizeGraph(resize_mode, targetShape, interpolationMode, pad_value)); - } - - ppp.input(inputName).model().set_layout(ov::Layout(layout)); - - // Handle color format - if (brg2rgb) { - ppp.input(inputName).preprocess().convert_color(ov::preprocess::ColorFormat::RGB); - } - - ppp.input(inputName).preprocess().convert_element_type(ov::element::f32); - - if (!mean.empty()) { - ppp.input(inputName).preprocess().mean(mean); - } - if (!scale.empty()) { - ppp.input(inputName).preprocess().scale(scale); - } - - return ppp.build(); -} - -std::shared_ptr BaseModel::preprocess(const InputData& inputData, InferenceInput& input) { - const auto& origImg = inputData.asRef().inputImage; - auto img = inputTransform(origImg); - - if (!useAutoResize && !embedded_processing) { - // Resize and copy data from the image to the input tensor - auto tensorShape = - inferenceAdapter->getInputShape(inputNames[0]).get_max_shape(); // first input should be image - const ov::Layout layout("NHWC"); - const size_t width = tensorShape[ov::layout::width_idx(layout)]; - const size_t height = tensorShape[ov::layout::height_idx(layout)]; - const size_t channels = tensorShape[ov::layout::channels_idx(layout)]; - if (static_cast(img.channels()) != channels) { - throw std::runtime_error("The number of channels for model input: " + std::to_string(channels) + - " and image: " + std::to_string(img.channels()) + " - must match"); - } - if (channels != 1 && channels != 3) { - throw std::runtime_error("Unsupported number of channels"); - } - img = resizeImageExt(img, width, height, resizeMode, interpolationMode); - } - input.emplace(inputNames[0], wrapMat2Tensor(img)); - return std::make_shared(origImg.cols, origImg.rows); -} - -std::vector BaseModel::loadLabels(const std::string& labelFilename) { - std::vector labelsList; - - /* Read labels (if any) */ - if (!labelFilename.empty()) { - std::ifstream inputFile(labelFilename); - if (!inputFile.is_open()) - throw std::runtime_error("Can't open the labels file: " + labelFilename); - std::string label; - while (std::getline(inputFile, label)) { - labelsList.push_back(label); - } - if (labelsList.empty()) - throw std::logic_error("File is empty: " + labelFilename); - } - - return labelsList; -} diff --git a/src/cpp/models/src/classification_model.cpp b/src/cpp/models/src/classification_model.cpp deleted file mode 100644 index a9d281e1..00000000 --- a/src/cpp/models/src/classification_model.cpp +++ /dev/null @@ -1,838 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/classification_model.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "models/input_data.h" -#include "models/results.h" - -std::string ClassificationModel::ModelType = "Classification"; - -namespace { -constexpr char indices_name[]{"indices"}; -constexpr char scores_name[]{"scores"}; -constexpr char saliency_map_name[]{"saliency_map"}; -constexpr char feature_vector_name[]{"feature_vector"}; -constexpr char raw_scores_name[]{"raw_scores"}; - -float sigmoid(float x) noexcept { - return 1.0f / (1.0f + std::exp(-x)); -} - -size_t fargmax(const float* x_start, const float* x_end) noexcept { - size_t argmax = 0; - - for (const float* iter = x_start; iter < x_end; ++iter) { - if (x_start[argmax] < *iter) { - argmax = iter - x_start; - } - } - - return argmax; -} - -void softmax(float* x_start, float* x_end, float eps = 1e-9) { - if (x_start == x_end) { - return; - } - - float x_max = *std::max_element(x_start, x_end); - float x_sum = 0.f; - for (auto it = x_start; it < x_end; ++it) { - *it = exp(*it - x_max); - x_sum += *it; - } - - for (auto it = x_start; it < x_end; ++it) { - *it /= x_sum + eps; - } -} - -void addOrFindSoftmaxAndTopkOutputs(std::shared_ptr& model, size_t topk, bool add_raw_scores) { - auto nodes = model->get_ops(); - std::shared_ptr softmaxNode; - for (size_t i = 0; i < model->outputs().size(); ++i) { - auto output_node = model->get_output_op(i)->input(0).get_source_output().get_node_shared_ptr(); - if (std::string(output_node->get_type_name()) == "Softmax") { - softmaxNode = output_node; - } else if (std::string(output_node->get_type_name()) == "TopK") { - return; - } - } - - if (!softmaxNode) { - auto logitsNode = model->get_output_op(0)->input(0).get_source_output().get_node(); - softmaxNode = std::make_shared(logitsNode->output(0), 1); - } - - const auto k = std::make_shared(ov::element::i32, ov::Shape{}, std::vector{topk}); - std::shared_ptr topkNode = std::make_shared(softmaxNode, - k, - 1, - ov::op::v3::TopK::Mode::MAX, - ov::op::v3::TopK::SortType::SORT_VALUES); - - auto indices = topkNode->output(0); - auto scores = topkNode->output(1); - ov::OutputVector outputs_vector; - if (add_raw_scores) { - auto raw_scores = softmaxNode->output(0); - outputs_vector = {scores, indices, raw_scores}; - } else { - outputs_vector = {scores, indices}; - } - for (const ov::Output& output : model->outputs()) { - if (output.get_names().count(saliency_map_name) > 0 || output.get_names().count(feature_vector_name) > 0) { - outputs_vector.push_back(output); - } - } - - auto source_rt_info = - model->has_rt_info("model_info") ? model->get_rt_info("model_info") : ov::AnyMap{}; - model = std::make_shared(outputs_vector, model->get_parameters(), "classification"); - - // preserve extra model_info items - for (const auto& k : source_rt_info) { - model->set_rt_info(k.second, "model_info", k.first); - } - - // manually set output tensors name for created topK node - model->outputs()[0].set_names({indices_name}); - model->outputs()[1].set_names({scores_name}); - if (add_raw_scores) { - model->outputs()[2].set_names({raw_scores_name}); - } - - // set output precisions - ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); - ppp.output(indices_name).tensor().set_element_type(ov::element::i32); - ppp.output(scores_name).tensor().set_element_type(ov::element::f32); - if (add_raw_scores) { - ppp.output(raw_scores_name).tensor().set_element_type(ov::element::f32); - } - model = ppp.build(); -} - -std::vector get_non_xai_names(const std::vector>& outputs) { - std::vector outputNames; - outputNames.reserve(std::max(1, int(outputs.size()) - 2)); - for (const ov::Output& output : outputs) { - if (output.get_names().count(saliency_map_name) > 0) { - continue; - } - if (output.get_names().count(feature_vector_name) > 0) { - continue; - } - outputNames.push_back(output.get_any_name()); - } - return outputNames; -} - -std::vector get_non_xai_output_indices(const std::vector>& outputs) { - std::vector outputIndices; - outputIndices.reserve(std::max(1, int(outputs.size()) - 2)); - size_t idx = 0; - for (const ov::Output& output : outputs) { - bool is_xai = - output.get_names().count(saliency_map_name) > 0 || output.get_names().count(feature_vector_name) > 0; - if (!is_xai) { - outputIndices.push_back(idx); - } - ++idx; - } - return outputIndices; -} - -std::vector get_non_xai_names(const std::vector& outputs) { - std::vector outputNames; - outputNames.reserve(std::max(1, int(outputs.size()) - 2)); - for (const auto& output : outputs) { - if (output.find(saliency_map_name) != std::string::npos) { - continue; - } - if (output.find(feature_vector_name) != std::string::npos) { - continue; - } - outputNames.push_back(output); - } - return outputNames; -} - -void append_xai_names(const std::vector>& outputs, std::vector& outputNames) { - for (const ov::Output& output : outputs) { - if (output.get_names().count(saliency_map_name) > 0) { - outputNames.emplace_back(saliency_map_name); - } else if (output.get_names().count(feature_vector_name) > 0) { - outputNames.push_back(feature_vector_name); - } - } -} - -void append_xai_names(const std::vector& outputs, std::vector& outputNames) { - for (const auto& output : outputs) { - if (output.find(saliency_map_name) != std::string::npos) { - outputNames.emplace_back(saliency_map_name); - } else if (output.find(feature_vector_name) != std::string::npos) { - outputNames.push_back(feature_vector_name); - } - } -} -} // namespace - -void ClassificationModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) { - topk = get_from_any_maps("topk", top_priority, mid_priority, topk); - confidence_threshold = get_from_any_maps("confidence_threshold", top_priority, mid_priority, confidence_threshold); - multilabel = get_from_any_maps("multilabel", top_priority, mid_priority, multilabel); - output_raw_scores = get_from_any_maps("output_raw_scores", top_priority, mid_priority, output_raw_scores); - hierarchical = get_from_any_maps("hierarchical", top_priority, mid_priority, hierarchical); - hierarchical_config = get_from_any_maps("hierarchical_config", top_priority, mid_priority, hierarchical_config); - hierarchical_postproc = - get_from_any_maps("hierarchical_postproc", top_priority, mid_priority, hierarchical_postproc); - if (hierarchical) { - if (hierarchical_config.empty()) { - throw std::runtime_error("Error: empty hierarchical classification config"); - } - hierarchical_info = HierarchicalConfig(hierarchical_config); - if (hierarchical_postproc == "probabilistic") { - resolver = std::make_unique(hierarchical_info); - } else if (hierarchical_postproc == "greedy") { - resolver = std::make_unique(hierarchical_info); - } else { - throw std::runtime_error("Wrong hierarchical labels postprocessing type"); - } - } -} - -ClassificationModel::ClassificationModel(std::shared_ptr& model, const ov::AnyMap& configuration) - : BaseModel(model, configuration) { - init_from_config(configuration, - model->has_rt_info("model_info") ? model->get_rt_info("model_info") : ov::AnyMap{}); -} - -ClassificationModel::ClassificationModel(std::shared_ptr& adapter, const ov::AnyMap& configuration) - : BaseModel(adapter, configuration) { - outputNames = get_non_xai_names(adapter->getOutputNames()); - append_xai_names(adapter->getOutputNames(), outputNames); - init_from_config(configuration, adapter->getModelConfig()); -} - -void ClassificationModel::updateModelInfo() { - BaseModel::updateModelInfo(); - - model->set_rt_info(ClassificationModel::ModelType, "model_info", "model_type"); - model->set_rt_info(topk, "model_info", "topk"); - model->set_rt_info(multilabel, "model_info", "multilabel"); - model->set_rt_info(hierarchical, "model_info", "hierarchical"); - model->set_rt_info(output_raw_scores, "model_info", "output_raw_scores"); - model->set_rt_info(confidence_threshold, "model_info", "confidence_threshold"); - model->set_rt_info(hierarchical_config, "model_info", "hierarchical_config"); - model->set_rt_info(hierarchical_postproc, "model_info", "hierarchical_postproc"); -} - -std::unique_ptr ClassificationModel::create_model(const std::string& modelFile, - const ov::AnyMap& configuration, - bool preload, - const std::string& device) { - auto core = ov::Core(); - std::shared_ptr model = core.read_model(modelFile); - - // Check model_type in the rt_info, ignore configuration - std::string model_type = ClassificationModel::ModelType; - try { - if (model->has_rt_info("model_info", "model_type")) { - model_type = model->get_rt_info("model_info", "model_type"); - } - } catch (const std::exception&) { - slog::warn << "Model type is not specified in the rt_info, use default model type: " << model_type - << slog::endl; - } - - if (model_type != ClassificationModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + - model_type); - } - - std::unique_ptr classifier{new ClassificationModel(model, configuration)}; - classifier->prepare(); - if (preload) { - classifier->load(core, device); - } - return classifier; -} - -std::unique_ptr ClassificationModel::create_model(std::shared_ptr& adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto model_type_iter = configuration.find("model_type"); - std::string model_type = ClassificationModel::ModelType; - if (model_type_iter != configuration.end()) { - model_type = model_type_iter->second.as(); - } - - if (model_type != ClassificationModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided: " + model_type); - } - - std::unique_ptr classifier{new ClassificationModel(adapter)}; - return classifier; -} - -std::unique_ptr ClassificationModel::postprocess(InferenceResult& infResult) { - std::unique_ptr result; - if (multilabel) { - result = get_multilabel_predictions(infResult, output_raw_scores); - } else if (hierarchical) { - result = get_hierarchical_predictions(infResult, output_raw_scores); - } else { - result = get_multiclass_predictions(infResult, output_raw_scores); - } - - ClassificationResult* cls_res = static_cast(result.get()); - auto saliency_map_iter = infResult.outputsData.find(saliency_map_name); - if (saliency_map_iter != infResult.outputsData.end()) { - cls_res->saliency_map = std::move(saliency_map_iter->second); - cls_res->saliency_map = reorder_saliency_maps(cls_res->saliency_map); - } - auto feature_vector_iter = infResult.outputsData.find(feature_vector_name); - if (feature_vector_iter != infResult.outputsData.end()) { - cls_res->feature_vector = std::move(feature_vector_iter->second); - } - return result; -} - -std::unique_ptr ClassificationModel::get_multilabel_predictions(InferenceResult& infResult, - bool add_raw_scores) { - const ov::Tensor& logitsTensor = infResult.outputsData.find(outputNames[0])->second; - const float* logitsPtr = logitsTensor.data(); - - ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); - - auto raw_scores = ov::Tensor(); - float* raw_scoresPtr = nullptr; - if (add_raw_scores) { - raw_scores = ov::Tensor(logitsTensor.get_element_type(), logitsTensor.get_shape()); - raw_scoresPtr = raw_scores.data(); - result->raw_scores = raw_scores; - } - - result->topLabels.reserve(labels.size()); - for (size_t i = 0; i < labels.size(); ++i) { - float score = sigmoid(logitsPtr[i]); - if (score > confidence_threshold) { - result->topLabels.emplace_back(i, labels[i], score); - } - if (add_raw_scores) { - raw_scoresPtr[i] = score; - } - } - - return retVal; -} - -std::unique_ptr ClassificationModel::get_hierarchical_predictions(InferenceResult& infResult, - bool add_raw_scores) { - ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData); - - const ov::Tensor& logitsTensor = infResult.outputsData.find(outputNames[0])->second; - float* logitsPtr = logitsTensor.data(); - - auto raw_scores = ov::Tensor(); - float* raw_scoresPtr = nullptr; - if (add_raw_scores) { - raw_scores = ov::Tensor(logitsTensor.get_element_type(), logitsTensor.get_shape()); - logitsTensor.copy_to(raw_scores); - raw_scoresPtr = raw_scores.data(); - result->raw_scores = raw_scores; - } - - std::vector> predicted_labels; - std::vector predicted_scores; - - predicted_labels.reserve(hierarchical_info.num_multiclass_heads + hierarchical_info.num_multilabel_heads); - predicted_scores.reserve(hierarchical_info.num_multiclass_heads + hierarchical_info.num_multilabel_heads); - - for (size_t i = 0; i < hierarchical_info.num_multiclass_heads; ++i) { - const auto& logits_range = hierarchical_info.head_idx_to_logits_range[i]; - softmax(logitsPtr + logits_range.first, logitsPtr + logits_range.second); - if (add_raw_scores) { - softmax(raw_scoresPtr + logits_range.first, raw_scoresPtr + logits_range.second); - } - size_t j = fargmax(logitsPtr + logits_range.first, logitsPtr + logits_range.second); - predicted_labels.push_back(hierarchical_info.all_groups[i][j]); - predicted_scores.push_back(logitsPtr[logits_range.first + j]); - } - - if (hierarchical_info.num_multilabel_heads) { - const float* mlc_logitsPtr = logitsPtr + hierarchical_info.num_single_label_classes; - - for (size_t i = 0; i < hierarchical_info.num_multilabel_heads; ++i) { - float score = sigmoid(mlc_logitsPtr[i]); - if (score > confidence_threshold) { - predicted_scores.push_back(score); - predicted_labels.push_back(hierarchical_info.all_groups[hierarchical_info.num_multiclass_heads + i][0]); - } - if (add_raw_scores) { - raw_scoresPtr[hierarchical_info.num_single_label_classes + i] = score; - } - } - } - - auto resolved_labels = resolver->resolve_labels(predicted_labels, predicted_scores); - - auto retVal = std::unique_ptr(result); - result->topLabels.reserve(resolved_labels.size()); - for (const auto& label : resolved_labels) { - result->topLabels.emplace_back(hierarchical_info.label_to_idx[label.first], label.first, label.second); - } - - return retVal; -} - -ov::Tensor ClassificationModel::reorder_saliency_maps(const ov::Tensor& source_maps) { - if (!hierarchical || source_maps.get_shape().size() == 1) { - return source_maps; - } - - auto reordered_maps = ov::Tensor(source_maps.get_element_type(), source_maps.get_shape()); - const std::uint8_t* source_maps_ptr = static_cast(source_maps.data()); - std::uint8_t* reordered_maps_ptr = static_cast(reordered_maps.data()); - - size_t shape_offset = (source_maps.get_shape().size() == 4) ? 1 : 0; - size_t map_byte_size = source_maps.get_element_type().size() * source_maps.get_shape()[shape_offset + 1] * - source_maps.get_shape()[shape_offset + 2]; - - for (size_t i = 0; i < source_maps.get_shape()[shape_offset]; ++i) { - size_t new_index = hierarchical_info.label_to_idx[hierarchical_info.logit_idx_to_label[i]]; - std::copy_n(source_maps_ptr + i * map_byte_size, map_byte_size, reordered_maps_ptr + new_index * map_byte_size); - } - - return reordered_maps; -} - -std::unique_ptr ClassificationModel::get_multiclass_predictions(InferenceResult& infResult, - bool add_raw_scores) { - const ov::Tensor& indicesTensor = infResult.outputsData.find(indices_name)->second; - const int* indicesPtr = indicesTensor.data(); - const ov::Tensor& scoresTensor = infResult.outputsData.find(scores_name)->second; - const float* scoresPtr = scoresTensor.data(); - - ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); - - if (add_raw_scores) { - const ov::Tensor& logitsTensor = infResult.outputsData.find(raw_scores_name)->second; - result->raw_scores = ov::Tensor(logitsTensor.get_element_type(), logitsTensor.get_shape()); - logitsTensor.copy_to(result->raw_scores); - result->raw_scores.set_shape(ov::Shape({result->raw_scores.get_size()})); - } - - result->topLabels.reserve(scoresTensor.get_size()); - for (size_t i = 0; i < scoresTensor.get_size(); ++i) { - int ind = indicesPtr[i]; - if (ind < 0 || ind >= static_cast(labels.size())) { - throw std::runtime_error("Invalid index for the class label is found during postprocessing"); - } - result->topLabels.emplace_back(ind, labels[ind], scoresPtr[i]); - } - - return retVal; -} - -void ClassificationModel::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output ------------------------------------------------- - // --------------------------- Prepare input ------------------------------------------------------ - if (model->inputs().size() != 1) { - throw std::logic_error("Classification model wrapper supports topologies with only 1 input"); - } - const auto& input = model->input(); - inputNames.push_back(input.get_any_name()); - - const ov::Shape& inputShape = input.get_partial_shape().get_max_shape(); - const ov::Layout& inputLayout = getInputLayout(input); - - if (!embedded_processing) { - model = BaseModel::embedProcessing( - model, - inputNames[0], - inputLayout, - resizeMode, - interpolationMode, - ov::Shape{inputShape[ov::layout::width_idx(inputLayout)], inputShape[ov::layout::height_idx(inputLayout)]}, - pad_value, - reverse_input_channels, - mean_values, - scale_values); - - ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); - model = ppp.build(); - useAutoResize = true; // temporal solution for classification - } - - // --------------------------- Prepare output ----------------------------------------------------- - if (model->outputs().size() > 5) { - throw std::logic_error("Classification model wrapper supports topologies with up to 4 outputs"); - } - - auto non_xai_idx = get_non_xai_output_indices(model->outputs()); - if (non_xai_idx.size() == 1) { - const ov::Shape& outputShape = model->outputs()[non_xai_idx[0]].get_partial_shape().get_max_shape(); - if (outputShape.size() != 2 && outputShape.size() != 4) { - throw std::logic_error("Classification model wrapper supports topologies only with" - " 2-dimensional or 4-dimensional output"); - } - - const ov::Layout outputLayout("NCHW"); - if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 || - outputShape[ov::layout::width_idx(outputLayout)] != 1)) { - throw std::logic_error("Classification model wrapper supports topologies only" - " with 4-dimensional output which has last two dimensions of size 1"); - } - - size_t classesNum = outputShape[ov::layout::channels_idx(outputLayout)]; - if (topk > classesNum) { - throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " + - std::to_string(topk) + " labels are requested to be predicted"); - } - if (classesNum != labels.size()) { - throw std::logic_error("Model's number of classes and parsed labels must match (" + - std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')'); - } - } - - if (multilabel || hierarchical) { - embedded_processing = true; - outputNames = get_non_xai_names(model->outputs()); - append_xai_names(model->outputs(), outputNames); - return; - } - - if (!embedded_processing) { - addOrFindSoftmaxAndTopkOutputs(model, topk, output_raw_scores); - } - embedded_processing = true; - - outputNames = {indices_name, scores_name}; - if (output_raw_scores) { - outputNames.emplace_back("raw_scores"); - } - append_xai_names(model->outputs(), outputNames); -} - -std::unique_ptr ClassificationModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); -} - -std::vector> ClassificationModel::inferBatch( - const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> clsResults; - clsResults.reserve(results.size()); - for (auto& result : results) { - clsResults.emplace_back(static_cast(result.release())); - } - return clsResults; -} - -HierarchicalConfig::HierarchicalConfig(const std::string& json_repr) { - nlohmann::json data = nlohmann::json::parse(json_repr); - - num_multilabel_heads = data.at("cls_heads_info").at("num_multilabel_classes"); - num_multiclass_heads = data.at("cls_heads_info").at("num_multiclass_heads"); - num_single_label_classes = data.at("cls_heads_info").at("num_single_label_classes"); - - data.at("cls_heads_info").at("label_to_idx").get_to(label_to_idx); - data.at("cls_heads_info").at("all_groups").get_to(all_groups); - data.at("label_tree_edges").get_to(label_tree_edges); - - std::map> tmp_head_idx_to_logits_range; - data.at("cls_heads_info").at("head_idx_to_logits_range").get_to(tmp_head_idx_to_logits_range); - - for (const auto& range_descr : tmp_head_idx_to_logits_range) { - head_idx_to_logits_range[stoi(range_descr.first)] = range_descr.second; - } - - size_t logits_processed = 0; - for (size_t i = 0; i < num_multiclass_heads; ++i) { - const auto& logits_range = head_idx_to_logits_range[i]; - for (size_t k = logits_range.first; k < logits_range.second; ++k) { - logit_idx_to_label[logits_processed++] = all_groups[i][k - logits_range.first]; - } - } - for (size_t i = 0; i < num_multilabel_heads; ++i) { - logit_idx_to_label[logits_processed++] = all_groups[num_multiclass_heads + i][0]; - } -} - -GreedyLabelsResolver::GreedyLabelsResolver(const HierarchicalConfig& config) - : label_to_idx(config.label_to_idx), - label_relations(config.label_tree_edges), - label_groups(config.all_groups) {} - -std::map GreedyLabelsResolver::resolve_labels( - const std::vector>& labels, - const std::vector& scores) { - if (labels.size() != scores.size()) { - throw std::runtime_error("Inconsistent number of labels and scores"); - } - std::map label_to_prob; - for (const auto& label_idx : label_to_idx) { - label_to_prob[label_idx.first] = 0.f; - } - - for (size_t i = 0; i < labels.size(); ++i) { - label_to_prob[labels[i]] = scores[i]; - } - - std::vector candidates; - for (const auto& g : label_groups) { - if (g.size() == 1 && label_to_prob[g[0]] > 0.f) { - candidates.push_back(g[0]); - } else { - float max_prob = 0.f; - std::string max_label; - for (const auto& lbl : g) { - if (label_to_prob[lbl] > max_prob) { - max_prob = label_to_prob[lbl]; - max_label = lbl; - } - if (max_label.size() > 0) { - candidates.push_back(max_label); - } - } - } - } - - std::map resolved_label_to_prob; - for (const auto& lbl : candidates) { - if (resolved_label_to_prob.find(lbl) != resolved_label_to_prob.end()) { - continue; - } - auto labels_to_add = get_predecessors(lbl, candidates); - for (const auto& new_lbl : labels_to_add) { - if (resolved_label_to_prob.find(new_lbl) == resolved_label_to_prob.end()) { - resolved_label_to_prob[new_lbl] = label_to_prob[new_lbl]; - } - } - } - - return resolved_label_to_prob; -} - -std::string GreedyLabelsResolver::get_parent(const std::string& label) { - for (const auto& edge : label_relations) { - if (label == edge.first) { - return edge.second; - } - } - return ""; -} - -std::vector GreedyLabelsResolver::get_predecessors(const std::string& label, - const std::vector& candidates) { - std::vector predecessors; - auto last_parent = get_parent(label); - - if (last_parent.size() == 0) { - return {label}; - } - while (last_parent.size() > 0) { - if (std::find(candidates.begin(), candidates.end(), last_parent) == candidates.end()) { - return {}; - } - predecessors.push_back(last_parent); - last_parent = get_parent(last_parent); - } - - if (predecessors.size() > 0) { - predecessors.push_back(label); - } - - return predecessors; -} - -SimpleLabelsGraph::SimpleLabelsGraph(const std::vector& vertices_) - : vertices(vertices_), - t_sort_cache_valid(false) {} - -void SimpleLabelsGraph::add_edge(const std::string& parent, const std::string& child) { - adj[parent].push_back(child); - parents_map[child] = parent; - t_sort_cache_valid = false; -} - -std::vector SimpleLabelsGraph::get_children(const std::string& label) const { - auto iter = adj.find(label); - if (iter == adj.end()) { - return std::vector(); - } - return iter->second; -} - -std::string SimpleLabelsGraph::get_parent(const std::string& label) const { - auto iter = parents_map.find(label); - if (iter == parents_map.end()) { - return std::string(); - } - return iter->second; -} - -std::vector SimpleLabelsGraph::get_ancestors(const std::string& label) const { - std::vector predecessors = {label}; - auto last_parent = get_parent(label); - if (!last_parent.size()) { - return predecessors; - } - - while (last_parent.size()) { - predecessors.push_back(last_parent); - last_parent = get_parent(last_parent); - } - - return predecessors; -} - -std::vector SimpleLabelsGraph::get_labels_in_topological_order() { - if (!t_sort_cache_valid) { - topological_order_cache = topological_sort(); - } - return topological_order_cache; -} - -std::vector SimpleLabelsGraph::topological_sort() { - auto in_degree = std::unordered_map(); - for (const auto& node : vertices) { - in_degree[node] = 0; - } - - for (const auto& item : adj) { - for (const auto& node : item.second) { - in_degree[node] += 1; - } - } - - std::deque nodes_deque; - for (const auto& node : vertices) { - if (in_degree[node] == 0) { - nodes_deque.push_back(node); - } - } - - std::vector ordered_nodes; - while (!nodes_deque.empty()) { - auto u = nodes_deque[0]; - nodes_deque.pop_front(); - ordered_nodes.push_back(u); - - for (const auto& node : adj[u]) { - auto degree = --in_degree[node]; - if (degree == 0) { - nodes_deque.push_back(node); - } - } - } - - if (ordered_nodes.size() != vertices.size()) { - throw std::runtime_error("Topological sort failed: input graph has been" - "changed during the sorting or contains a cycle"); - } - - return ordered_nodes; -} - -ProbabilisticLabelsResolver::ProbabilisticLabelsResolver(const HierarchicalConfig& conf) : GreedyLabelsResolver(conf) { - std::vector all_labels; - for (const auto& item : label_to_idx) { - all_labels.push_back(item.first); - } - label_tree = SimpleLabelsGraph(all_labels); - for (const auto& item : label_relations) { - label_tree.add_edge(item.second, item.first); - } - label_tree.get_labels_in_topological_order(); -} - -std::map ProbabilisticLabelsResolver::resolve_labels( - const std::vector>& labels, - const std::vector& scores) { - if (labels.size() != scores.size()) { - throw std::runtime_error("Inconsistent number of labels and scores"); - } - - std::unordered_map label_to_prob; - for (size_t i = 0; i < labels.size(); ++i) { - label_to_prob[labels[i]] = scores[i]; - } - - label_to_prob = add_missing_ancestors(label_to_prob); - auto hard_classification = resolve_exclusive_labels(label_to_prob); - suppress_descendant_output(hard_classification); - - std::map output_labels_map; - - for (const auto& item : hard_classification) { - if (item.second > 0) { - if (label_to_prob.find(item.first) != label_to_prob.end()) { - output_labels_map[item.first] = item.second * label_to_prob[item.first]; - } else { - output_labels_map[item.first] = item.second; - } - } - } - - return output_labels_map; -} - -std::unordered_map ProbabilisticLabelsResolver::add_missing_ancestors( - const std::unordered_map& label_to_prob) const { - std::unordered_map updated_label_to_probability(label_to_prob); - for (const auto& item : label_to_prob) { - for (const auto& ancestor : label_tree.get_ancestors(item.first)) { - if (updated_label_to_probability.find(ancestor) == updated_label_to_probability.end()) { - updated_label_to_probability[ancestor] = 0.f; - } - } - } - return updated_label_to_probability; -} - -std::map ProbabilisticLabelsResolver::resolve_exclusive_labels( - const std::unordered_map& label_to_prob) const { - std::map hard_classification; - - for (const auto& item : label_to_prob) { - hard_classification[item.first] = static_cast(item.second > 0); - } - - return hard_classification; -} - -void ProbabilisticLabelsResolver::suppress_descendant_output(std::map& hard_classification) { - auto all_labels = label_tree.get_labels_in_topological_order(); - - for (const auto& child : all_labels) { - if (hard_classification.find(child) != hard_classification.end()) { - auto parent = label_tree.get_parent(child); - if (parent.size() && hard_classification.find(parent) != hard_classification.end()) { - hard_classification[child] *= hard_classification[parent]; - } - } - } -} diff --git a/src/cpp/models/src/detection_model.cpp b/src/cpp/models/src/detection_model.cpp deleted file mode 100644 index 6b55eeba..00000000 --- a/src/cpp/models/src/detection_model.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/detection_model.h" - -#include -#include -#include -#include - -#include "models/base_model.h" -#include "models/detection_model_ssd.h" -#include "models/detection_model_yolo.h" -#include "models/detection_model_yolov3_onnx.h" -#include "models/detection_model_yolox.h" -#include "models/input_data.h" -#include "models/results.h" -#include "utils/slog.hpp" - -DetectionModel::DetectionModel(std::shared_ptr& model, const ov::AnyMap& configuration) - : BaseModel(model, configuration) { - auto confidence_threshold_iter = configuration.find("confidence_threshold"); - if (confidence_threshold_iter == configuration.end()) { - if (model->has_rt_info("model_info", "confidence_threshold")) { - confidence_threshold = model->get_rt_info("model_info", "confidence_threshold"); - } - } else { - confidence_threshold = confidence_threshold_iter->second.as(); - } -} - -DetectionModel::DetectionModel(std::shared_ptr& adapter, const ov::AnyMap& configuration) - : BaseModel(adapter, configuration) { - confidence_threshold = - get_from_any_maps("confidence_threshold", configuration, adapter->getModelConfig(), confidence_threshold); -} - -void DetectionModel::updateModelInfo() { - BaseModel::updateModelInfo(); - - model->set_rt_info(confidence_threshold, "model_info", "confidence_threshold"); -} - -std::unique_ptr DetectionModel::create_model(const std::string& modelFile, - const ov::AnyMap& configuration, - std::string model_type, - bool preload, - const std::string& device) { - auto core = ov::Core(); - std::shared_ptr model = core.read_model(modelFile); - if (model_type.empty()) { - try { - if (model->has_rt_info("model_info", "model_type")) { - model_type = model->get_rt_info("model_info", "model_type"); - } - } catch (const std::exception&) { - slog::warn << "Model type is not specified in the rt_info, use default model type: " << model_type - << slog::endl; - } - } - - std::unique_ptr detectionModel; - if (model_type == ModelSSD::ModelType || model_type == "SSD") { - detectionModel = std::unique_ptr(new ModelSSD(model, configuration)); - } else if (model_type == ModelYoloX::ModelType) { - detectionModel = std::unique_ptr(new ModelYoloX(model, configuration)); - } else if (model_type == YOLOv5::ModelType) { - detectionModel = std::unique_ptr(new YOLOv5(model, configuration)); - } else if (model_type == YOLOv8::ModelType) { - detectionModel = std::unique_ptr(new YOLOv8(model, configuration)); - } else { - throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + - model_type); - } - - detectionModel->prepare(); - if (preload) { - detectionModel->load(core, device); - } - return detectionModel; -} - -std::unique_ptr DetectionModel::create_model(std::shared_ptr& adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto model_type_iter = configuration.find("model_type"); - std::string model_type; - if (model_type_iter != configuration.end()) { - model_type = model_type_iter->second.as(); - } - - std::unique_ptr detectionModel; - if (model_type == ModelSSD::ModelType || model_type == "SSD") { - detectionModel = std::unique_ptr(new ModelSSD(adapter)); - } else if (model_type == ModelYoloX::ModelType) { - detectionModel = std::unique_ptr(new ModelYoloX(adapter)); - } else { - throw std::runtime_error("Incorrect or unsupported model_type is provided: " + model_type); - } - - return detectionModel; -} - -std::unique_ptr DetectionModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); -} - -std::vector> DetectionModel::inferBatch(const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> detResults; - detResults.reserve(results.size()); - for (auto& result : results) { - detResults.emplace_back(static_cast(result.release())); - } - return detResults; -} diff --git a/src/cpp/models/src/detection_model_ext.cpp b/src/cpp/models/src/detection_model_ext.cpp deleted file mode 100644 index d1084820..00000000 --- a/src/cpp/models/src/detection_model_ext.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ -#include "models/detection_model_ext.h" - -#include -#include -#include -#include - -#include "models/base_model.h" -#include "models/input_data.h" -#include "models/results.h" - -DetectionModelExt::DetectionModelExt(std::shared_ptr& model, const ov::AnyMap& configuration) - : DetectionModel(model, configuration) { - auto iou_threshold_iter = configuration.find("iou_threshold"); - if (iou_threshold_iter != configuration.end()) { - iou_threshold = iou_threshold_iter->second.as(); - } else { - if (model->has_rt_info("model_info", "iou_threshold")) { - iou_threshold = model->get_rt_info("model_info", "iou_threshold"); - } - } -} - -DetectionModelExt::DetectionModelExt(std::shared_ptr& adapter) : DetectionModel(adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto iou_threshold_iter = configuration.find("iou_threshold"); - if (iou_threshold_iter != configuration.end()) { - iou_threshold = iou_threshold_iter->second.as(); - } -} - -void DetectionModelExt::updateModelInfo() { - DetectionModel::updateModelInfo(); - - model->set_rt_info(iou_threshold, "model_info", "iou_threshold"); -} diff --git a/src/cpp/models/src/detection_model_ssd.cpp b/src/cpp/models/src/detection_model_ssd.cpp deleted file mode 100644 index f6f7c818..00000000 --- a/src/cpp/models/src/detection_model_ssd.cpp +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/detection_model_ssd.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "models/internal_model_data.h" -#include "models/results.h" - -namespace { -constexpr char saliency_map_name[]{"saliency_map"}; -constexpr char feature_vector_name[]{"feature_vector"}; -constexpr float box_area_threshold = 1.0f; - -struct NumAndStep { - size_t detectionsNum, objectSize; -}; - -NumAndStep fromSingleOutput(const ov::Shape& shape) { - const ov::Layout& layout("NCHW"); - if (shape.size() != 4) { - throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size())); - } - size_t detectionsNum = shape[ov::layout::height_idx(layout)]; - size_t objectSize = shape[ov::layout::width_idx(layout)]; - if (objectSize != 7) { - throw std::logic_error("SSD single output must have 7 as a last dimension, but had " + - std::to_string(objectSize)); - } - return {detectionsNum, objectSize}; -} - -NumAndStep fromMultipleOutputs(const ov::Shape& boxesShape) { - if (boxesShape.size() == 2) { - ov::Layout boxesLayout = "NC"; - size_t detectionsNum = boxesShape[ov::layout::batch_idx(boxesLayout)]; - size_t objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)]; - - if (objectSize != 5) { - throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required"); - } - return {detectionsNum, objectSize}; - } - if (boxesShape.size() == 3) { - ov::Layout boxesLayout = "CHW"; - size_t detectionsNum = boxesShape[ov::layout::height_idx(boxesLayout)]; - size_t objectSize = boxesShape[ov::layout::width_idx(boxesLayout)]; - - if (objectSize != 4 && objectSize != 5) { - throw std::logic_error("Incorrect 'boxes' output shape, [b][n][{4 or 5}] shape is required"); - } - return {detectionsNum, objectSize}; - } - throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " + - std::to_string(boxesShape.size())); -} - -std::vector filterOutXai(const std::vector& names) { - std::vector filtered; - std::copy_if(names.begin(), names.end(), std::back_inserter(filtered), [](const std::string& name) { - return name != saliency_map_name && name != feature_vector_name; - }); - return filtered; -} - -float clamp_and_round(float val, float min, float max) { - return std::round(std::max(min, std::min(max, val))); -} -} // namespace - -std::string ModelSSD::ModelType = "ssd"; - -std::shared_ptr ModelSSD::preprocess(const InputData& inputData, InferenceInput& input) { - if (inputNames.size() > 1) { - ov::Tensor info{ov::element::i32, ov::Shape({1, 3})}; - int32_t* data = info.data(); - data[0] = netInputHeight; - data[1] = netInputWidth; - data[3] = 1; - input.emplace(inputNames[1], std::move(info)); - } - return DetectionModel::preprocess(inputData, input); -} - -std::unique_ptr ModelSSD::postprocess(InferenceResult& infResult) { - std::unique_ptr result = filterOutXai(outputNames).size() > 1 ? postprocessMultipleOutputs(infResult) - : postprocessSingleOutput(infResult); - DetectionResult* cls_res = static_cast(result.get()); - auto saliency_map_iter = infResult.outputsData.find(saliency_map_name); - if (saliency_map_iter != infResult.outputsData.end()) { - cls_res->saliency_map = std::move(saliency_map_iter->second); - } - auto feature_vector_iter = infResult.outputsData.find(feature_vector_name); - if (feature_vector_iter != infResult.outputsData.end()) { - cls_res->feature_vector = std::move(feature_vector_iter->second); - } - return result; -} - -std::unique_ptr ModelSSD::postprocessSingleOutput(InferenceResult& infResult) { - const std::vector namesWithoutXai = filterOutXai(outputNames); - assert(namesWithoutXai.size() == 1); - const ov::Tensor& detectionsTensor = infResult.outputsData[namesWithoutXai[0]]; - NumAndStep numAndStep = fromSingleOutput(detectionsTensor.get_shape()); - const float* detections = detectionsTensor.data(); - - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); - - const auto& internalData = infResult.internalModelData->asRef(); - float floatInputImgWidth = float(internalData.inputImgWidth), - floatInputImgHeight = float(internalData.inputImgHeight); - float invertedScaleX = floatInputImgWidth / netInputWidth, invertedScaleY = floatInputImgHeight / netInputHeight; - int padLeft = 0, padTop = 0; - if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY); - if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - padLeft = (netInputWidth - int(std::round(floatInputImgWidth / invertedScaleX))) / 2; - padTop = (netInputHeight - int(std::round(floatInputImgHeight / invertedScaleY))) / 2; - } - } - - for (size_t i = 0; i < numAndStep.detectionsNum; i++) { - float image_id = detections[i * numAndStep.objectSize + 0]; - if (image_id < 0) { - break; - } - - float confidence = detections[i * numAndStep.objectSize + 2]; - - /** Filtering out objects with confidence < confidence_threshold probability **/ - if (confidence > confidence_threshold) { - DetectedObject desc; - - desc.confidence = confidence; - desc.labelID = static_cast(detections[i * numAndStep.objectSize + 1]); - desc.label = getLabelName(desc.labelID); - desc.x = - clamp(round((detections[i * numAndStep.objectSize + 3] * netInputWidth - padLeft) * invertedScaleX), - 0.f, - floatInputImgWidth); - desc.y = - clamp(round((detections[i * numAndStep.objectSize + 4] * netInputHeight - padTop) * invertedScaleY), - 0.f, - floatInputImgHeight); - desc.width = - clamp(round((detections[i * numAndStep.objectSize + 5] * netInputWidth - padLeft) * invertedScaleX), - 0.f, - floatInputImgWidth) - - desc.x; - desc.height = - clamp(round((detections[i * numAndStep.objectSize + 6] * netInputHeight - padTop) * invertedScaleY), - 0.f, - floatInputImgHeight) - - desc.y; - result->objects.push_back(desc); - } - } - - return retVal; -} - -std::unique_ptr ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) { - const std::vector namesWithoutXai = filterOutXai(outputNames); - const float* boxes = infResult.outputsData[namesWithoutXai[0]].data(); - NumAndStep numAndStep = fromMultipleOutputs(infResult.outputsData[namesWithoutXai[0]].get_shape()); - const int64_t* labels = infResult.outputsData[namesWithoutXai[1]].data(); - const float* scores = - namesWithoutXai.size() > 2 ? infResult.outputsData[namesWithoutXai[2]].data() : nullptr; - - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); - - const auto& internalData = infResult.internalModelData->asRef(); - float floatInputImgWidth = float(internalData.inputImgWidth), - floatInputImgHeight = float(internalData.inputImgHeight); - float invertedScaleX = floatInputImgWidth / netInputWidth, invertedScaleY = floatInputImgHeight / netInputHeight; - int padLeft = 0, padTop = 0; - if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY); - if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - padLeft = (netInputWidth - int(std::round(floatInputImgWidth / invertedScaleX))) / 2; - padTop = (netInputHeight - int(std::round(floatInputImgHeight / invertedScaleY))) / 2; - } - } - - // In models with scores stored in separate output coordinates are normalized to [0,1] - // In other multiple-outputs models coordinates are normalized to [0,netInputWidth] and [0,netInputHeight] - float widthScale = scores ? netInputWidth : 1.0f; - float heightScale = scores ? netInputHeight : 1.0f; - - for (size_t i = 0; i < numAndStep.detectionsNum; i++) { - float confidence = scores ? scores[i] : boxes[i * numAndStep.objectSize + 4]; - - /** Filtering out objects with confidence < confidence_threshold probability **/ - if (confidence > confidence_threshold) { - DetectedObject desc; - - desc.confidence = confidence; - desc.labelID = labels[i]; - desc.label = getLabelName(desc.labelID); - desc.x = clamp_and_round((boxes[i * numAndStep.objectSize] * widthScale - padLeft) * invertedScaleX, - 0.f, - floatInputImgWidth); - desc.y = clamp_and_round((boxes[i * numAndStep.objectSize + 1] * heightScale - padTop) * invertedScaleY, - 0.f, - floatInputImgHeight); - desc.width = clamp_and_round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX, - 0.f, - floatInputImgWidth) - - desc.x; - desc.height = - clamp_and_round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY, - 0.f, - floatInputImgHeight) - - desc.y; - - if (desc.width * desc.height >= box_area_threshold) { - result->objects.push_back(desc); - } - } - } - - return retVal; -} - -void ModelSSD::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output ------------------------------------------------- - // --------------------------- Prepare input ------------------------------------------------------ - for (const auto& input : model->inputs()) { - auto inputTensorName = input.get_any_name(); - const ov::Shape& shape = input.get_partial_shape().get_max_shape(); - ov::Layout inputLayout = getInputLayout(input); - - if (shape.size() == 4) { // 1st input contains images - if (inputNames.empty()) { - inputNames.push_back(inputTensorName); - } else { - inputNames[0] = inputTensorName; - } - - if (!embedded_processing) { - model = BaseModel::embedProcessing( - model, - inputNames[0], - inputLayout, - resizeMode, - interpolationMode, - ov::Shape{shape[ov::layout::width_idx(inputLayout)], shape[ov::layout::height_idx(inputLayout)]}, - pad_value, - reverse_input_channels, - mean_values, - scale_values); - - netInputWidth = shape[ov::layout::width_idx(inputLayout)]; - netInputHeight = shape[ov::layout::height_idx(inputLayout)]; - - useAutoResize = true; // temporal solution for SSD - embedded_processing = true; - } - } else if (shape.size() == 2) { // 2nd input contains image info - inputNames.resize(2); - inputNames[1] = inputTensorName; - if (!embedded_processing) { - ov::preprocess::PrePostProcessor ppp(model); - ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32); - model = ppp.build(); - } - } else { - throw std::logic_error("Unsupported " + std::to_string(input.get_partial_shape().size()) + - "D " - "input layer '" + - input.get_any_name() + - "'. " - "Only 2D and 4D input layers are supported"); - } - } - - // --------------------------- Prepare output ----------------------------------------------------- - if (model->outputs().size() == 1) { - prepareSingleOutput(model); - } else { - prepareMultipleOutputs(model); - } - embedded_processing = true; -} - -void ModelSSD::prepareSingleOutput(std::shared_ptr& model) { - const auto& output = model->output(); - outputNames.push_back(output.get_any_name()); - - fromSingleOutput(output.get_partial_shape().get_max_shape()); - - if (!embedded_processing) { - ov::preprocess::PrePostProcessor ppp(model); - ppp.output().tensor().set_element_type(ov::element::f32); - model = ppp.build(); - } -} - -void ModelSSD::prepareMultipleOutputs(std::shared_ptr& model) { - const ov::OutputVector& outputs = model->outputs(); - for (auto& output : outputs) { - const auto& tensorNames = output.get_names(); - for (const auto& name : tensorNames) { - if (name.find("boxes") != std::string::npos) { - outputNames.push_back(name); - break; - } else if (name.find("labels") != std::string::npos) { - outputNames.push_back(name); - break; - } else if (name.find("scores") != std::string::npos) { - outputNames.push_back(name); - break; - } - } - } - if (outputNames.size() != 2 && outputNames.size() != 3) { - throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " + - std::to_string(outputNames.size())); - } - std::sort(outputNames.begin(), outputNames.end()); - - fromMultipleOutputs(model->output(outputNames[0]).get_partial_shape().get_max_shape()); - - if (!embedded_processing) { - ov::preprocess::PrePostProcessor ppp(model); - - for (const auto& outName : outputNames) { - ppp.output(outName).tensor().set_element_type(ov::element::f32); - } - model = ppp.build(); - } -} - -void ModelSSD::updateModelInfo() { - DetectionModel::updateModelInfo(); - - model->set_rt_info(ModelSSD::ModelType, "model_info", "model_type"); -} diff --git a/src/cpp/models/src/detection_model_yolo.cpp b/src/cpp/models/src/detection_model_yolo.cpp deleted file mode 100644 index 1698b8e6..00000000 --- a/src/cpp/models/src/detection_model_yolo.cpp +++ /dev/null @@ -1,642 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/detection_model_yolo.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "models/internal_model_data.h" -#include "models/results.h" - -namespace { -const std::vector defaultAnchors[]{ - // YOLOv1v2 - {0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f}, - // YOLOv3 - {10.0f, - 13.0f, - 16.0f, - 30.0f, - 33.0f, - 23.0f, - 30.0f, - 61.0f, - 62.0f, - 45.0f, - 59.0f, - 119.0f, - 116.0f, - 90.0f, - 156.0f, - 198.0f, - 373.0f, - 326.0f}, - // YOLOv4 - {12.0f, - 16.0f, - 19.0f, - 36.0f, - 40.0f, - 28.0f, - 36.0f, - 75.0f, - 76.0f, - 55.0f, - 72.0f, - 146.0f, - 142.0f, - 110.0f, - 192.0f, - 243.0f, - 459.0f, - 401.0f}, - // YOLOv4_Tiny - {10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f}, - // YOLOF - {16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f}}; - -float sigmoid(float x) noexcept { - return 1.0f / (1.0f + std::exp(-x)); -} - -constexpr float identity(float x) noexcept { - return x; -} -} // namespace - -ModelYolo::ModelYolo(std::shared_ptr& model, const ov::AnyMap& configuration) - : DetectionModelExt(model, configuration) { - auto anchors_iter = configuration.find("anchors"); - if (anchors_iter == configuration.end()) { - if (model->has_rt_info("model_info", "anchors")) { - // presetAnchors = - // model->get_rt_info().at("model_info").as>().get().at("anchors").as>(); - presetAnchors = model->get_rt_info>("model_info", "anchors"); - } - } else { - presetAnchors = anchors_iter->second.as>(); - } - auto masks_iter = configuration.find("masks"); - if (masks_iter == configuration.end()) { - if (model->has_rt_info("model_info", "masks")) { - presetMasks = model->get_rt_info>("model_info", "masks"); - } - } else { - presetMasks = masks_iter->second.as>(); - } - - resizeMode = RESIZE_FILL; // Ignore resize_type for now -} - -ModelYolo::ModelYolo(std::shared_ptr& adapter) : DetectionModelExt(adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto anchors_iter = configuration.find("anchors"); - if (anchors_iter != configuration.end()) { - presetAnchors = anchors_iter->second.as>(); - } - auto masks_iter = configuration.find("masks"); - if (masks_iter != configuration.end()) { - presetMasks = masks_iter->second.as>(); - } - - resizeMode = RESIZE_FILL; // Ignore resize_type for now -} - -void ModelYolo::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output ------------------------------------------------- - // --------------------------- Prepare input ------------------------------------------------------ - if (model->inputs().size() != 1) { - throw std::logic_error("YOLO model wrapper accepts models that have only 1 input"); - } - - const auto& input = model->input(); - const ov::Shape& inputShape = model->input().get_shape(); - ov::Layout inputLayout = getInputLayout(input); - - if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { - throw std::logic_error("Expected 3-channel input"); - } - - ov::preprocess::PrePostProcessor ppp(model); - inputTransform.setPrecision(ppp, input.get_any_name()); - ppp.input().tensor().set_layout({"NHWC"}); - - if (useAutoResize) { - ppp.input().tensor().set_spatial_dynamic_shape(); - - ppp.input() - .preprocess() - .convert_element_type(ov::element::f32) - .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); - } - - ppp.input().model().set_layout(inputLayout); - - //--- Reading image input parameters - inputNames.push_back(model->input().get_any_name()); - netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; - netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; - - // --------------------------- Prepare output ----------------------------------------------------- - const ov::OutputVector& outputs = model->outputs(); - std::map outShapes; - for (auto& out : outputs) { - ppp.output(out.get_any_name()).tensor().set_element_type(ov::element::f32); - if (out.get_shape().size() == 4) { - if (out.get_shape()[ov::layout::height_idx("NCHW")] != out.get_shape()[ov::layout::width_idx("NCHW")] && - out.get_shape()[ov::layout::height_idx("NHWC")] == out.get_shape()[ov::layout::width_idx("NHWC")]) { - ppp.output(out.get_any_name()).model().set_layout("NHWC"); - // outShapes are saved before ppp.build() thus set yoloRegionLayout as it is in model before ppp.build() - yoloRegionLayout = "NHWC"; - } - // yolo-v1-tiny-tf out shape is [1, 21125] thus set layout only for 4 dim tensors - ppp.output(out.get_any_name()).tensor().set_layout("NCHW"); - } - outputNames.push_back(out.get_any_name()); - outShapes[out.get_any_name()] = out.get_shape(); - } - model = ppp.build(); - - yoloVersion = YoloVersion::YOLO_V3; - bool isRegionFound = false; - for (const auto& op : model->get_ordered_ops()) { - if (std::string("RegionYolo") == op->get_type_name()) { - auto regionYolo = std::dynamic_pointer_cast(op); - - if (regionYolo) { - if (!regionYolo->get_mask().size()) { - yoloVersion = YoloVersion::YOLO_V1V2; - } - - const auto& opName = op->get_friendly_name(); - for (const auto& out : outputs) { - if (out.get_node()->get_friendly_name() == opName || - out.get_node()->get_input_node_ptr(0)->get_friendly_name() == opName) { - isRegionFound = true; - regions.emplace(out.get_any_name(), Region(regionYolo)); - } - } - } - } - } - - if (!isRegionFound) { - switch (outputNames.size()) { - case 1: - yoloVersion = YoloVersion::YOLOF; - break; - case 2: - yoloVersion = YoloVersion::YOLO_V4_TINY; - break; - case 3: - yoloVersion = YoloVersion::YOLO_V4; - break; - } - - int num = yoloVersion == YoloVersion::YOLOF ? 6 : 3; - isObjConf = yoloVersion == YoloVersion::YOLOF ? 0 : 1; - int i = 0; - - const std::vector defaultMasks[]{// YOLOv1v2 - {}, - // YOLOv3 - {}, - // YOLOv4 - {0, 1, 2, 3, 4, 5, 6, 7, 8}, - // YOLOv4_Tiny - {1, 2, 3, 3, 4, 5}, - // YOLOF - {0, 1, 2, 3, 4, 5}}; - auto chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[size_t(yoloVersion)]; - if (chosenMasks.size() != num * outputs.size()) { - throw std::runtime_error("Invalid size of masks array, got " + std::to_string(presetMasks.size()) + - ", should be " + std::to_string(num * outputs.size())); - } - - std::sort(outputNames.begin(), - outputNames.end(), - [&outShapes, this](const std::string& x, const std::string& y) { - return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] > - outShapes[y][ov::layout::height_idx(yoloRegionLayout)]; - }); - - for (const auto& name : outputNames) { - const auto& shape = outShapes[name]; - if (shape[ov::layout::channels_idx(yoloRegionLayout)] % num != 0) { - throw std::logic_error(std::string("Output tensor ") + name + " has wrong channel dimension"); - } - regions.emplace( - name, - Region(shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0), - 4, - presetAnchors.size() ? presetAnchors : defaultAnchors[size_t(yoloVersion)], - std::vector(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num), - shape[ov::layout::width_idx(yoloRegionLayout)], - shape[ov::layout::height_idx(yoloRegionLayout)])); - i++; - } - } else { - // Currently externally set anchors and masks are supported only for YoloV4 - if (presetAnchors.size() || presetMasks.size()) { - slog::warn << "Preset anchors and mask can be set for YoloV4 model only. " - "This model is not YoloV4, so these options will be ignored." - << slog::endl; - } - } -} - -std::unique_ptr ModelYolo::postprocess(InferenceResult& infResult) { - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - std::vector objects; - - // Parsing outputs - const auto& internalData = infResult.internalModelData->asRef(); - - for (auto& output : infResult.outputsData) { - this->parseYOLOOutput(output.first, - output.second, - netInputHeight, - netInputWidth, - internalData.inputImgHeight, - internalData.inputImgWidth, - objects); - } - - if (useAdvancedPostprocessing) { - // Advanced postprocessing - // Checking IOU threshold conformance - // For every i-th object we're finding all objects it intersects with, and comparing confidence - // If i-th object has greater confidence than all others, we include it into result - for (const auto& obj1 : objects) { - bool isGoodResult = true; - for (const auto& obj2 : objects) { - if (obj1.labelID == obj2.labelID && obj1.confidence < obj2.confidence && - intersectionOverUnion(obj1, obj2) >= iou_threshold) { // if obj1 is the same as obj2, condition - // expression will evaluate to false anyway - isGoodResult = false; - break; - } - } - if (isGoodResult) { - result->objects.push_back(obj1); - } - } - } else { - // Classic postprocessing - std::sort(objects.begin(), objects.end(), [](const DetectedObject& x, const DetectedObject& y) { - return x.confidence > y.confidence; - }); - for (size_t i = 0; i < objects.size(); ++i) { - if (objects[i].confidence == 0) - continue; - for (size_t j = i + 1; j < objects.size(); ++j) - if (intersectionOverUnion(objects[i], objects[j]) >= iou_threshold) - objects[j].confidence = 0; - result->objects.push_back(objects[i]); - } - } - - return std::unique_ptr(result); -} - -void ModelYolo::parseYOLOOutput(const std::string& output_name, - const ov::Tensor& tensor, - const unsigned long resized_im_h, - const unsigned long resized_im_w, - const unsigned long original_im_h, - const unsigned long original_im_w, - std::vector& objects) { - // --------------------------- Extracting layer parameters ------------------------------------- - auto it = regions.find(output_name); - if (it == regions.end()) { - throw std::runtime_error(std::string("Can't find output layer with name ") + output_name); - } - auto& region = it->second; - - int sideW = 0; - int sideH = 0; - unsigned long scaleH; - unsigned long scaleW; - switch (yoloVersion) { - case YoloVersion::YOLO_V1V2: - sideH = region.outputHeight; - sideW = region.outputWidth; - scaleW = region.outputWidth; - scaleH = region.outputHeight; - break; - case YoloVersion::YOLO_V3: - case YoloVersion::YOLO_V4: - case YoloVersion::YOLO_V4_TINY: - case YoloVersion::YOLOF: - sideH = static_cast(tensor.get_shape()[ov::layout::height_idx("NCHW")]); - sideW = static_cast(tensor.get_shape()[ov::layout::width_idx("NCHW")]); - scaleW = resized_im_w; - scaleH = resized_im_h; - break; - default: - throw std::runtime_error("Unknown YoloVersion"); - } - - auto entriesNum = sideW * sideH; - const float* outData = tensor.data(); - - auto postprocessRawData = (yoloVersion == YoloVersion::YOLO_V4 || yoloVersion == YoloVersion::YOLO_V4_TINY || - yoloVersion == YoloVersion::YOLOF) - ? sigmoid - : identity; - - // --------------------------- Parsing YOLO Region output ------------------------------------- - for (int i = 0; i < entriesNum; ++i) { - int row = i / sideW; - int col = i % sideW; - for (int n = 0; n < region.num; ++n) { - //--- Getting region data - int obj_index = calculateEntryIndex(entriesNum, - region.coords, - region.classes + isObjConf, - n * entriesNum + i, - region.coords); - int box_index = - calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0); - float scale = isObjConf ? postprocessRawData(outData[obj_index]) : 1; - - //--- Preliminary check for confidence threshold conformance - if (scale >= confidence_threshold) { - //--- Calculating scaled region's coordinates - float x, y; - if (yoloVersion == YoloVersion::YOLOF) { - x = (static_cast(col) / sideW + - outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) * - original_im_w; - y = (static_cast(row) / sideH + - outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) * - original_im_h; - } else { - x = static_cast((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW * - original_im_w); - y = static_cast((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH * - original_im_h); - } - float height = static_cast(std::exp(outData[box_index + 3 * entriesNum]) * - region.anchors[2 * n + 1] * original_im_h / scaleH); - float width = static_cast(std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] * - original_im_w / scaleW); - - DetectedObject obj; - obj.x = clamp(x - width / 2, 0.f, static_cast(original_im_w)); - obj.y = clamp(y - height / 2, 0.f, static_cast(original_im_h)); - obj.width = clamp(width, 0.f, static_cast(original_im_w - obj.x)); - obj.height = clamp(height, 0.f, static_cast(original_im_h - obj.y)); - - for (size_t j = 0; j < region.classes; ++j) { - int class_index = calculateEntryIndex(entriesNum, - region.coords, - region.classes + isObjConf, - n * entriesNum + i, - region.coords + isObjConf + j); - float prob = scale * postprocessRawData(outData[class_index]); - - //--- Checking confidence threshold conformance and adding region to the list - if (prob >= confidence_threshold) { - obj.confidence = prob; - obj.labelID = j; - obj.label = getLabelName(obj.labelID); - objects.push_back(obj); - } - } - } - } - } -} - -int ModelYolo::calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, int location, int entry) { - int n = location / totalCells; - int loc = location % totalCells; - return (n * (lcoords + lclasses) + entry) * totalCells + loc; -} - -double ModelYolo::intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2) { - double overlappingWidth = fmin(o1.x + o1.width, o2.x + o2.width) - fmax(o1.x, o2.x); - double overlappingHeight = fmin(o1.y + o1.height, o2.y + o2.height) - fmax(o1.y, o2.y); - double intersectionArea = - (overlappingWidth < 0 || overlappingHeight < 0) ? 0 : overlappingHeight * overlappingWidth; - double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea; - return intersectionArea / unionArea; -} - -ModelYolo::Region::Region(const std::shared_ptr& regionYolo) { - coords = regionYolo->get_num_coords(); - classes = regionYolo->get_num_classes(); - auto mask = regionYolo->get_mask(); - num = mask.size(); - - auto shape = regionYolo->get_input_shape(0); - outputWidth = shape[3]; - outputHeight = shape[2]; - - if (num) { - // Parsing YoloV3 parameters - anchors.resize(num * 2); - - for (int i = 0; i < num; ++i) { - anchors[i * 2] = regionYolo->get_anchors()[mask[i] * 2]; - anchors[i * 2 + 1] = regionYolo->get_anchors()[mask[i] * 2 + 1]; - } - } else { - // Parsing YoloV2 parameters - num = regionYolo->get_num_regions(); - anchors = regionYolo->get_anchors(); - if (anchors.empty()) { - anchors = defaultAnchors[size_t(YoloVersion::YOLO_V1V2)]; - num = 5; - } - } -} - -ModelYolo::Region::Region(size_t classes, - int coords, - const std::vector& anchors, - const std::vector& masks, - size_t outputWidth, - size_t outputHeight) - : classes(classes), - coords(coords), - outputWidth(outputWidth), - outputHeight(outputHeight) { - num = masks.size(); - - if (anchors.size() == 0 || anchors.size() % 2 != 0) { - throw std::runtime_error("Explicitly initialized region should have non-empty even-sized regions vector"); - } - - if (num) { - this->anchors.resize(num * 2); - - for (int i = 0; i < num; ++i) { - this->anchors[i * 2] = anchors[masks[i] * 2]; - this->anchors[i * 2 + 1] = anchors[masks[i] * 2 + 1]; - } - } else { - this->anchors = anchors; - num = anchors.size() / 2; - } -} - -std::string YOLOv5::ModelType = "YOLOv5"; - -void YOLOv5::prepareInputsOutputs(std::shared_ptr& model) { - const ov::Output& input = model->input(); - const ov::Shape& in_shape = input.get_partial_shape().get_max_shape(); - if (in_shape.size() != 4) { - throw std::runtime_error("YOLO: the rank of the input must be 4"); - } - inputNames.push_back(input.get_any_name()); - const ov::Layout& inputLayout = getInputLayout(input); - if (!embedded_processing) { - model = BaseModel::embedProcessing( - model, - inputNames[0], - inputLayout, - resizeMode, - interpolationMode, - ov::Shape{in_shape[ov::layout::width_idx(inputLayout)], in_shape[ov::layout::height_idx(inputLayout)]}, - pad_value, - reverse_input_channels, - mean_values, - scale_values); - - netInputWidth = in_shape[ov::layout::width_idx(inputLayout)]; - netInputHeight = in_shape[ov::layout::height_idx(inputLayout)]; - - embedded_processing = true; - } - - const ov::Output& output = model->output(); - if (ov::element::Type_t::f32 != output.get_element_type()) { - throw std::runtime_error("YOLO: the output must be of precision f32"); - } - const ov::Shape& out_shape = output.get_partial_shape().get_max_shape(); - if (3 != out_shape.size()) { - throw std::runtime_error("YOLO: the output must be of rank 3"); - } - if (!labels.empty() && labels.size() + 4 != out_shape[1]) { - throw std::runtime_error("YOLO: number of labels must be smaller than out_shape[1] by 4"); - } -} - -void YOLOv5::updateModelInfo() { - DetectionModelExt::updateModelInfo(); - model->set_rt_info(YOLOv5::ModelType, "model_info", "model_type"); - model->set_rt_info(agnostic_nms, "model_info", "agnostic_nms"); - model->set_rt_info(iou_threshold, "model_info", "iou_threshold"); -} - -void YOLOv5::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) { - pad_value = get_from_any_maps("pad_value", top_priority, mid_priority, 114); - if (top_priority.find("resize_type") == top_priority.end() && - mid_priority.find("resize_type") == mid_priority.end()) { - interpolationMode = cv::INTER_LINEAR; - resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX; - } - reverse_input_channels = get_from_any_maps("reverse_input_channels", top_priority, mid_priority, true); - scale_values = get_from_any_maps("scale_values", top_priority, mid_priority, std::vector{255.0f}); - confidence_threshold = get_from_any_maps("confidence_threshold", top_priority, mid_priority, 0.25f); - agnostic_nms = get_from_any_maps("agnostic_nms", top_priority, mid_priority, agnostic_nms); - iou_threshold = get_from_any_maps("iou_threshold", top_priority, mid_priority, 0.7f); -} - -YOLOv5::YOLOv5(std::shared_ptr& model, const ov::AnyMap& configuration) - : DetectionModelExt(model, configuration) { - init_from_config(configuration, model->get_rt_info("model_info")); -} - -YOLOv5::YOLOv5(std::shared_ptr& adapter) : DetectionModelExt(adapter) { - init_from_config(adapter->getModelConfig(), ov::AnyMap{}); -} - -std::unique_ptr YOLOv5::postprocess(InferenceResult& infResult) { - if (1 != infResult.outputsData.size()) { - throw std::runtime_error("YOLO: expect 1 output"); - } - const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor(); - const ov::Shape& out_shape = detectionsTensor.get_shape(); - if (3 != out_shape.size()) { - throw std::runtime_error("YOLO: the output must be of rank 3"); - } - if (1 != out_shape[0]) { - throw std::runtime_error("YOLO: the first dim of the output must be 1"); - } - size_t num_proposals = out_shape[2]; - std::vector boxes_with_class; - std::vector confidences; - const float* const detections = detectionsTensor.data(); - for (size_t i = 0; i < num_proposals; ++i) { - float confidence = 0.0f; - size_t max_id = 0; - constexpr size_t LABELS_START = 4; - for (size_t j = LABELS_START; j < out_shape[1]; ++j) { - if (detections[j * num_proposals + i] > confidence) { - confidence = detections[j * num_proposals + i]; - max_id = j; - } - } - if (confidence > confidence_threshold) { - boxes_with_class.emplace_back(detections[0 * num_proposals + i] - detections[2 * num_proposals + i] / 2.0f, - detections[1 * num_proposals + i] - detections[3 * num_proposals + i] / 2.0f, - detections[0 * num_proposals + i] + detections[2 * num_proposals + i] / 2.0f, - detections[1 * num_proposals + i] + detections[3 * num_proposals + i] / 2.0f, - max_id - LABELS_START); - confidences.push_back(confidence); - } - } - constexpr bool includeBoundaries = false; - constexpr size_t keep_top_k = 30000; - std::vector keep; - if (agnostic_nms) { - keep = nms(boxes_with_class, confidences, iou_threshold, includeBoundaries, keep_top_k); - } else { - keep = multiclass_nms(boxes_with_class, confidences, iou_threshold, includeBoundaries, keep_top_k); - } - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - auto base = std::unique_ptr(result); - const auto& internalData = infResult.internalModelData->asRef(); - float floatInputImgWidth = float(internalData.inputImgWidth), - floatInputImgHeight = float(internalData.inputImgHeight); - float invertedScaleX = floatInputImgWidth / netInputWidth, invertedScaleY = floatInputImgHeight / netInputHeight; - int padLeft = 0, padTop = 0; - if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY); - if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - padLeft = (netInputWidth - int(std::round(floatInputImgWidth / invertedScaleX))) / 2; - padTop = (netInputHeight - int(std::round(floatInputImgHeight / invertedScaleY))) / 2; - } - } - for (size_t idx : keep) { - DetectedObject desc; - desc.x = clamp(round((boxes_with_class[idx].left - padLeft) * invertedScaleX), 0.f, floatInputImgWidth); - desc.y = clamp(round((boxes_with_class[idx].top - padTop) * invertedScaleY), 0.f, floatInputImgHeight); - desc.width = - clamp(round((boxes_with_class[idx].right - padLeft) * invertedScaleX), 0.f, floatInputImgWidth) - desc.x; - desc.height = - clamp(round((boxes_with_class[idx].bottom - padTop) * invertedScaleY), 0.f, floatInputImgHeight) - desc.y; - desc.confidence = confidences[idx]; - desc.labelID = static_cast(boxes_with_class[idx].labelID); - desc.label = getLabelName(desc.labelID); - result->objects.push_back(desc); - } - return base; -} - -std::string YOLOv8::ModelType = "YOLOv8"; diff --git a/src/cpp/models/src/detection_model_yolov3_onnx.cpp b/src/cpp/models/src/detection_model_yolov3_onnx.cpp deleted file mode 100644 index 68830220..00000000 --- a/src/cpp/models/src/detection_model_yolov3_onnx.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/detection_model_yolov3_onnx.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "models/input_data.h" -#include "models/internal_model_data.h" -#include "models/results.h" -#include "utils/image_utils.h" - -void ModelYoloV3ONNX::initDefaultParameters(const ov::AnyMap&) { - resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX; // Ignore configuration for now - useAutoResize = false; -} - -ModelYoloV3ONNX::ModelYoloV3ONNX(std::shared_ptr& model, const ov::AnyMap& configuration) - : DetectionModel(model, configuration) { - initDefaultParameters(configuration); -} - -ModelYoloV3ONNX::ModelYoloV3ONNX(std::shared_ptr& adapter) : DetectionModel(adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - initDefaultParameters(configuration); -} - -void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output ------------------------------------------------- - // --------------------------- Prepare inputs ------------------------------------------------------ - const ov::OutputVector& inputs = model->inputs(); - if (inputs.size() != 2) { - throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs"); - } - - ov::preprocess::PrePostProcessor ppp(model); - inputNames.reserve(inputs.size()); - for (auto& input : inputs) { - const ov::Shape& currentShape = input.get_shape(); - std::string currentName = input.get_any_name(); - const ov::Layout& currentLayout = getInputLayout(input); - - if (currentShape.size() == 4) { - if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) { - throw std::logic_error("Expected 4D image input with 3 channels"); - } - inputNames[0] = currentName; - netInputWidth = currentShape[ov::layout::width_idx(currentLayout)]; - netInputHeight = currentShape[ov::layout::height_idx(currentLayout)]; - ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); - } else if (currentShape.size() == 2) { - if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) { - throw std::logic_error("Expected 2D image info input with 2 channels"); - } - inputNames[1] = currentName; - ppp.input(currentName).tensor().set_element_type(ov::element::i32); - } - ppp.input(currentName).model().set_layout(currentLayout); - } - - // --------------------------- Prepare outputs ----------------------------------------------------- - const ov::OutputVector& outputs = model->outputs(); - if (outputs.size() != 3) { - throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs"); - } - - for (auto& output : outputs) { - const ov::Shape& currentShape = output.get_partial_shape().get_max_shape(); - std::string currentName = output.get_any_name(); - if (currentShape.back() == 3) { - indicesOutputName = currentName; - ppp.output(currentName).tensor().set_element_type(ov::element::i32); - } else if (currentShape[2] == 4) { - boxesOutputName = currentName; - ppp.output(currentName).tensor().set_element_type(ov::element::f32); - } else if (currentShape[1] == numberOfClasses) { - scoresOutputName = currentName; - ppp.output(currentName).tensor().set_element_type(ov::element::f32); - } else { - throw std::logic_error("Expected shapes [:,:,4], [:," + std::to_string(numberOfClasses) + - ",:] and [:,3] for outputs"); - } - outputNames.push_back(currentName); - } - model = ppp.build(); -} - -std::shared_ptr ModelYoloV3ONNX::preprocess(const InputData& inputData, InferenceInput& input) { - const auto& origImg = inputData.asRef().inputImage; - ov::Tensor info{ov::element::i32, ov::Shape({1, 2})}; - int32_t* data = info.data(); - data[0] = origImg.rows; - data[1] = origImg.cols; - input.emplace(inputNames[1], std::move(info)); - return BaseModel::preprocess(inputData, input); -} - -namespace { -float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) { - const float* scoresPtr = scoresTensor.data(); - const auto shape = scoresTensor.get_shape(); - size_t N = shape[2]; - - return scoresPtr[classInd * N + boxInd]; -} -} // namespace - -std::unique_ptr ModelYoloV3ONNX::postprocess(InferenceResult& infResult) { - // Get info about input image - const auto imgWidth = infResult.internalModelData->asRef().inputImgWidth; - const auto imgHeight = infResult.internalModelData->asRef().inputImgHeight; - - // Get outputs tensors - const ov::Tensor& boxes = infResult.outputsData[boxesOutputName]; - const float* boxesPtr = boxes.data(); - - const ov::Tensor& scores = infResult.outputsData[scoresOutputName]; - const ov::Tensor& indices = infResult.outputsData[indicesOutputName]; - - const int* indicesData = indices.data(); - const auto indicesShape = indices.get_shape(); - const auto boxShape = boxes.get_shape(); - - // Generate detection results - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0]; - size_t indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1]; - - for (size_t i = 0; i < numberOfBoxes; ++i) { - int batchInd = indicesData[i * indicesStride]; - int classInd = indicesData[i * indicesStride + 1]; - int boxInd = indicesData[i * indicesStride + 2]; - - if (batchInd == -1) { - break; - } - - float score = getScore(scores, classInd, boxInd); - - if (score > confidence_threshold) { - DetectedObject obj; - size_t startPos = boxShape[2] * boxInd; - - auto x = boxesPtr[startPos + 1]; - auto y = boxesPtr[startPos]; - auto width = boxesPtr[startPos + 3] - x; - auto height = boxesPtr[startPos + 2] - y; - - // Create new detected box - obj.x = clamp(x, 0.f, static_cast(imgWidth)); - obj.y = clamp(y, 0.f, static_cast(imgHeight)); - obj.height = clamp(height, 0.f, static_cast(imgHeight)); - obj.width = clamp(width, 0.f, static_cast(imgWidth)); - obj.confidence = score; - obj.labelID = classInd; - obj.label = getLabelName(classInd); - - result->objects.push_back(obj); - } - } - - return std::unique_ptr(result); -} diff --git a/src/cpp/models/src/detection_model_yolox.cpp b/src/cpp/models/src/detection_model_yolox.cpp deleted file mode 100644 index 3c4df1fe..00000000 --- a/src/cpp/models/src/detection_model_yolox.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/detection_model_yolox.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "models/input_data.h" -#include "models/internal_model_data.h" -#include "models/results.h" -#include "utils/image_utils.h" -#include "utils/nms.hpp" - -std::string ModelYoloX::ModelType = "yolox"; - -void ModelYoloX::initDefaultParameters(const ov::AnyMap&) { - resizeMode = RESIZE_KEEP_ASPECT; // Ignore configuration for now - useAutoResize = false; -} - -ModelYoloX::ModelYoloX(std::shared_ptr& model, const ov::AnyMap& configuration) - : DetectionModelExt(model, configuration) { - initDefaultParameters(configuration); -} - -ModelYoloX::ModelYoloX(std::shared_ptr& adapter) : DetectionModelExt(adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - initDefaultParameters(configuration); -} - -void ModelYoloX::updateModelInfo() { - DetectionModelExt::updateModelInfo(); - - model->set_rt_info(ModelYoloX::ModelType, "model_info", "model_type"); -} - -void ModelYoloX::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output ------------------------------------------------- - // --------------------------- Prepare input ------------------------------------------------------ - const ov::OutputVector& inputs = model->inputs(); - if (inputs.size() != 1) { - throw std::logic_error("YOLOX model wrapper accepts models that have only 1 input"); - } - - //--- Check image input - const auto& input = model->input(); - const ov::Shape& inputShape = model->input().get_shape(); - ov::Layout inputLayout = getInputLayout(input); - - if (inputShape.size() != 4 && inputShape[ov::layout::channels_idx(inputLayout)] != 3) { - throw std::logic_error("Expected 4D image input with 3 channels"); - } - - ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); - - ppp.input().model().set_layout(inputLayout); - - //--- Reading image input parameters - inputNames.push_back(input.get_any_name()); - netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; - netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; - setStridesGrids(); - - // --------------------------- Prepare output ----------------------------------------------------- - if (model->outputs().size() != 1) { - throw std::logic_error("YoloX model wrapper expects models that have only 1 output"); - } - const auto& output = model->output(); - outputNames.push_back(output.get_any_name()); - const ov::Shape& shape = output.get_shape(); - - if (shape.size() != 3) { - throw std::logic_error("YOLOX single output must have 3 dimensions, but had " + std::to_string(shape.size())); - } - ppp.output().tensor().set_element_type(ov::element::f32); - - model = ppp.build(); -} - -void ModelYoloX::setStridesGrids() { - std::vector strides = {8, 16, 32}; - std::vector hsizes(3); - std::vector wsizes(3); - - for (size_t i = 0; i < strides.size(); ++i) { - hsizes[i] = netInputHeight / strides[i]; - wsizes[i] = netInputWidth / strides[i]; - } - - for (size_t size_index = 0; size_index < hsizes.size(); ++size_index) { - for (size_t h_index = 0; h_index < hsizes[size_index]; ++h_index) { - for (size_t w_index = 0; w_index < wsizes[size_index]; ++w_index) { - grids.emplace_back(w_index, h_index); - expandedStrides.push_back(strides[size_index]); - } - } - } -} - -std::shared_ptr ModelYoloX::preprocess(const InputData& inputData, InferenceInput& input) { - const auto& origImg = inputData.asRef().inputImage; - float scale = - std::min(static_cast(netInputWidth) / origImg.cols, static_cast(netInputHeight) / origImg.rows); - - cv::Mat resizedImage = resizeImageExt(origImg, - netInputWidth, - netInputHeight, - resizeMode, - interpolationMode, - nullptr, - cv::Scalar(114, 114, 114)); - - input.emplace(inputNames[0], wrapMat2Tensor(resizedImage)); - return std::make_shared(origImg.cols, origImg.rows, scale, scale); -} - -std::unique_ptr ModelYoloX::postprocess(InferenceResult& infResult) { - // Get metadata about input image shape and scale - const auto& scale = infResult.internalModelData->asRef(); - - // Get output tensor - const ov::Tensor& output = infResult.outputsData[outputNames[0]]; - const auto& outputShape = output.get_shape(); - float* outputPtr = output.data(); - - // Generate detection results - DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); - - // Update coordinates according to strides - for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) { - size_t startPos = outputShape[2] * box_index; - outputPtr[startPos] = (outputPtr[startPos] + grids[box_index].first) * expandedStrides[box_index]; - outputPtr[startPos + 1] = (outputPtr[startPos + 1] + grids[box_index].second) * expandedStrides[box_index]; - outputPtr[startPos + 2] = std::exp(outputPtr[startPos + 2]) * expandedStrides[box_index]; - outputPtr[startPos + 3] = std::exp(outputPtr[startPos + 3]) * expandedStrides[box_index]; - } - - // Filter predictions - std::vector validBoxes; - std::vector scores; - std::vector classes; - for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) { - size_t startPos = outputShape[2] * box_index; - float score = outputPtr[startPos + 4]; - if (score < confidence_threshold) - continue; - float maxClassScore = -1; - size_t mainClass = 0; - for (size_t class_index = 0; class_index < numberOfClasses; ++class_index) { - if (outputPtr[startPos + 5 + class_index] > maxClassScore) { - maxClassScore = outputPtr[startPos + 5 + class_index]; - mainClass = class_index; - } - } - - // Filter by score - score *= maxClassScore; - if (score < confidence_threshold) - continue; - - // Add successful boxes - scores.push_back(score); - classes.push_back(mainClass); - Anchor trueBox = {outputPtr[startPos + 0] - outputPtr[startPos + 2] / 2, - outputPtr[startPos + 1] - outputPtr[startPos + 3] / 2, - outputPtr[startPos + 0] + outputPtr[startPos + 2] / 2, - outputPtr[startPos + 1] + outputPtr[startPos + 3] / 2}; - validBoxes.push_back(Anchor({trueBox.left / scale.scaleX, - trueBox.top / scale.scaleY, - trueBox.right / scale.scaleX, - trueBox.bottom / scale.scaleY})); - } - - // NMS for valid boxes - const std::vector& keep = nms(validBoxes, scores, iou_threshold, true); - for (size_t index : keep) { - // Create new detected box - DetectedObject obj; - obj.x = clamp(validBoxes[index].left, 0.f, static_cast(scale.inputImgWidth)); - obj.y = clamp(validBoxes[index].top, 0.f, static_cast(scale.inputImgHeight)); - obj.height = - clamp(validBoxes[index].bottom - validBoxes[index].top, 0.f, static_cast(scale.inputImgHeight)); - obj.width = - clamp(validBoxes[index].right - validBoxes[index].left, 0.f, static_cast(scale.inputImgWidth)); - obj.confidence = scores[index]; - obj.labelID = classes[index]; - obj.label = getLabelName(classes[index]); - result->objects.push_back(obj); - } - - return std::unique_ptr(result); -} diff --git a/src/cpp/models/src/instance_segmentation.cpp b/src/cpp/models/src/instance_segmentation.cpp deleted file mode 100644 index 384fb057..00000000 --- a/src/cpp/models/src/instance_segmentation.cpp +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/instance_segmentation.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "models/input_data.h" -#include "models/internal_model_data.h" -#include "models/results.h" -#include "utils/common.hpp" - -namespace { -constexpr char saliency_map_name[]{"saliency_map"}; -constexpr char feature_vector_name[]{"feature_vector"}; - -void append_xai_names(const std::vector>& outputs, std::vector& outputNames) { - for (const ov::Output& output : outputs) { - if (output.get_names().count(saliency_map_name) > 0) { - outputNames.emplace_back(saliency_map_name); - } else if (output.get_names().count(feature_vector_name) > 0) { - outputNames.push_back(feature_vector_name); - } - } -} - -cv::Rect expand_box(const cv::Rect2f& box, float scale) { - float w_half = box.width * 0.5f * scale, h_half = box.height * 0.5f * scale; - const cv::Point2f& center = (box.tl() + box.br()) * 0.5f; - return {cv::Point(int(center.x - w_half), int(center.y - h_half)), - cv::Point(int(center.x + w_half), int(center.y + h_half))}; -} - -std::vector> average_and_normalize(const std::vector>& saliency_maps) { - std::vector> aggregated; - aggregated.reserve(saliency_maps.size()); - for (const std::vector& per_object_maps : saliency_maps) { - if (per_object_maps.empty()) { - aggregated.emplace_back(); - } else { - cv::Mat_ saliency_map{per_object_maps.front().size()}; - for (const cv::Mat& per_object_map : per_object_maps) { - if (saliency_map.size != per_object_map.size) { - throw std::runtime_error("saliency_maps must have same size"); - } - if (per_object_map.channels() != 1) { - throw std::runtime_error("saliency_maps must have one channel"); - } - if (per_object_map.type() != CV_8U) { - throw std::runtime_error("saliency_maps must have type CV_8U"); - } - } - for (int row = 0; row < saliency_map.rows; ++row) { - for (int col = 0; col < saliency_map.cols; ++col) { - std::uint8_t max_val = 0; - for (const cv::Mat& per_object_map : per_object_maps) { - max_val = std::max(max_val, per_object_map.at(row, col)); - } - saliency_map.at(row, col) = max_val; - } - } - double min, max; - cv::minMaxLoc(saliency_map, &min, &max); - cv::Mat_ converted; - saliency_map.convertTo(converted, CV_8U, 255.0 / (max + 1e-12)); - aggregated.push_back(std::move(converted)); - } - } - return aggregated; -} - -struct Lbm { - ov::Tensor labels, boxes, masks; -}; - -Lbm filterTensors(const std::map& infResult) { - Lbm lbm; - for (const auto& pair : infResult) { - if (pair.first == saliency_map_name || pair.first == feature_vector_name) { - continue; - } - switch (pair.second.get_shape().size()) { - case 2: - lbm.labels = pair.second; - break; - case 3: - lbm.boxes = pair.second; - break; - case 4: - lbm.masks = pair.second; - break; - case 0: - break; - default: - throw std::runtime_error("Unexpected result: " + pair.first); - } - } - return lbm; -} -} // namespace - -cv::Mat segm_postprocess(const SegmentedObject& box, const cv::Mat& unpadded, int im_h, int im_w) { - // Add zero border to prevent upsampling artifacts on segment borders. - cv::Mat raw_cls_mask; - cv::copyMakeBorder(unpadded, raw_cls_mask, 1, 1, 1, 1, cv::BORDER_CONSTANT, {0}); - cv::Rect extended_box = expand_box(box, float(raw_cls_mask.cols) / (raw_cls_mask.cols - 2)); - - int w = std::max(extended_box.width + 1, 1); - int h = std::max(extended_box.height + 1, 1); - int x0 = clamp(extended_box.x, 0, im_w); - int y0 = clamp(extended_box.y, 0, im_h); - int x1 = clamp(extended_box.x + extended_box.width + 1, 0, im_w); - int y1 = clamp(extended_box.y + extended_box.height + 1, 0, im_h); - - cv::Mat resized; - cv::resize(raw_cls_mask, resized, {w, h}); - cv::Mat im_mask(cv::Size{im_w, im_h}, CV_8UC1, cv::Scalar{0}); - im_mask(cv::Rect{x0, y0, x1 - x0, y1 - y0}) - .setTo(1, - resized({cv::Point(x0 - extended_box.x, y0 - extended_box.y), - cv::Point(x1 - extended_box.x, y1 - extended_box.y)}) > 0.5f); - return im_mask; -} - -std::string MaskRCNNModel::ModelType = "MaskRCNN"; - -void MaskRCNNModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) { - confidence_threshold = get_from_any_maps("confidence_threshold", top_priority, mid_priority, confidence_threshold); - postprocess_semantic_masks = - get_from_any_maps("postprocess_semantic_masks", top_priority, mid_priority, postprocess_semantic_masks); -} - -MaskRCNNModel::MaskRCNNModel(std::shared_ptr& model, const ov::AnyMap& configuration) - : BaseModel(model, configuration) { - init_from_config(configuration, - model->has_rt_info("model_info") ? model->get_rt_info("model_info") : ov::AnyMap{}); -} - -MaskRCNNModel::MaskRCNNModel(std::shared_ptr& adapter, const ov::AnyMap& configuration) - : BaseModel(adapter, configuration) { - init_from_config(configuration, adapter->getModelConfig()); -} - -std::unique_ptr MaskRCNNModel::create_model(const std::string& modelFile, - const ov::AnyMap& configuration, - bool preload, - const std::string& device) { - auto core = ov::Core(); - std::shared_ptr model = core.read_model(modelFile); - - // Check model_type in the rt_info, ignore configuration - std::string model_type = MaskRCNNModel::ModelType; - try { - if (model->has_rt_info("model_info", "model_type")) { - model_type = model->get_rt_info("model_info", "model_type"); - } - } catch (const std::exception&) { - slog::warn << "Model type is not specified in the rt_info, use default model type: " << model_type - << slog::endl; - } - - if (model_type != MaskRCNNModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + - model_type); - } - - std::unique_ptr segmentor{new MaskRCNNModel(model, configuration)}; - segmentor->prepare(); - if (preload) { - segmentor->load(core, device); - } - return segmentor; -} - -std::unique_ptr MaskRCNNModel::create_model(std::shared_ptr& adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto model_type_iter = configuration.find("model_type"); - std::string model_type = MaskRCNNModel::ModelType; - if (model_type_iter != configuration.end()) { - model_type = model_type_iter->second.as(); - } - - if (model_type != MaskRCNNModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided: " + model_type); - } - - std::unique_ptr segmentor{new MaskRCNNModel(adapter)}; - return segmentor; -} - -void MaskRCNNModel::updateModelInfo() { - BaseModel::updateModelInfo(); - - model->set_rt_info(MaskRCNNModel::ModelType, "model_info", "model_type"); - model->set_rt_info(confidence_threshold, "model_info", "confidence_threshold"); - model->set_rt_info(postprocess_semantic_masks, "model_info", "postprocess_semantic_masks"); -} - -void MaskRCNNModel::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output --------------------------------------------- - // --------------------------- Prepare input ----------------------------------------------------- - if (model->inputs().size() != 1) { - throw std::logic_error("MaskRCNNModel model wrapper supports topologies with only 1 input"); - } - const auto& input = model->input(); - inputNames.push_back(input.get_any_name()); - - const ov::Layout& inputLayout = getInputLayout(input); - const ov::Shape& inputShape = input.get_partial_shape().get_max_shape(); - if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { - throw std::logic_error("3-channel 4-dimensional model's input is expected"); - } - - if (!embedded_processing) { - model = BaseModel::embedProcessing( - model, - inputNames[0], - inputLayout, - resizeMode, - interpolationMode, - ov::Shape{inputShape[ov::layout::width_idx(inputLayout)], inputShape[ov::layout::height_idx(inputLayout)]}, - pad_value, - reverse_input_channels, - mean_values, - scale_values); - - netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; - netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; - useAutoResize = true; // temporal solution - embedded_processing = true; - } - - // --------------------------- Prepare output ----------------------------------------------------- - struct NameRank { - std::string name; - size_t rank; - }; - std::vector filtered; - filtered.reserve(3); - for (ov::Output& output : model->outputs()) { - const std::unordered_set& out_names = output.get_names(); - if (out_names.find(saliency_map_name) == out_names.end() && - out_names.find(feature_vector_name) == out_names.end()) { - filtered.push_back({output.get_any_name(), output.get_partial_shape().get_max_shape().size()}); - } - } - if (filtered.size() != 3 && filtered.size() != 4) { - throw std::logic_error(std::string{"MaskRCNNModel model wrapper supports topologies with "} + - saliency_map_name + ", " + feature_vector_name + " and 3 or 4 other outputs"); - } - outputNames.resize(3); - for (const NameRank& name_rank : filtered) { - switch (name_rank.rank) { - case 2: - outputNames[0] = name_rank.name; - break; - case 3: - outputNames[1] = name_rank.name; - break; - case 4: - outputNames[2] = name_rank.name; - break; - case 0: - break; - default: - throw std::runtime_error("Unexpected output: " + name_rank.name); - } - } - append_xai_names(model->outputs(), outputNames); -} - -std::unique_ptr MaskRCNNModel::postprocess(InferenceResult& infResult) { - const auto& internalData = infResult.internalModelData->asRef(); - float floatInputImgWidth = float(internalData.inputImgWidth), - floatInputImgHeight = float(internalData.inputImgHeight); - float invertedScaleX = floatInputImgWidth / netInputWidth, invertedScaleY = floatInputImgHeight / netInputHeight; - int padLeft = 0, padTop = 0; - if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY); - if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - padLeft = (netInputWidth - int(std::round(floatInputImgWidth / invertedScaleX))) / 2; - padTop = (netInputHeight - int(std::round(floatInputImgHeight / invertedScaleY))) / 2; - } - } - const Lbm& lbm = filterTensors(infResult.outputsData); - const int64_t* const labels_tensor_ptr = lbm.labels.data(); - const float* const boxes = lbm.boxes.data(); - size_t objectSize = lbm.boxes.get_shape().back(); - float* const masks = lbm.masks.data(); - const cv::Size& masks_size{int(lbm.masks.get_shape()[3]), int(lbm.masks.get_shape()[2])}; - InstanceSegmentationResult* result = new InstanceSegmentationResult(infResult.frameId, infResult.metaData); - auto retVal = std::unique_ptr(result); - std::vector> saliency_maps; - bool has_feature_vector_name = - std::find(outputNames.begin(), outputNames.end(), feature_vector_name) != outputNames.end(); - if (has_feature_vector_name) { - if (labels.empty()) { - throw std::runtime_error("Can't get number of classes because labels are empty"); - } - saliency_maps.resize(labels.size()); - } - for (size_t i = 0; i < lbm.labels.get_size(); ++i) { - float confidence = boxes[i * objectSize + 4]; - if (confidence <= confidence_threshold && !has_feature_vector_name) { - continue; - } - SegmentedObject obj; - - obj.confidence = confidence; - obj.labelID = labels_tensor_ptr[i] + 1; - if (!labels.empty() && obj.labelID >= labels.size()) { - continue; - } - obj.label = getLabelName(obj.labelID); - - obj.x = clamp(round((boxes[i * objectSize + 0] - padLeft) * invertedScaleX), 0.f, floatInputImgWidth); - obj.y = clamp(round((boxes[i * objectSize + 1] - padTop) * invertedScaleY), 0.f, floatInputImgHeight); - obj.width = - clamp(round((boxes[i * objectSize + 2] - padLeft) * invertedScaleX - obj.x), 0.f, floatInputImgWidth); - obj.height = - clamp(round((boxes[i * objectSize + 3] - padTop) * invertedScaleY - obj.y), 0.f, floatInputImgHeight); - - if (obj.height * obj.width <= 1) { - continue; - } - - cv::Mat raw_cls_mask{masks_size, CV_32F, masks + masks_size.area() * i}; - cv::Mat resized_mask; - if (postprocess_semantic_masks || has_feature_vector_name) { - resized_mask = segm_postprocess(obj, raw_cls_mask, internalData.inputImgHeight, internalData.inputImgWidth); - } else { - resized_mask = raw_cls_mask; - } - obj.mask = postprocess_semantic_masks ? resized_mask : raw_cls_mask.clone(); - if (confidence > confidence_threshold) { - result->segmentedObjects.push_back(obj); - } - if (has_feature_vector_name && confidence > confidence_threshold) { - saliency_maps[obj.labelID - 1].push_back(resized_mask); - } - } - result->saliency_map = average_and_normalize(saliency_maps); - if (has_feature_vector_name) { - result->feature_vector = std::move(infResult.outputsData[feature_vector_name]); - } - return retVal; -} - -std::unique_ptr MaskRCNNModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); -} - -std::vector> MaskRCNNModel::inferBatch( - const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> isegResults; - isegResults.reserve(results.size()); - for (auto& result : results) { - isegResults.emplace_back(static_cast(result.release())); - } - return isegResults; -} diff --git a/src/cpp/models/src/keypoint_detection.cpp b/src/cpp/models/src/keypoint_detection.cpp deleted file mode 100644 index 4fbe778c..00000000 --- a/src/cpp/models/src/keypoint_detection.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (C) 2020-2025 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/keypoint_detection.h" - -#include -#include -#include -#include -#include - -#include "models/input_data.h" -#include "models/internal_model_data.h" -#include "models/results.h" -#include "utils/slog.hpp" - -namespace { - -void colArgMax(const cv::Mat& src, - cv::Mat& dst_locs, - cv::Mat& dst_values, - bool apply_softmax = false, - float eps = 1e-6f) { - dst_locs = cv::Mat::zeros(src.rows, 1, CV_32S); - dst_values = cv::Mat::zeros(src.rows, 1, CV_32F); - - for (int row = 0; row < src.rows; ++row) { - const float* ptr_row = src.ptr(row); - int max_val_idx = 0; - float max_val = ptr_row[0]; - for (int col = 1; col < src.cols; ++col) { - if (ptr_row[col] > max_val) { - max_val_idx = col; - dst_locs.at(row) = max_val_idx; - max_val = ptr_row[col]; - } - } - - if (apply_softmax) { - float sum = 0.0f; - for (int col = 0; col < src.cols; ++col) { - sum += exp(ptr_row[col] - max_val); - } - dst_values.at(row) = exp(ptr_row[max_val_idx] - max_val) / (sum + eps); - } else { - dst_values.at(row) = max_val; - } - } -} - -DetectedKeypoints decode_simcc(const cv::Mat& simcc_x, - const cv::Mat& simcc_y, - const cv::Point2f& extra_scale = cv::Point2f(1.f, 1.f), - const cv::Point2i& extra_offset = cv::Point2f(0.f, 0.f), - bool apply_softmax = false, - float simcc_split_ratio = 2.0f, - float decode_beta = 150.0f, - float sigma = 6.0f) { - cv::Mat x_locs, max_val_x; - colArgMax(simcc_x, x_locs, max_val_x, false); - - cv::Mat y_locs, max_val_y; - colArgMax(simcc_y, y_locs, max_val_y, false); - - if (apply_softmax) { - cv::Mat tmp_locs; - colArgMax(decode_beta * sigma * simcc_x, tmp_locs, max_val_x, true); - colArgMax(decode_beta * sigma * simcc_y, tmp_locs, max_val_y, true); - } - - std::vector keypoints(x_locs.rows); - cv::Mat scores = cv::Mat::zeros(x_locs.rows, 1, CV_32F); - for (int i = 0; i < x_locs.rows; ++i) { - keypoints[i] = cv::Point2f((x_locs.at(i) - extra_offset.x) * extra_scale.x, - (y_locs.at(i) - extra_offset.y) * extra_scale.y) / - simcc_split_ratio; - scores.at(i) = std::min(max_val_x.at(i), max_val_y.at(i)); - - if (scores.at(i) <= 0.f) { - keypoints[i] = cv::Point2f(-1.f, -1.f); - } - } - - return {std::move(keypoints), scores}; -} - -} // namespace - -std::string KeypointDetectionModel::ModelType = "keypoint_detection"; - -void KeypointDetectionModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) { - labels = get_from_any_maps("labels", top_priority, mid_priority, labels); - apply_softmax = get_from_any_maps("apply_softmax", top_priority, mid_priority, apply_softmax); -} - -KeypointDetectionModel::KeypointDetectionModel(std::shared_ptr& model, const ov::AnyMap& configuration) - : BaseModel(model, configuration) { - init_from_config(configuration, - model->has_rt_info("model_info") ? model->get_rt_info("model_info") : ov::AnyMap{}); -} - -KeypointDetectionModel::KeypointDetectionModel(std::shared_ptr& adapter, - const ov::AnyMap& configuration) - : BaseModel(adapter, configuration) { - init_from_config(configuration, adapter->getModelConfig()); -} - -std::unique_ptr KeypointDetectionModel::create_model(const std::string& modelFile, - const ov::AnyMap& configuration, - bool preload, - const std::string& device) { - auto core = ov::Core(); - std::shared_ptr model = core.read_model(modelFile); - - // Check model_type in the rt_info, ignore configuration - std::string model_type = KeypointDetectionModel::ModelType; - try { - if (model->has_rt_info("model_info", "model_type")) { - model_type = model->get_rt_info("model_info", "model_type"); - } - } catch (const std::exception&) { - slog::warn << "Model type is not specified in the rt_info, use default model type: " << model_type - << slog::endl; - } - - if (model_type != KeypointDetectionModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + - model_type); - } - - std::unique_ptr kp_detector{new KeypointDetectionModel(model, configuration)}; - kp_detector->prepare(); - if (preload) { - kp_detector->load(core, device); - } - return kp_detector; -} - -std::unique_ptr KeypointDetectionModel::create_model( - std::shared_ptr& adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto model_type_iter = configuration.find("model_type"); - std::string model_type = KeypointDetectionModel::ModelType; - if (model_type_iter != configuration.end()) { - model_type = model_type_iter->second.as(); - } - - if (model_type != KeypointDetectionModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided: " + model_type); - } - - std::unique_ptr kp_detector{new KeypointDetectionModel(adapter)}; - return kp_detector; -} - -void KeypointDetectionModel::updateModelInfo() { - BaseModel::updateModelInfo(); - - model->set_rt_info(KeypointDetectionModel::ModelType, "model_info", "model_type"); - model->set_rt_info(labels, "model_info", "labels"); -} - -void KeypointDetectionModel::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output --------------------------------------------- - // --------------------------- Prepare input ----------------------------------------------------- - if (model->inputs().size() != 1) { - throw std::logic_error(KeypointDetectionModel::ModelType + - " model wrapper supports topologies with only 1 input"); - } - const auto& input = model->input(); - inputNames.push_back(input.get_any_name()); - const ov::Layout& inputLayout = getInputLayout(input); - const ov::Shape& inputShape = input.get_partial_shape().get_max_shape(); - if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { - throw std::logic_error("3-channel 4-dimensional model's input is expected"); - } - - if (model->outputs().size() != 2) { - throw std::logic_error(KeypointDetectionModel::ModelType + " model wrapper supports topologies with 2 outputs"); - } - - if (!embedded_processing) { - model = BaseModel::embedProcessing( - model, - inputNames[0], - inputLayout, - resizeMode, - interpolationMode, - ov::Shape{inputShape[ov::layout::width_idx(inputLayout)], inputShape[ov::layout::height_idx(inputLayout)]}, - pad_value, - reverse_input_channels, - mean_values, - scale_values); - - ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); - model = ppp.build(); - embedded_processing = true; - useAutoResize = true; - netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; - netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; - } - - for (ov::Output& output : model->outputs()) { - outputNames.push_back(output.get_any_name()); - } -} - -std::unique_ptr KeypointDetectionModel::postprocess(InferenceResult& infResult) { - KeypointDetectionResult* result = new KeypointDetectionResult(infResult.frameId, infResult.metaData); - - const ov::Tensor& pred_x_tensor = infResult.outputsData.find(outputNames[0])->second; - size_t shape_offset = pred_x_tensor.get_shape().size() == 3 ? 1 : 0; - auto pred_x_mat = cv::Mat(cv::Size(static_cast(pred_x_tensor.get_shape()[shape_offset + 1]), - static_cast(pred_x_tensor.get_shape()[shape_offset])), - CV_32F, - pred_x_tensor.data(), - pred_x_tensor.get_strides()[shape_offset]); - - const ov::Tensor& pred_y_tensor = infResult.outputsData.find(outputNames[1])->second; - shape_offset = pred_y_tensor.get_shape().size() == 3 ? 1 : 0; - auto pred_y_mat = cv::Mat(cv::Size(static_cast(pred_y_tensor.get_shape()[shape_offset + 1]), - static_cast(pred_y_tensor.get_shape()[shape_offset])), - CV_32F, - pred_y_tensor.data(), - pred_y_tensor.get_strides()[shape_offset]); - - const auto& image_data = infResult.internalModelData->asRef(); - float inverted_scale_x = static_cast(image_data.inputImgWidth) / netInputWidth, - inverted_scale_y = static_cast(image_data.inputImgHeight) / netInputHeight; - - int pad_left = 0, pad_top = 0; - if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - inverted_scale_x = inverted_scale_y = std::max(inverted_scale_x, inverted_scale_y); - if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) { - pad_left = (netInputWidth - - static_cast(std::round(static_cast(image_data.inputImgWidth) / inverted_scale_x))) / - 2; - pad_top = (netInputHeight - - static_cast(std::round(static_cast(image_data.inputImgHeight) / inverted_scale_y))) / - 2; - } - } - - result->poses.emplace_back( - decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}, {pad_left, pad_top}, apply_softmax)); - - return std::unique_ptr(result); -} - -std::unique_ptr KeypointDetectionModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); -} - -std::vector> KeypointDetectionModel::inferBatch( - const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> kpDetResults; - kpDetResults.reserve(results.size()); - for (auto& result : results) { - kpDetResults.emplace_back(static_cast(result.release())); - } - return kpDetResults; -} diff --git a/src/cpp/models/src/segmentation_model.cpp b/src/cpp/models/src/segmentation_model.cpp deleted file mode 100644 index deea1c87..00000000 --- a/src/cpp/models/src/segmentation_model.cpp +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "models/segmentation_model.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "models/input_data.h" -#include "models/internal_model_data.h" -#include "models/results.h" -#include "utils/slog.hpp" - -namespace { -constexpr char feature_vector_name[]{"feature_vector"}; - -cv::Mat get_activation_map(const cv::Mat& features) { - double min_soft_score, max_soft_score; - cv::minMaxLoc(features, &min_soft_score, &max_soft_score); - double factor = 255.0 / (max_soft_score - min_soft_score + 1e-12); - - cv::Mat int_act_map; - features.convertTo(int_act_map, CV_8U, factor, -min_soft_score * factor); - return int_act_map; -} -} // namespace - -cv::Mat create_hard_prediction_from_soft_prediction(const cv::Mat& soft_prediction, - float soft_threshold, - int blur_strength) { - if (soft_prediction.channels() == 1) { - return soft_prediction; - } - - cv::Mat soft_prediction_blurred = soft_prediction.clone(); - - bool applyBlurAndSoftThreshold = (blur_strength > -1 && soft_threshold < std::numeric_limits::infinity()); - if (applyBlurAndSoftThreshold) { - cv::blur(soft_prediction_blurred, soft_prediction_blurred, cv::Size{blur_strength, blur_strength}); - } - - cv::Mat hard_prediction{cv::Size{soft_prediction_blurred.cols, soft_prediction_blurred.rows}, CV_8UC1}; - for (int i = 0; i < soft_prediction_blurred.rows; ++i) { - for (int j = 0; j < soft_prediction_blurred.cols; ++j) { - float max_prob = -std::numeric_limits::infinity(); - if (applyBlurAndSoftThreshold) { - max_prob = soft_threshold; - } - - uint8_t max_id = 0; - for (int c = 0; c < soft_prediction_blurred.channels(); ++c) { - float prob = ((float*)soft_prediction_blurred.ptr(i, j))[c]; - if (prob > max_prob) { - max_prob = prob; - max_id = c; - } - } - hard_prediction.at(i, j) = max_id; - } - } - return hard_prediction; -} - -std::string SegmentationModel::ModelType = "Segmentation"; - -void SegmentationModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) { - blur_strength = get_from_any_maps("blur_strength", top_priority, mid_priority, blur_strength); - soft_threshold = get_from_any_maps("soft_threshold", top_priority, mid_priority, soft_threshold); - return_soft_prediction = - get_from_any_maps("return_soft_prediction", top_priority, mid_priority, return_soft_prediction); -} - -SegmentationModel::SegmentationModel(std::shared_ptr& model, const ov::AnyMap& configuration) - : BaseModel(model, configuration) { - init_from_config(configuration, - model->has_rt_info("model_info") ? model->get_rt_info("model_info") : ov::AnyMap{}); -} - -SegmentationModel::SegmentationModel(std::shared_ptr& adapter, const ov::AnyMap& configuration) - : BaseModel(adapter, configuration) { - init_from_config(configuration, adapter->getModelConfig()); -} - -std::unique_ptr SegmentationModel::create_model(const std::string& modelFile, - const ov::AnyMap& configuration, - bool preload, - const std::string& device) { - auto core = ov::Core(); - std::shared_ptr model = core.read_model(modelFile); - - // Check model_type in the rt_info, ignore configuration - std::string model_type = SegmentationModel::ModelType; - try { - if (model->has_rt_info("model_info", "model_type")) { - model_type = model->get_rt_info("model_info", "model_type"); - } - } catch (const std::exception&) { - slog::warn << "Model type is not specified in the rt_info, use default model type: " << model_type - << slog::endl; - } - - if (model_type != SegmentationModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + - model_type); - } - - std::unique_ptr segmentor{new SegmentationModel(model, configuration)}; - segmentor->prepare(); - if (preload) { - segmentor->load(core, device); - } - return segmentor; -} - -std::unique_ptr SegmentationModel::create_model(std::shared_ptr& adapter) { - const ov::AnyMap& configuration = adapter->getModelConfig(); - auto model_type_iter = configuration.find("model_type"); - std::string model_type = SegmentationModel::ModelType; - if (model_type_iter != configuration.end()) { - model_type = model_type_iter->second.as(); - } - - if (model_type != SegmentationModel::ModelType) { - throw std::runtime_error("Incorrect or unsupported model_type is provided: " + model_type); - } - - std::unique_ptr segmentor{new SegmentationModel(adapter)}; - return segmentor; -} - -void SegmentationModel::updateModelInfo() { - BaseModel::updateModelInfo(); - - model->set_rt_info(SegmentationModel::ModelType, "model_info", "model_type"); - model->set_rt_info(blur_strength, "model_info", "blur_strength"); - model->set_rt_info(soft_threshold, "model_info", "soft_threshold"); - model->set_rt_info(return_soft_prediction, "model_info", "return_soft_prediction"); -} - -void SegmentationModel::prepareInputsOutputs(std::shared_ptr& model) { - // --------------------------- Configure input & output --------------------------------------------- - // --------------------------- Prepare input ----------------------------------------------------- - if (model->inputs().size() != 1) { - throw std::logic_error("Segmentation model wrapper supports topologies with only 1 input"); - } - const auto& input = model->input(); - inputNames.push_back(input.get_any_name()); - - const ov::Layout& inputLayout = getInputLayout(input); - const ov::Shape& inputShape = input.get_partial_shape().get_max_shape(); - if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { - throw std::logic_error("3-channel 4-dimensional model's input is expected"); - } - if (model->outputs().size() > 2) { - throw std::logic_error("Segmentation model wrapper supports topologies with 1 or 2 outputs"); - } - - std::string out_name; - for (ov::Output& output : model->outputs()) { - const std::unordered_set& out_names = output.get_names(); - if (out_names.find(feature_vector_name) == out_names.end()) { - if (out_name.empty()) { - out_name = output.get_any_name(); - } else { - throw std::runtime_error(std::string{"Only "} + feature_vector_name + - " and 1 other output are allowed"); - } - } - } - if (out_name.empty()) { - throw std::runtime_error("No output containing segmentation masks found"); - } - - if (!embedded_processing) { - model = BaseModel::embedProcessing( - model, - inputNames[0], - inputLayout, - resizeMode, - interpolationMode, - ov::Shape{inputShape[ov::layout::width_idx(inputLayout)], inputShape[ov::layout::height_idx(inputLayout)]}, - pad_value, - reverse_input_channels, - mean_values, - scale_values); - - ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); - ov::Layout out_layout = getLayoutFromShape(model->output(out_name).get_partial_shape()); - ppp.output(out_name).model().set_layout(out_layout); - ppp.output(out_name).tensor().set_element_type(ov::element::f32); - if (ov::layout::has_channels(out_layout)) { - ppp.output(out_name).tensor().set_layout("NCHW"); - } else { - // deeplabv3 - ppp.output(out_name).tensor().set_layout("NHW"); - } - model = ppp.build(); - useAutoResize = true; // temporal solution - embedded_processing = true; - } - - outputNames.push_back(out_name); - for (ov::Output& output : model->outputs()) { - const std::unordered_set& out_names = output.get_names(); - if (out_names.find(feature_vector_name) == out_names.end()) { - outputNames.emplace_back(feature_vector_name); - return; - } - } -} - -std::unique_ptr SegmentationModel::postprocess(InferenceResult& infResult) { - const auto& inputImgSize = infResult.internalModelData->asRef(); - const auto& outputName = outputNames[0] == feature_vector_name ? outputNames[1] : outputNames[0]; - const auto& outTensor = infResult.outputsData[outputName]; - const ov::Shape& outputShape = outTensor.get_shape(); - const ov::Layout& outputLayout = getLayoutFromShape(outputShape); - size_t outChannels = - ov::layout::has_channels(outputLayout) ? outputShape[ov::layout::channels_idx(outputLayout)] : 1; - int outHeight = static_cast(outputShape[ov::layout::height_idx(outputLayout)]); - int outWidth = static_cast(outputShape[ov::layout::width_idx(outputLayout)]); - cv::Mat soft_prediction; - if (outChannels == 1 && outTensor.get_element_type() == ov::element::i32) { - cv::Mat predictions(outHeight, outWidth, CV_32SC1, outTensor.data()); - predictions.convertTo(soft_prediction, CV_8UC1); - } else if (outChannels == 1 && outTensor.get_element_type() == ov::element::i64) { - cv::Mat predictions(outHeight, outWidth, CV_32SC1); - const auto data = outTensor.data(); - for (size_t i = 0; i < predictions.total(); ++i) { - reinterpret_cast(predictions.data)[i] = int32_t(data[i]); - } - predictions.convertTo(soft_prediction, CV_8UC1); - } else if (outTensor.get_element_type() == ov::element::f32) { - float* data = outTensor.data(); - std::vector channels; - for (size_t c = 0; c < outTensor.get_shape()[1]; ++c) { - channels.emplace_back(cv::Size{outWidth, outHeight}, CV_32FC1, data + c * outHeight * outWidth); - } - cv::merge(channels, soft_prediction); - } - - cv::Mat hard_prediction = - create_hard_prediction_from_soft_prediction(soft_prediction, soft_threshold, blur_strength); - - cv::resize(hard_prediction, - hard_prediction, - {inputImgSize.inputImgWidth, inputImgSize.inputImgHeight}, - 0.0, - 0.0, - cv::INTER_NEAREST); - - if (return_soft_prediction) { - ImageResultWithSoftPrediction* result = - new ImageResultWithSoftPrediction(infResult.frameId, infResult.metaData); - result->resultImage = hard_prediction; - cv::resize(soft_prediction, - soft_prediction, - {inputImgSize.inputImgWidth, inputImgSize.inputImgHeight}, - 0.0, - 0.0, - cv::INTER_NEAREST); - result->soft_prediction = soft_prediction; - auto iter = infResult.outputsData.find(feature_vector_name); - if (infResult.outputsData.end() != iter) { - result->saliency_map = get_activation_map(soft_prediction); - result->feature_vector = iter->second; - } - return std::unique_ptr(result); - } - - ImageResult* result = new ImageResult(infResult.frameId, infResult.metaData); - result->resultImage = hard_prediction; - return std::unique_ptr(result); -} - -std::vector SegmentationModel::getContours(const ImageResultWithSoftPrediction& imageResult) { - if (imageResult.soft_prediction.channels() == 1) { - throw std::runtime_error{"Cannot get contours from soft prediction with 1 layer"}; - } - - std::vector combined_contours = {}; - cv::Mat label_index_map; - cv::Mat current_label_soft_prediction; - for (int index = 1; index < imageResult.soft_prediction.channels(); index++) { - cv::extractChannel(imageResult.soft_prediction, current_label_soft_prediction, index); - cv::inRange(imageResult.resultImage, - cv::Scalar(index, index, index), - cv::Scalar(index, index, index), - label_index_map); - std::vector> contours; - cv::findContours(label_index_map, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_NONE); - - std::string label = getLabelName(index - 1); - - for (unsigned int i = 0; i < contours.size(); i++) { - cv::Mat mask = cv::Mat::zeros(imageResult.resultImage.rows, - imageResult.resultImage.cols, - imageResult.resultImage.type()); - cv::drawContours(mask, contours, i, 255, -1); - float probability = (float)cv::mean(current_label_soft_prediction, mask)[0]; - combined_contours.push_back({label, probability, contours[i]}); - } - } - - return combined_contours; -} - -std::unique_ptr SegmentationModel::infer(const ImageInputData& inputData) { - auto result = BaseModel::inferImage(inputData); - return std::unique_ptr(static_cast(result.release())); -} - -std::vector> SegmentationModel::inferBatch(const std::vector& inputImgs) { - auto results = BaseModel::inferBatchImage(inputImgs); - std::vector> segResults; - segResults.reserve(results.size()); - for (auto& result : results) { - segResults.emplace_back(static_cast(result.release())); - } - return segResults; -} diff --git a/src/cpp/py_bindings/.gitignore b/src/cpp/py_bindings/.gitignore deleted file mode 100644 index 20b07f5e..00000000 --- a/src/cpp/py_bindings/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.whl -*.dll -*.so* diff --git a/src/cpp/py_bindings/CMakeLists.txt b/src/cpp/py_bindings/CMakeLists.txt deleted file mode 100644 index 4ed1c5ad..00000000 --- a/src/cpp/py_bindings/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -cmake_minimum_required(VERSION 3.26) - -if(WIN32) - set(CMAKE_GENERATOR_TOOLSET "v142") -endif() - - -add_subdirectory(../ model_api/cpp) - -set(Python_FIND_VIRTUALENV FIRST) -project(_vision_api LANGUAGES CXX) -find_package(Python COMPONENTS Interpreter Development REQUIRED) - -execute_process( - COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir - OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT) -find_package(nanobind CONFIG REQUIRED) - - -file(GLOB BINDINGS_SOURCES src/vision_api/*.cpp) -file(GLOB BINDINGS_HEADERS src/vision_api/*.hpp) - -message(INFO ${BINDINGS_SOURCES}) - -nanobind_add_module(_vision_api NB_STATIC STABLE_ABI LTO ${BINDINGS_SOURCES} ${BINDINGS_HEADERS}) - -set_target_properties(_vision_api PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" -) - -target_link_libraries(_vision_api PRIVATE model_api) - -install(TARGETS _vision_api - LIBRARY DESTINATION vision_api # Same place relative to package -) diff --git a/src/cpp/py_bindings/pyproject.toml b/src/cpp/py_bindings/pyproject.toml deleted file mode 100644 index 1436f3e5..00000000 --- a/src/cpp/py_bindings/pyproject.toml +++ /dev/null @@ -1,32 +0,0 @@ -[build-system] -requires = ["scikit-build-core >=0.4.3", "nanobind >=1.3.2"] -build-backend = "scikit_build_core.build" - -[project] -name = "vision_api" -version = "0.3.0.2" -requires-python = ">=3.9" -authors = [ - {name = "Intel(R) Corporation"}, -] -maintainers = [ - {name = "Intel(R) Corporation"}, -] -description = "Model API: model wrappers and pipelines for inference with OpenVINO" -readme = "../../python/README.md" -classifiers = [ - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.9" -] - -[project.urls] -Homepage = "https://github.com/open-edge-platform/model_api" - -[tool.scikit-build] -# Protect the configuration against future changes in scikit-build-core -minimum-version = "0.4" -# Setuptools-style build caching in a local directory -build-dir = "build/{wheel_tag}" -# Build stable ABI wheels for CPython 3.12+ -wheel.py-api = "cp312" -sdist.include = ["*.so*"] diff --git a/src/cpp/py_bindings/src/vision_api/__init__.py b/src/cpp/py_bindings/src/vision_api/__init__.py deleted file mode 100644 index 0dbae48c..00000000 --- a/src/cpp/py_bindings/src/vision_api/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -try: - from openvino import Core - - _ = Core() # Triggers loading of shared libs like libopenvino.so -except Exception as e: - raise ImportError(f"Failed to initialize OpenVINO runtime: {e}") - -from ._vision_api import ClassificationModel - -__all__ = [ClassificationModel] diff --git a/src/cpp/py_bindings/src/vision_api/py_base.cpp b/src/cpp/py_bindings/src/vision_api/py_base.cpp deleted file mode 100644 index ecc5e4c7..00000000 --- a/src/cpp/py_bindings/src/vision_api/py_base.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2025 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include - -#include - -#include "models/base_model.h" -#include "models/results.h" - -namespace nb = nanobind; - -void init_base_modules(nb::module_& m) { - nb::class_(m, "ResultBase").def(nb::init<>()); - - nb::class_(m, "BaseModel") - .def("load", [](BaseModel& self, const std::string& device, size_t num_infer_requests) { - auto core = ov::Core(); - self.load(core, device, num_infer_requests); - }); -} diff --git a/src/cpp/py_bindings/src/vision_api/py_classification.cpp b/src/cpp/py_bindings/src/vision_api/py_classification.cpp deleted file mode 100644 index d01d84f0..00000000 --- a/src/cpp/py_bindings/src/vision_api/py_classification.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (C) 2025 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include -#include -#include -#include -#include - -#include "models/classification_model.h" -#include "models/results.h" -#include "py_utils.hpp" - -namespace pyutils = vision::nanobind::utils; - -void init_classification(nb::module_& m) { - nb::class_(m, "Classification") - .def(nb::init()) - .def_rw("id", &ClassificationResult::Classification::id) - .def_rw("label", &ClassificationResult::Classification::label) - .def_rw("score", &ClassificationResult::Classification::score); - - nb::class_(m, "ClassificationResult") - .def(nb::init<>()) - .def_ro("topLabels", &ClassificationResult::topLabels) - .def("__repr__", &ClassificationResult::operator std::string) - .def_prop_ro( - "feature_vector", - [](ClassificationResult& r) { - if (!r.feature_vector) { - return nb::ndarray(); - } - - return nb::ndarray(r.feature_vector.data(), - r.feature_vector.get_shape().size(), - r.feature_vector.get_shape().data()); - }, - nb::rv_policy::reference_internal) - .def_prop_ro( - "saliency_map", - [](ClassificationResult& r) { - if (!r.saliency_map) { - return nb::ndarray(); - } - - return nb::ndarray(r.saliency_map.data(), - r.saliency_map.get_shape().size(), - r.saliency_map.get_shape().data()); - }, - nb::rv_policy::reference_internal); - - nb::class_(m, "ClassificationModel") - .def_static( - "create_model", - [](const std::string& model_path, - const std::map& configuration, - bool preload, - const std::string& device) { - auto ov_any_config = ov::AnyMap(); - for (const auto& item : configuration) { - ov_any_config[item.first] = pyutils::py_object_to_any(item.second, item.first); - } - - return ClassificationModel::create_model(model_path, ov_any_config, preload, device); - }, - nb::arg("model_path"), - nb::arg("configuration") = ov::AnyMap({}), - nb::arg("preload") = true, - nb::arg("device") = "AUTO") - - .def("__call__", - [](ClassificationModel& self, const nb::ndarray<>& input) { - return self.infer(pyutils::wrap_np_mat(input)); - }) - .def("infer_batch", [](ClassificationModel& self, const std::vector> inputs) { - std::vector input_mats; - input_mats.reserve(inputs.size()); - - for (const auto& input : inputs) { - input_mats.push_back(pyutils::wrap_np_mat(input)); - } - - return self.inferBatch(input_mats); - }); -} diff --git a/src/cpp/py_bindings/src/vision_api/py_utils.cpp b/src/cpp/py_bindings/src/vision_api/py_utils.cpp deleted file mode 100644 index d08881c5..00000000 --- a/src/cpp/py_bindings/src/vision_api/py_utils.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2025 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "py_utils.hpp" - -namespace vision::nanobind::utils { - -cv::Mat wrap_np_mat(const nb::ndarray<>& input) { - if (input.ndim() != 3 || input.shape(2) != 3 || input.dtype() != nb::dtype()) { - throw std::runtime_error("Input image should have HWC_8U layout"); - } - - int height = input.shape(0); - int width = input.shape(1); - - return cv::Mat(height, width, CV_8UC3, input.data()); -} - -ov::Any py_object_to_any(const nb::object& py_obj, const std::string& property_name) { - if (nb::isinstance(py_obj)) { - return ov::Any(std::string(static_cast(py_obj).c_str())); - } else if (nb::isinstance(py_obj)) { - return ov::Any(static_cast(static_cast(py_obj))); - } else if (nb::isinstance(py_obj)) { - return ov::Any(static_cast(static_cast(py_obj))); - } else { - OPENVINO_THROW("Property \"" + property_name + "\" has unsupported type."); - } -} - -} // namespace vision::nanobind::utils diff --git a/src/cpp/py_bindings/src/vision_api/py_utils.hpp b/src/cpp/py_bindings/src/vision_api/py_utils.hpp deleted file mode 100644 index 955dbfe3..00000000 --- a/src/cpp/py_bindings/src/vision_api/py_utils.hpp +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2025 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -#include -#include - -namespace nb = nanobind; - -namespace vision::nanobind::utils { -cv::Mat wrap_np_mat(const nb::ndarray<>& input); -ov::Any py_object_to_any(const nb::object& py_obj, const std::string& property_name); -} // namespace vision::nanobind::utils diff --git a/src/cpp/py_bindings/src/vision_api/py_vision_api.cpp b/src/cpp/py_bindings/src/vision_api/py_vision_api.cpp deleted file mode 100644 index 13605a01..00000000 --- a/src/cpp/py_bindings/src/vision_api/py_vision_api.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2025 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include - -namespace nb = nanobind; - -void init_classification(nb::module_& m); -void init_base_modules(nb::module_& m); - -NB_MODULE(_vision_api, m) { - m.doc() = "Nanobind binding for OpenVINO Vision API library"; - init_base_modules(m); - init_classification(m); -} diff --git a/src/cpp/tilers/include/tilers/detection.h b/src/cpp/tilers/include/tilers/detection.h deleted file mode 100644 index 8fde112b..00000000 --- a/src/cpp/tilers/include/tilers/detection.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -struct DetectionResult; - -class DetectionTiler : public TilerBase { -public: - DetectionTiler(const std::shared_ptr& model, - const ov::AnyMap& configuration, - ExecutionMode exec_mode = ExecutionMode::sync); - virtual ~DetectionTiler() = default; - - virtual std::unique_ptr run(const ImageInputData& inputData); - -protected: - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); - virtual std::unique_ptr merge_results(const std::vector>&, - const cv::Size&, - const std::vector&); - ov::Tensor merge_saliency_maps(const std::vector>&, - const cv::Size&, - const std::vector&); - - size_t max_pred_number = 200; -}; diff --git a/src/cpp/tilers/include/tilers/instance_segmentation.h b/src/cpp/tilers/include/tilers/instance_segmentation.h deleted file mode 100644 index 3ca20dcb..00000000 --- a/src/cpp/tilers/include/tilers/instance_segmentation.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -struct InstanceSegmentationResult; - -class InstanceSegmentationTiler : public TilerBase { - /*InstanceSegmentationTiler tiler works with MaskRCNNModel model only*/ -public: - InstanceSegmentationTiler(std::shared_ptr model, - const ov::AnyMap& configuration, - ExecutionMode exec_mode = ExecutionMode::sync); - virtual std::unique_ptr run(const ImageInputData& inputData); - virtual ~InstanceSegmentationTiler() = default; - bool postprocess_semantic_masks = true; - -protected: - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); - virtual std::unique_ptr merge_results(const std::vector>&, - const cv::Size&, - const std::vector&); - - std::vector> merge_saliency_maps(const std::vector>&, - const cv::Size&, - const std::vector&); - - size_t max_pred_number = 200; -}; diff --git a/src/cpp/tilers/include/tilers/semantic_segmentation.h b/src/cpp/tilers/include/tilers/semantic_segmentation.h deleted file mode 100644 index 4c9b9d1d..00000000 --- a/src/cpp/tilers/include/tilers/semantic_segmentation.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -struct ImageResult; -struct ImageResultWithSoftPrediction; - -class SemanticSegmentationTiler : public TilerBase { -public: - SemanticSegmentationTiler(std::shared_ptr model, - const ov::AnyMap& configuration, - ExecutionMode exec_mode = ExecutionMode::sync); - virtual std::unique_ptr run(const ImageInputData& inputData); - virtual ~SemanticSegmentationTiler() = default; - -protected: - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&); - virtual std::unique_ptr merge_results(const std::vector>&, - const cv::Size&, - const std::vector&); - - int blur_strength = -1; - float soft_threshold = -std::numeric_limits::infinity(); - bool return_soft_prediction = true; -}; diff --git a/src/cpp/tilers/include/tilers/tiler_base.h b/src/cpp/tilers/include/tilers/tiler_base.h deleted file mode 100644 index 3fb45d1e..00000000 --- a/src/cpp/tilers/include/tilers/tiler_base.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once -#include - -#include -#include -#include -#include -#include -#include -#include - -struct ResultBase; - -enum class ExecutionMode { sync, async }; - -class TilerBase { -public: - TilerBase(const std::shared_ptr& model, - const ov::AnyMap& configuration, - ExecutionMode exec_mode = ExecutionMode::sync); - - virtual ~TilerBase() = default; - -protected: - virtual std::unique_ptr run_impl(const ImageInputData& inputData); - std::vector tile(const cv::Size&); - std::vector filter_tiles(const cv::Mat&, const std::vector&); - std::unique_ptr predict_sync(const cv::Mat&, const std::vector&); - std::unique_ptr predict_async(const cv::Mat&, const std::vector&); - cv::Mat crop_tile(const cv::Mat&, const cv::Rect&); - virtual std::unique_ptr postprocess_tile(std::unique_ptr, const cv::Rect&) = 0; - virtual std::unique_ptr merge_results(const std::vector>&, - const cv::Size&, - const std::vector&) = 0; - - std::shared_ptr model; - size_t tile_size = 400; - float tiles_overlap = 0.5f; - float iou_threshold = 0.45f; - bool tile_with_full_img = true; - ExecutionMode run_mode = ExecutionMode::sync; -}; diff --git a/src/cpp/tilers/src/detection.cpp b/src/cpp/tilers/src/detection.cpp deleted file mode 100644 index ec248664..00000000 --- a/src/cpp/tilers/src/detection.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include - -#include -#include -#include -#include -#include - -namespace { - -cv::Mat non_linear_normalization(cv::Mat& class_map) { - double min_soft_score, max_soft_score; - cv::minMaxLoc(class_map, &min_soft_score); - cv::pow(class_map - min_soft_score, 1.5, class_map); - - cv::minMaxLoc(class_map, &min_soft_score, &max_soft_score); - class_map = 255.0 / (max_soft_score + 1e-12) * class_map; - - return class_map; -} - -} // namespace - -DetectionTiler::DetectionTiler(const std::shared_ptr& _model, - const ov::AnyMap& configuration, - ExecutionMode exec_mode) - : TilerBase(_model, configuration, exec_mode) { - ov::AnyMap extra_config; - try { - auto ov_model = model->getModel(); - extra_config = ov_model->get_rt_info("model_info"); - } catch (const std::runtime_error&) { - extra_config = model->getInferenceAdapter()->getModelConfig(); - } - - max_pred_number = get_from_any_maps("max_pred_number", configuration, extra_config, max_pred_number); -} - -std::unique_ptr DetectionTiler::postprocess_tile(std::unique_ptr tile_result, - const cv::Rect& coord) { - DetectionResult* det_res = static_cast(tile_result.get()); - for (auto& det : det_res->objects) { - det.x += coord.x; - det.y += coord.y; - } - - if (det_res->feature_vector) { - auto tmp_feature_vector = - ov::Tensor(det_res->feature_vector.get_element_type(), det_res->feature_vector.get_shape()); - det_res->feature_vector.copy_to(tmp_feature_vector); - det_res->feature_vector = tmp_feature_vector; - } - - if (det_res->saliency_map) { - auto tmp_saliency_map = ov::Tensor(det_res->saliency_map.get_element_type(), det_res->saliency_map.get_shape()); - det_res->saliency_map.copy_to(tmp_saliency_map); - det_res->saliency_map = tmp_saliency_map; - } - - return tile_result; -} - -std::unique_ptr DetectionTiler::merge_results(const std::vector>& tiles_results, - const cv::Size& image_size, - const std::vector& tile_coords) { - DetectionResult* result = new DetectionResult(); - auto retVal = std::unique_ptr(result); - - std::vector all_detections; - std::vector> all_detections_refs; - std::vector all_scores; - - for (const auto& result : tiles_results) { - DetectionResult* det_res = static_cast(result.get()); - for (auto& det : det_res->objects) { - all_detections.emplace_back(det.x, det.y, det.x + det.width, det.y + det.height, det.labelID); - all_scores.push_back(det.confidence); - all_detections_refs.push_back(det); - } - } - - auto keep_idx = multiclass_nms(all_detections, all_scores, iou_threshold, false, max_pred_number); - - result->objects.reserve(keep_idx.size()); - for (auto idx : keep_idx) { - result->objects.push_back(all_detections_refs[idx]); - } - - if (tiles_results.size()) { - DetectionResult* det_res = static_cast(tiles_results.begin()->get()); - if (det_res->feature_vector) { - result->feature_vector = - ov::Tensor(det_res->feature_vector.get_element_type(), det_res->feature_vector.get_shape()); - } - if (det_res->saliency_map) { - result->saliency_map = merge_saliency_maps(tiles_results, image_size, tile_coords); - } - } - - if (result->feature_vector) { - float* feature_ptr = result->feature_vector.data(); - size_t feature_size = result->feature_vector.get_size(); - - std::fill(feature_ptr, feature_ptr + feature_size, 0.f); - - for (const auto& result : tiles_results) { - DetectionResult* det_res = static_cast(result.get()); - const float* current_feature_ptr = det_res->feature_vector.data(); - - for (size_t i = 0; i < feature_size; ++i) { - feature_ptr[i] += current_feature_ptr[i]; - } - } - - for (size_t i = 0; i < feature_size; ++i) { - feature_ptr[i] /= tiles_results.size(); - } - } - - return retVal; -} - -ov::Tensor DetectionTiler::merge_saliency_maps(const std::vector>& tiles_results, - const cv::Size& image_size, - const std::vector& tile_coords) { - std::vector all_saliency_maps; - all_saliency_maps.reserve(tiles_results.size()); - for (const auto& result : tiles_results) { - auto det_res = static_cast(result.get()); - all_saliency_maps.push_back(det_res->saliency_map); - } - - ov::Tensor image_saliency_map; - if (all_saliency_maps.size()) { - image_saliency_map = all_saliency_maps[0]; - } - - if ((image_saliency_map.get_size() == 1) || (all_saliency_maps.size() == 1)) { - return image_saliency_map; - } - - size_t shape_shift = (image_saliency_map.get_shape().size() > 3) ? 1 : 0; - size_t num_classes = image_saliency_map.get_shape()[shape_shift]; - size_t map_h = image_saliency_map.get_shape()[shape_shift + 1]; - size_t map_w = image_saliency_map.get_shape()[shape_shift + 2]; - - float ratio_h = static_cast(map_h) / std::min(tile_size, static_cast(image_size.height)); - float ratio_w = static_cast(map_w) / std::min(tile_size, static_cast(image_size.width)); - - size_t image_map_h = static_cast(image_size.height * ratio_h); - size_t image_map_w = static_cast(image_size.width * ratio_w); - - std::vector> merged_map_mat(num_classes); - for (auto& class_map : merged_map_mat) { - class_map = cv::Mat_(cv::Size{int(image_map_w), int(image_map_h)}, 0.f); - } - - size_t start_idx = tile_with_full_img ? 1 : 0; - for (size_t i = start_idx; i < all_saliency_maps.size(); ++i) { - for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - auto current_cls_map_mat = wrap_saliency_map_tensor_to_mat(all_saliency_maps[i], shape_shift, class_idx); - cv::Mat current_cls_map_mat_float; - current_cls_map_mat.convertTo(current_cls_map_mat_float, CV_32F); - - cv::Rect map_location( - static_cast(tile_coords[i].x * ratio_w), - static_cast(tile_coords[i].y * ratio_h), - static_cast(static_cast(tile_coords[i].width + tile_coords[i].x) * ratio_w - - static_cast(tile_coords[i].x * ratio_w)), - static_cast(static_cast(tile_coords[i].height + tile_coords[i].y) * ratio_h - - static_cast(tile_coords[i].y * ratio_h))); - - if (current_cls_map_mat.rows > map_location.height && map_location.height > 0 && - current_cls_map_mat.cols > map_location.width && map_location.width > 0) { - cv::resize(current_cls_map_mat_float, - current_cls_map_mat_float, - cv::Size(map_location.width, map_location.height)); - } - - auto class_map_roi = cv::Mat(merged_map_mat[class_idx], map_location); - for (int row_i = 0; row_i < map_location.height; ++row_i) { - for (int col_i = 0; col_i < map_location.width; ++col_i) { - float merged_mixel = class_map_roi.at(row_i, col_i); - if (merged_mixel > 0) { - class_map_roi.at(row_i, col_i) = - 0.5f * (merged_mixel + current_cls_map_mat_float.at(row_i, col_i)); - } else { - class_map_roi.at(row_i, col_i) = current_cls_map_mat_float.at(row_i, col_i); - } - } - } - } - } - - ov::Tensor merged_map; - if (shape_shift) { - merged_map = ov::Tensor(ov::element::Type("u8"), {1, num_classes, image_map_h, image_map_w}); - } else { - merged_map = ov::Tensor(ov::element::Type("u8"), {num_classes, image_map_h, image_map_w}); - } - - for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - if (tile_with_full_img) { - auto image_map_cls = wrap_saliency_map_tensor_to_mat(image_saliency_map, shape_shift, class_idx); - cv::resize(image_map_cls, image_map_cls, cv::Size(image_map_w, image_map_h)); - cv::addWeighted(merged_map_mat[class_idx], 1.0, image_map_cls, 0.5, 0., merged_map_mat[class_idx]); - } - merged_map_mat[class_idx] = non_linear_normalization(merged_map_mat[class_idx]); - auto merged_cls_map_mat = wrap_saliency_map_tensor_to_mat(merged_map, shape_shift, class_idx); - merged_map_mat[class_idx].convertTo(merged_cls_map_mat, merged_cls_map_mat.type()); - } - - return merged_map; -} - -std::unique_ptr DetectionTiler::run(const ImageInputData& inputData) { - auto result = this->run_impl(inputData); - return std::unique_ptr(static_cast(result.release())); -} diff --git a/src/cpp/tilers/src/instance_segmentation.cpp b/src/cpp/tilers/src/instance_segmentation.cpp deleted file mode 100644 index 211a4761..00000000 --- a/src/cpp/tilers/src/instance_segmentation.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "utils/common.hpp" - -namespace { -class MaskRCNNModelParamsSetter { -public: - std::shared_ptr model; - bool state; - MaskRCNNModel* model_ptr; - MaskRCNNModelParamsSetter(std::shared_ptr model_) : model(model_) { - model_ptr = static_cast(model.get()); - state = model_ptr->postprocess_semantic_masks; - model_ptr->postprocess_semantic_masks = false; - } - ~MaskRCNNModelParamsSetter() { - model_ptr->postprocess_semantic_masks = state; - } -}; -} // namespace - -InstanceSegmentationTiler::InstanceSegmentationTiler(std::shared_ptr _model, - const ov::AnyMap& configuration, - ExecutionMode exec_mode) - : TilerBase(_model, configuration, exec_mode) { - ov::AnyMap extra_config; - try { - auto ov_model = model->getModel(); - extra_config = ov_model->get_rt_info("model_info"); - } catch (const std::runtime_error&) { - extra_config = model->getInferenceAdapter()->getModelConfig(); - } - - postprocess_semantic_masks = - get_from_any_maps("postprocess_semantic_masks", configuration, extra_config, postprocess_semantic_masks); - max_pred_number = get_from_any_maps("max_pred_number", configuration, extra_config, max_pred_number); -} - -std::unique_ptr InstanceSegmentationTiler::run(const ImageInputData& inputData) { - auto setter = MaskRCNNModelParamsSetter(model); - auto result = this->run_impl(inputData); - return std::unique_ptr(static_cast(result.release())); -} - -std::unique_ptr InstanceSegmentationTiler::postprocess_tile(std::unique_ptr tile_result, - const cv::Rect& coord) { - auto* iseg_res = static_cast(tile_result.get()); - for (auto& det : iseg_res->segmentedObjects) { - det.x += coord.x; - det.y += coord.y; - } - - if (iseg_res->feature_vector) { - auto tmp_feature_vector = - ov::Tensor(iseg_res->feature_vector.get_element_type(), iseg_res->feature_vector.get_shape()); - iseg_res->feature_vector.copy_to(tmp_feature_vector); - iseg_res->feature_vector = tmp_feature_vector; - } - - return tile_result; -} - -std::unique_ptr InstanceSegmentationTiler::merge_results( - const std::vector>& tiles_results, - const cv::Size& image_size, - const std::vector& tile_coords) { - auto* result = new InstanceSegmentationResult(); - auto retVal = std::unique_ptr(result); - - std::vector all_detections; - std::vector> all_detections_ptrs; - std::vector all_scores; - - for (const auto& result : tiles_results) { - auto* iseg_res = static_cast(result.get()); - for (auto& det : iseg_res->segmentedObjects) { - all_detections.emplace_back(det.x, det.y, det.x + det.width, det.y + det.height, det.labelID); - all_scores.push_back(det.confidence); - all_detections_ptrs.push_back(det); - } - } - - auto keep_idx = multiclass_nms(all_detections, all_scores, iou_threshold, false, max_pred_number); - - result->segmentedObjects.reserve(keep_idx.size()); - for (auto idx : keep_idx) { - if (postprocess_semantic_masks) { - all_detections_ptrs[idx].get().mask = segm_postprocess(all_detections_ptrs[idx], - all_detections_ptrs[idx].get().mask, - image_size.height, - image_size.width); - } - result->segmentedObjects.push_back(all_detections_ptrs[idx]); - } - - if (tiles_results.size()) { - auto* iseg_res = static_cast(tiles_results.begin()->get()); - if (iseg_res->feature_vector) { - result->feature_vector = - ov::Tensor(iseg_res->feature_vector.get_element_type(), iseg_res->feature_vector.get_shape()); - } - } - - if (result->feature_vector) { - float* feature_ptr = result->feature_vector.data(); - size_t feature_size = result->feature_vector.get_size(); - - std::fill(feature_ptr, feature_ptr + feature_size, 0.f); - - for (const auto& result : tiles_results) { - auto* iseg_res = static_cast(result.get()); - const float* current_feature_ptr = iseg_res->feature_vector.data(); - - for (size_t i = 0; i < feature_size; ++i) { - feature_ptr[i] += current_feature_ptr[i]; - } - } - - for (size_t i = 0; i < feature_size; ++i) { - feature_ptr[i] /= tiles_results.size(); - } - } - - result->saliency_map = merge_saliency_maps(tiles_results, image_size, tile_coords); - - return retVal; -} - -std::vector> InstanceSegmentationTiler::merge_saliency_maps( - const std::vector>& tiles_results, - const cv::Size& image_size, - const std::vector& tile_coords) { - std::vector>> all_saliecy_maps; - all_saliecy_maps.reserve(tiles_results.size()); - for (const auto& result : tiles_results) { - auto det_res = static_cast(result.get()); - all_saliecy_maps.push_back(det_res->saliency_map); - } - - std::vector> image_saliency_map; - if (all_saliecy_maps.size()) { - image_saliency_map = all_saliecy_maps[0]; - } - - if (image_saliency_map.empty()) { - return image_saliency_map; - } - - size_t num_classes = image_saliency_map.size(); - std::vector> merged_map(num_classes); - for (auto& map : merged_map) { - map = cv::Mat_(image_size, 0); - } - - size_t start_idx = tile_with_full_img ? 1 : 0; - for (size_t i = start_idx; i < all_saliecy_maps.size(); ++i) { - for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - auto current_cls_map_mat = all_saliecy_maps[i][class_idx]; - if (current_cls_map_mat.empty()) { - continue; - } - const auto& tile = tile_coords[i]; - cv::Mat tile_map; - cv::resize(current_cls_map_mat, tile_map, tile.size()); - auto tile_map_merged = cv::Mat(merged_map[class_idx], tile); - cv::Mat(cv::max(tile_map, tile_map_merged)).copyTo(tile_map_merged); - } - } - - for (size_t class_idx = 0; class_idx < num_classes; ++class_idx) { - auto image_map_cls = tile_with_full_img ? image_saliency_map[class_idx] : cv::Mat_(); - if (image_map_cls.empty()) { - if (cv::sum(merged_map[class_idx]) == cv::Scalar(0.)) { - merged_map[class_idx] = cv::Mat_(); - } - } else { - cv::resize(image_map_cls, image_map_cls, image_size); - cv::Mat(cv::max(merged_map[class_idx], image_map_cls)).copyTo(merged_map[class_idx]); - } - } - - return merged_map; -} diff --git a/src/cpp/tilers/src/semantic_segmentation.cpp b/src/cpp/tilers/src/semantic_segmentation.cpp deleted file mode 100644 index 6a8efc89..00000000 --- a/src/cpp/tilers/src/semantic_segmentation.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include -#include - -#include -#include - -#include "utils/common.hpp" - -namespace { -void normalize_soft_prediction(cv::Mat& soft_prediction, const cv::Mat& normalize_factor) { - float* data = soft_prediction.ptr(0); - const int num_classes = soft_prediction.channels(); - const size_t step_rows = soft_prediction.step[0] / sizeof(float); - const size_t step_cols = soft_prediction.step[1] / sizeof(float); - - for (int y = 0; y < soft_prediction.rows; ++y) { - for (int x = 0; x < soft_prediction.cols; ++x) { - int weight = normalize_factor.at(y, x); - if (weight > 0) { - for (int c = 0; c < num_classes; ++c) { - data[y * step_rows + x * step_cols + c] /= weight; - } - } - } - } -} -} // namespace - -SemanticSegmentationTiler::SemanticSegmentationTiler(std::shared_ptr _model, - const ov::AnyMap& configuration, - ExecutionMode exec_mode) - : TilerBase(_model, configuration, exec_mode) { - ov::AnyMap extra_config; - try { - auto ov_model = model->getModel(); - extra_config = ov_model->get_rt_info("model_info"); - } catch (const std::runtime_error&) { - extra_config = model->getInferenceAdapter()->getModelConfig(); - } - - blur_strength = get_from_any_maps("blur_strength", configuration, extra_config, blur_strength); - soft_threshold = get_from_any_maps("soft_threshold", configuration, extra_config, soft_threshold); - return_soft_prediction = - get_from_any_maps("return_soft_prediction", configuration, extra_config, return_soft_prediction); -} - -std::unique_ptr SemanticSegmentationTiler::run(const ImageInputData& inputData) { - auto result = this->run_impl(inputData); - return std::unique_ptr( - static_cast(result.release())); -} - -std::unique_ptr SemanticSegmentationTiler::postprocess_tile(std::unique_ptr tile_result, - const cv::Rect&) { - ImageResultWithSoftPrediction* soft = dynamic_cast(tile_result.get()); - if (!soft) { - throw std::runtime_error( - "SemanticSegmentationTiler requires the underlying model to return ImageResultWithSoftPrediction"); - } - return tile_result; -} - -std::unique_ptr SemanticSegmentationTiler::merge_results( - const std::vector>& tiles_results, - const cv::Size& image_size, - const std::vector& tile_coords) { - if (tiles_results.empty()) { - return std::unique_ptr(new ImageResultWithSoftPrediction()); - } - - cv::Mat voting_mask(cv::Size(image_size.width, image_size.height), CV_32SC1, cv::Scalar(0)); - auto* sseg_res = static_cast(tiles_results[0].get()); - cv::Mat merged_soft_prediction(cv::Size(image_size.width, image_size.height), - CV_32FC(sseg_res->soft_prediction.channels()), - cv::Scalar(0)); - - for (size_t i = 0; i < tiles_results.size(); ++i) { - auto* sseg_res = static_cast(tiles_results[i].get()); - voting_mask(tile_coords[i]) += 1; - merged_soft_prediction(tile_coords[i]) += sseg_res->soft_prediction; - } - - normalize_soft_prediction(merged_soft_prediction, voting_mask); - - cv::Mat hard_prediction = - create_hard_prediction_from_soft_prediction(merged_soft_prediction, soft_threshold, blur_strength); - - std::unique_ptr retVal; - if (return_soft_prediction) { - auto* result = new ImageResultWithSoftPrediction(); - retVal = std::unique_ptr(result); - result->soft_prediction = merged_soft_prediction; - result->resultImage = hard_prediction; - } else { - auto* result = new ImageResult(); - retVal = std::unique_ptr(result); - result->resultImage = hard_prediction; - } - return retVal; -} diff --git a/src/cpp/tilers/src/tiler_base.cpp b/src/cpp/tilers/src/tiler_base.cpp deleted file mode 100644 index 6d979dea..00000000 --- a/src/cpp/tilers/src/tiler_base.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#include -#include -#include -#include - -#include -#include - -TilerBase::TilerBase(const std::shared_ptr& _model, const ov::AnyMap& configuration, ExecutionMode exec_mode) - : model(_model), - run_mode(exec_mode) { - ov::AnyMap extra_config; - try { - auto ov_model = model->getModel(); - extra_config = ov_model->get_rt_info("model_info"); - } catch (const std::runtime_error&) { - extra_config = model->getInferenceAdapter()->getModelConfig(); - } - - tile_size = get_from_any_maps("tile_size", configuration, extra_config, tile_size); - tiles_overlap = get_from_any_maps("tiles_overlap", configuration, extra_config, tiles_overlap); - iou_threshold = get_from_any_maps("iou_threshold", configuration, extra_config, iou_threshold); - tile_with_full_img = get_from_any_maps("tile_with_full_img", configuration, extra_config, tile_with_full_img); -} - -std::vector TilerBase::tile(const cv::Size& image_size) { - std::vector coords; - - size_t tile_step = static_cast(tile_size * (1.f - tiles_overlap)); - size_t num_h_tiles = image_size.height / tile_step; - size_t num_w_tiles = image_size.width / tile_step; - - if (num_h_tiles * tile_step < static_cast(image_size.height)) { - num_h_tiles += 1; - } - - if (num_w_tiles * tile_step < static_cast(image_size.width)) { - num_w_tiles += 1; - } - - if (tile_with_full_img) { - coords.reserve(num_h_tiles * num_w_tiles + 1); - coords.push_back(cv::Rect(0, 0, image_size.width, image_size.height)); - } else { - coords.reserve(num_h_tiles * num_w_tiles); - } - - for (size_t i = 0; i < num_w_tiles; ++i) { - for (size_t j = 0; j < num_h_tiles; ++j) { - int loc_h = static_cast(j * tile_step); - int loc_w = static_cast(i * tile_step); - - coords.push_back(cv::Rect(loc_w, - loc_h, - std::min(static_cast(tile_size), image_size.width - loc_w), - std::min(static_cast(tile_size), image_size.height - loc_h))); - } - } - return coords; -} - -std::vector TilerBase::filter_tiles(const cv::Mat&, const std::vector& coords) { - return coords; -} - -std::unique_ptr TilerBase::predict_sync(const cv::Mat& image, const std::vector& tile_coords) { - std::vector> tile_results; - - for (const auto& coord : tile_coords) { - auto tile_img = crop_tile(image, coord); - auto tile_prediction = model->inferImage(ImageInputData(tile_img.clone())); - auto tile_result = postprocess_tile(std::move(tile_prediction), coord); - tile_results.push_back(std::move(tile_result)); - } - - return merge_results(tile_results, image.size(), tile_coords); -} - -std::unique_ptr TilerBase::predict_async(const cv::Mat& image, const std::vector& tile_coords) { - std::vector input_data; - - input_data.reserve(tile_coords.size()); - for (const auto& coord : tile_coords) { - auto tile_img = crop_tile(image, coord); - input_data.push_back(ImageInputData(tile_img.clone())); - } - - std::vector> tile_results; - auto tile_predictions = model->inferBatchImage(input_data); - for (size_t i = 0; i < tile_predictions.size(); ++i) { - auto tile_result = postprocess_tile(std::move(tile_predictions[i]), tile_coords[i]); - tile_results.push_back(std::move(tile_result)); - } - return merge_results(tile_results, image.size(), tile_coords); -} - -cv::Mat TilerBase::crop_tile(const cv::Mat& image, const cv::Rect& coord) { - return cv::Mat(image, coord); -} - -std::unique_ptr TilerBase::run_impl(const ImageInputData& inputData) { - auto& image = inputData.inputImage; - auto tile_coords = tile(image.size()); - tile_coords = filter_tiles(image, tile_coords); - if (run_mode == ExecutionMode::async) { - return predict_async(image, tile_coords); - } - return predict_sync(image, tile_coords); -} diff --git a/src/cpp/utils/include/utils/args_helper.hpp b/src/cpp/utils/include/utils/args_helper.hpp deleted file mode 100644 index d57a0947..00000000 --- a/src/cpp/utils/include/utils/args_helper.hpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief a header file with common samples functionality - * @file args_helper.hpp - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -std::vector split(const std::string& s, char delim); - -std::vector parseDevices(const std::string& device_string); - -std::map parseValuePerDevice(const std::set& devices, - const std::string& values_string); - -std::map parseLayoutString(const std::string& layout_string); - -std::string formatLayouts(const std::map& layouts); - -template -Type get_from_any_maps(const std::string& key, - const ov::AnyMap& top_priority, - const ov::AnyMap& mid_priority, - Type low_priority) { - auto topk_iter = top_priority.find(key); - if (topk_iter != top_priority.end()) { - return topk_iter->second.as(); - } - topk_iter = mid_priority.find(key); - if (topk_iter != mid_priority.end()) { - return topk_iter->second.as(); - } - return low_priority; -} - -template <> -inline bool get_from_any_maps(const std::string& key, - const ov::AnyMap& top_priority, - const ov::AnyMap& mid_priority, - bool low_priority) { - auto topk_iter = top_priority.find(key); - if (topk_iter != top_priority.end()) { - const std::string& val = topk_iter->second.as(); - return val == "True" || val == "YES"; - } - topk_iter = mid_priority.find(key); - if (topk_iter != mid_priority.end()) { - const std::string& val = topk_iter->second.as(); - return val == "True" || val == "YES"; - } - return low_priority; -} diff --git a/src/cpp/utils/include/utils/async_infer_queue.hpp b/src/cpp/utils/include/utils/async_infer_queue.hpp deleted file mode 100644 index 1dd38aa4..00000000 --- a/src/cpp/utils/include/utils/async_infer_queue.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* -// Copyright (C) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -*/ - -#include -#include -#include -#include -#include - -class AsyncInferQueue { -public: - AsyncInferQueue() = default; - AsyncInferQueue(ov::CompiledModel& model, size_t jobs); - ~AsyncInferQueue() = default; - - bool is_ready(); - size_t get_idle_request_id(); - void wait_all(); - void set_default_callbacks(); - void set_custom_callbacks( - std::function callback_args)> f_callback); - size_t size() const; - void start_async(const ov::Tensor& input, std::shared_ptr userdata = nullptr); - void start_async(const std::map& input, std::shared_ptr userdata = nullptr); - ov::InferRequest operator[](size_t i); - - // AsyncInferQueue is not the sole owner of infer requests, although it calls create_infer_request() method. - // ov::InferRequest contains a shared pointer internally, and therefore requests can be copied by clients of - // AsyncInferQueue. -protected: - std::vector m_requests; - std::queue m_idle_handles; - std::vector> m_user_ids; - std::mutex m_mutex; - std::condition_variable m_cv; - std::queue> m_errors; -}; diff --git a/src/cpp/utils/include/utils/common.hpp b/src/cpp/utils/include/utils/common.hpp deleted file mode 100644 index 5e4db57e..00000000 --- a/src/cpp/utils/include/utils/common.hpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief a header file with common samples functionality - * @file common.hpp - */ - -#pragma once - -#include -#include -#include -#include -#include - -#include "utils/slog.hpp" - -template -constexpr std::size_t arraySize(const T (&)[N]) noexcept { - return N; -} - -template -T clamp(T value, T low, T high) { - return value < low ? low : (value > high ? high : value); -} - -static inline void logBasicModelInfo(const std::shared_ptr& model) { - slog::info << "Model name: " << model->get_friendly_name() << slog::endl; - - // Dump information about model inputs/outputs - ov::OutputVector inputs = model->inputs(); - ov::OutputVector outputs = model->outputs(); - - slog::info << "\tInputs: " << slog::endl; - for (const ov::Output& input : inputs) { - const std::string name = input.get_any_name(); - const ov::element::Type type = input.get_element_type(); - const ov::PartialShape shape = input.get_partial_shape(); - const ov::Layout layout = ov::layout::get_layout(input); - - slog::info << "\t\t" << name << ", " << type << ", " << shape << ", " << layout.to_string() << slog::endl; - } - - slog::info << "\tOutputs: " << slog::endl; - for (const ov::Output& output : outputs) { - const std::string name = output.get_names().size() ? output.get_any_name() : ""; - const ov::element::Type type = output.get_element_type(); - const ov::PartialShape shape = output.get_partial_shape(); - const ov::Layout layout = ov::layout::get_layout(output); - - slog::info << "\t\t" << name << ", " << type << ", " << shape << ", " << layout.to_string() << slog::endl; - } - - return; -} diff --git a/src/cpp/utils/include/utils/image_utils.h b/src/cpp/utils/include/utils/image_utils.h deleted file mode 100644 index 59ba39e9..00000000 --- a/src/cpp/utils/include/utils/image_utils.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2020-2024 Intel Corporation - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include - -enum RESIZE_MODE { - RESIZE_FILL, - RESIZE_KEEP_ASPECT, - RESIZE_KEEP_ASPECT_LETTERBOX, - RESIZE_CROP, - NO_RESIZE, -}; - -inline std::string formatResizeMode(RESIZE_MODE mode) { - switch (mode) { - case RESIZE_FILL: - return "standard"; - case RESIZE_KEEP_ASPECT: - return "fit_to_window"; - case RESIZE_KEEP_ASPECT_LETTERBOX: - return "fit_to_window_letterbox"; - case RESIZE_CROP: - return "crop"; - default: - return "unknown"; - } -} - -cv::Mat resizeImageExt(const cv::Mat& mat, - int width, - int height, - RESIZE_MODE resizeMode = RESIZE_FILL, - cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR, - cv::Rect* roi = nullptr, - cv::Scalar BorderConstant = cv::Scalar(0, 0, 0)); - -ov::preprocess::PostProcessSteps::CustomPostprocessOp createResizeGraph(RESIZE_MODE resizeMode, - const ov::Shape& size, - const cv::InterpolationFlags interpolationMode, - uint8_t pad_value); diff --git a/src/cpp/utils/include/utils/kuhn_munkres.hpp b/src/cpp/utils/include/utils/kuhn_munkres.hpp deleted file mode 100644 index 4423d03c..00000000 --- a/src/cpp/utils/include/utils/kuhn_munkres.hpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include "opencv2/core.hpp" - -/// -/// \brief The KuhnMunkres class -/// -/// Solves the assignment problem. -/// -class KuhnMunkres { -public: - /// - /// \brief Initializes the class for assignment problem solving. - /// \param[in] greedy If a faster greedy matching algorithm should be used. - explicit KuhnMunkres(bool greedy = false); - - /// - /// \brief Solves the assignment problem for given dissimilarity matrix. - /// It returns a vector that where each element is a column index for - /// corresponding row (e.g. result[0] stores optimal column index for very - /// first row in the dissimilarity matrix). - /// \param dissimilarity_matrix CV_32F dissimilarity matrix. - /// \return Optimal column index for each row. -1 means that there is no - /// column for row. - /// - std::vector Solve(const cv::Mat& dissimilarity_matrix); - -private: - static constexpr int kStar = 1; - static constexpr int kPrime = 2; - - cv::Mat dm_; - cv::Mat marked_; - std::vector points_; - - std::vector is_row_visited_; - std::vector is_col_visited_; - - int n_; - bool greedy_; - - void TrySimpleCase(); - bool CheckIfOptimumIsFound(); - cv::Point FindUncoveredMinValPos(); - void UpdateDissimilarityMatrix(float val); - int FindInRow(int row, int what); - int FindInCol(int col, int what); - void Run(); -}; diff --git a/src/cpp/utils/include/utils/nms.hpp b/src/cpp/utils/include/utils/nms.hpp deleted file mode 100644 index 725d4f80..00000000 --- a/src/cpp/utils/include/utils/nms.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/* -// Copyright (C) 2021-2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -*/ - -#pragma once - -#include -#include - -#include "opencv2/core.hpp" - -struct Anchor { - float left; - float top; - float right; - float bottom; - - Anchor() = default; - Anchor(float _left, float _top, float _right, float _bottom) - : left(_left), - top(_top), - right(_right), - bottom(_bottom) {} - - float getWidth() const { - return (right - left) + 1.0f; - } - float getHeight() const { - return (bottom - top) + 1.0f; - } - float getXCenter() const { - return left + (getWidth() - 1.0f) / 2.0f; - } - float getYCenter() const { - return top + (getHeight() - 1.0f) / 2.0f; - } -}; - -struct AnchorLabeled : public Anchor { - int labelID = -1; - - AnchorLabeled() = default; - AnchorLabeled(float _left, float _top, float _right, float _bottom, int _labelID) - : Anchor(_left, _top, _right, _bottom), - labelID(_labelID) {} -}; - -template -std::vector nms(const std::vector& boxes, - const std::vector& scores, - const float thresh, - bool includeBoundaries = false, - size_t keep_top_k = 0) { - if (keep_top_k == 0) { - keep_top_k = boxes.size(); - } - std::vector areas(boxes.size()); - for (size_t i = 0; i < boxes.size(); ++i) { - areas[i] = - (boxes[i].right - boxes[i].left + includeBoundaries) * (boxes[i].bottom - boxes[i].top + includeBoundaries); - } - std::vector order(scores.size()); - std::iota(order.begin(), order.end(), 0); - std::sort(order.begin(), order.end(), [&scores](int o1, int o2) { - return scores[o1] > scores[o2]; - }); - - size_t ordersNum = 0; - for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0 && ordersNum < keep_top_k; ordersNum++) - ; - - std::vector keep; - bool shouldContinue = true; - for (size_t i = 0; shouldContinue && i < ordersNum; ++i) { - int idx1 = order[i]; - if (idx1 >= 0) { - keep.push_back(idx1); - shouldContinue = false; - for (size_t j = i + 1; j < ordersNum; ++j) { - int idx2 = order[j]; - if (idx2 >= 0) { - shouldContinue = true; - float overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - - std::fmaxf(boxes[idx1].left, boxes[idx2].left); - float overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - - std::fmaxf(boxes[idx1].top, boxes[idx2].top); - float intersection = - overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0; - float union_area = areas[idx1] + areas[idx2] - intersection; - if (0.0f == union_area || intersection / union_area > thresh) { - order[j] = -1; - } - } - } - } - } - return keep; -} - -std::vector multiclass_nms(const std::vector& boxes, - const std::vector& scores, - const float iou_threshold = 0.45f, - bool includeBoundaries = false, - size_t maxNum = 200); diff --git a/src/cpp/utils/include/utils/ocv_common.hpp b/src/cpp/utils/include/utils/ocv_common.hpp deleted file mode 100644 index 3a25f755..00000000 --- a/src/cpp/utils/include/utils/ocv_common.hpp +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief a header file with common samples functionality using OpenCV - * @file ocv_common.hpp - */ - -#pragma once - -#include -#include -#include - -static inline ov::Tensor wrapMat2Tensor(const cv::Mat& mat) { - auto matType = mat.type() & CV_MAT_DEPTH_MASK; - if (matType != CV_8U && matType != CV_32F) { - throw std::runtime_error("Unsupported mat type for wrapping"); - } - bool isMatFloat = matType == CV_32F; - - const size_t channels = mat.channels(); - const size_t height = mat.rows; - const size_t width = mat.cols; - - const size_t strideH = mat.step.buf[0]; - const size_t strideW = mat.step.buf[1]; - - const bool isDense = !isMatFloat - ? (strideW == channels && strideH == channels * width) - : (strideW == channels * sizeof(float) && strideH == channels * width * sizeof(float)); - if (!isDense) { - throw std::runtime_error("Doesn't support conversion from not dense cv::Mat"); - } - auto precision = isMatFloat ? ov::element::f32 : ov::element::u8; - struct SharedMatAllocator { - const cv::Mat mat; - void* allocate(size_t bytes, size_t) { - return bytes <= mat.rows * mat.step[0] ? mat.data : nullptr; - } - void deallocate(void*, size_t, size_t) {} - bool is_equal(const SharedMatAllocator& other) const noexcept { - return this == &other; - } - }; - return ov::Tensor(precision, ov::Shape{1, height, width, channels}, SharedMatAllocator{mat}); -} - -struct IntervalCondition { - using DimType = size_t; - using IndexType = size_t; - using ConditionChecker = std::function; - - template - constexpr IntervalCondition(IndexType i1, IndexType i2, Cond c) - : impl([=](IndexType i0, const ov::PartialShape& shape) { - return c(shape[i0].get_max_length(), shape[i1].get_max_length()) && - c(shape[i0].get_max_length(), shape[i2].get_max_length()); - }) {} - bool operator()(IndexType i0, const ov::PartialShape& shape) const { - return impl(i0, shape); - } - -private: - ConditionChecker impl; -}; - -template