openvinotoolkit
diff --git a/‎demos/common/cpp/models/include/models/detection_model_faceboxes.h
Lines changed: 2 additions & 19 deletions b/‎demos/common/cpp/models/include/models/detection_model_faceboxes.h
Lines changed: 2 additions & 19 deletions
diff --git a/‎demos/common/cpp/models/include/models/detection_model_retinaface.h
Lines changed: 2 additions & 20 deletions b/‎demos/common/cpp/models/include/models/detection_model_retinaface.h
Lines changed: 2 additions & 20 deletions
diff --git a/‎demos/common/cpp/models/include/models/detection_model_retinaface_pt.h
Lines changed: 5 additions & 24 deletions b/‎demos/common/cpp/models/include/models/detection_model_retinaface_pt.h
Lines changed: 5 additions & 24 deletions
diff --git a/‎demos/common/cpp/models/include/models/detection_model_yolov3_onnx.h
Lines changed: 50 additions & 0 deletions b/‎demos/common/cpp/models/include/models/detection_model_yolov3_onnx.h
Lines changed: 50 additions & 0 deletions
diff --git a/‎demos/common/cpp/models/include/models/detection_model_yolox.h
Lines changed: 54 additions & 0 deletions b/‎demos/common/cpp/models/include/models/detection_model_yolox.h
Lines changed: 54 additions & 0 deletions
diff --git a/‎demos/common/cpp/models/include/models/hpe_model_associative_embedding.h
Lines changed: 0 additions & 1 deletion b/‎demos/common/cpp/models/include/models/hpe_model_associative_embedding.h
Lines changed: 0 additions & 1 deletion
diff --git a/‎demos/common/cpp/models/include/models/image_model.h
Lines changed: 3 additions & 0 deletions b/‎demos/common/cpp/models/include/models/image_model.h
Lines changed: 3 additions & 0 deletions
diff --git a/‎demos/common/cpp/models/include/models/style_transfer_model.h
Lines changed: 0 additions & 1 deletion b/‎demos/common/cpp/models/include/models/style_transfer_model.h
Lines changed: 0 additions & 1 deletion
diff --git a/‎demos/common/cpp/models/src/detection_model_faceboxes.cpp
Lines changed: 7 additions & 7 deletions b/‎demos/common/cpp/models/src/detection_model_faceboxes.cpp
Lines changed: 7 additions & 7 deletions
diff --git a/‎demos/common/cpp/models/src/detection_model_retinaface.cpp
Lines changed: 11 additions & 11 deletions b/‎demos/common/cpp/models/src/detection_model_retinaface.cpp
Lines changed: 11 additions & 11 deletions
@@ -22,6 +22,8 @@
 #include <utility>
 #include <vector>
 
+#include <utils/nms.hpp>
+
 #include "models/detection_model.h"
 
 namespace ov {
@@ -32,25 +34,6 @@ struct ResultBase;
 
 class ModelFaceBoxes : public DetectionModel {
 public:
-    struct Anchor {
-        float left;
-        float top;
-        float right;
-        float bottom;
-
-        float getWidth() const {
-            return (right - left) + 1.0f;
-        }
-        float getHeight() const {
-            return (bottom - top) + 1.0f;
-        }
-        float getXCenter() const {
-            return left + (getWidth() - 1.0f) / 2.0f;
-        }
-        float getYCenter() const {
-            return top + (getHeight() - 1.0f) / 2.0f;
-        }
-    };
     static const int INIT_VECTOR_SIZE = 200;
 
     ModelFaceBoxes(const std::string& modelFileName,
 
@@ -20,6 +20,8 @@
 #include <string>
 #include <vector>
 
+#include <utils/nms.hpp>
+
 #include "models/detection_model.h"
 
 namespace ov {
@@ -30,26 +32,6 @@ struct ResultBase;
 
 class ModelRetinaFace : public DetectionModel {
 public:
-    struct Anchor {
-        float left;
-        float top;
-        float right;
-        float bottom;
-
-        float getWidth() const {
-            return (right - left) + 1.0f;
-        }
-        float getHeight() const {
-            return (bottom - top) + 1.0f;
-        }
-        float getXCenter() const {
-            return left + (getWidth() - 1.0f) / 2.0f;
-        }
-        float getYCenter() const {
-            return top + (getHeight() - 1.0f) / 2.0f;
-        }
-    };
-
     static const int LANDMARKS_NUM = 5;
     static const int INIT_VECTOR_SIZE = 200;
     /// Loads model and performs required initialization
 
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include <opencv2/core/types.hpp>
+#include <utils/nms.hpp>
 
 #include "models/detection_model.h"
 
@@ -41,26 +42,6 @@ class ModelRetinaFacePT : public DetectionModel {
         float height;
     };
 
-    struct Rect {
-        float left;
-        float top;
-        float right;
-        float bottom;
-
-        float getWidth() const {
-            return (right - left) + 1.0f;
-        }
-        float getHeight() const {
-            return (bottom - top) + 1.0f;
-        }
-        float getXCenter() const {
-            return left + (getWidth() - 1.0f) / 2.0f;
-        }
-        float getYCenter() const {
-            return top + (getHeight() - 1.0f) / 2.0f;
-        }
-    };
-
     /// Loads model and performs required initialization
     /// @param model_name name of model to load
     /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
@@ -91,10 +72,10 @@ class ModelRetinaFacePT : public DetectionModel {
                                                   int imgWidth,
                                                   int imgHeight);
     std::vector<ModelRetinaFacePT::Box> generatePriorData();
-    std::vector<ModelRetinaFacePT::Rect> getFilteredProposals(const ov::Tensor& boxesTensor,
-                                                              const std::vector<size_t>& indicies,
-                                                              int imgWidth,
-                                                              int imgHeight);
+    std::vector<Anchor> getFilteredProposals(const ov::Tensor& boxesTensor,
+                                             const std::vector<size_t>& indicies,
+                                             int imgWidth,
+                                             int imgHeight);
 
     void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
 };
@@ -0,0 +1,50 @@
+/*
+// Copyright (C) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include "models/detection_model.h"
+
+class ModelYoloV3ONNX: public DetectionModel {  // DetectionModel subclass overriding preprocess/postprocess/prepareInputsOutputs
+public:
+    /// Constructs the model wrapper; labels and layout are optional and default to empty.
+    /// @param modelFileName - name of the model file to load
+    /// @param confidenceThreshold - threshold used to eliminate low-confidence detections.
+    /// Any detected object with confidence lower than this threshold will be ignored.
+    /// @param labels - array of labels for every class. If this array is empty or contains fewer elements
+    /// than the actual number of classes, the default "Label #N" will be shown for missing items.
+    /// @param layout - model input layout (empty string by default)
+    ModelYoloV3ONNX(const std::string& modelFileName,
+                    float confidenceThreshold,
+                    const std::vector<std::string>& labels = std::vector<std::string>(),
+                    const std::string& layout = "");
+
+    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;  // NOTE(review): std::unique_ptr/std::shared_ptr are used but <memory> is not included directly here — presumably transitive; confirm
+    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+
+protected:
+    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+
+    std::string boxesOutputName;  // NOTE(review): presumably the name of the model output carrying boxes — confirm in the .cpp
+    std::string scoresOutputName;  // NOTE(review): presumably the name of the model output carrying scores — confirm in the .cpp
+    std::string indicesOutputName;  // NOTE(review): presumably the name of the model output carrying selected indices — confirm in the .cpp
+    static const int numberOfClasses = 80;  // fixed class count; NOTE(review): presumably the COCO label set — confirm
+};
@@ -0,0 +1,54 @@
+/*
+// Copyright (C) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include "models/detection_model.h"
+
+class ModelYoloX: public DetectionModel {  // DetectionModel subclass overriding preprocess/postprocess/prepareInputsOutputs
+public:
+    /// Constructs the model wrapper; the IOU threshold, labels and layout are optional.
+    /// @param modelFileName - name of the model file to load
+    /// @param confidenceThreshold - threshold used to eliminate low-confidence detections.
+    /// Any detected object with confidence lower than this threshold will be ignored.
+    /// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering
+    /// during postprocessing (only one of them should stay). The default value is 0.5.
+    /// @param labels - array of labels for every class. If this array is empty or contains fewer elements
+    /// than the actual number of classes, the default "Label #N" will be shown for missing items.
+    /// @param layout - model input layout (empty string by default)
+    ModelYoloX(const std::string& modelFileName,
+                    float confidenceThreshold,
+                    float boxIOUThreshold = 0.5,
+                    const std::vector<std::string>& labels = std::vector<std::string>(),
+                    const std::string& layout = "");
+
+    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+
+protected:
+    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+    void setStridesGrids();  // NOTE(review): presumably fills `grids` and `expandedStrides` — confirm in the .cpp
+
+    double boxIOUThreshold;  // NOTE(review): stored as double while the constructor parameter is float — confirm this is intended
+    std::vector<std::pair<size_t, size_t>> grids;  // NOTE(review): std::pair is used but <utility> is not included directly in this header — presumably transitive; confirm
+    std::vector<size_t> expandedStrides;  // NOTE(review): presumably one stride value per entry of `grids` — confirm in the .cpp
+    static const size_t numberOfClasses = 80;  // fixed class count; NOTE(review): presumably the COCO label set — confirm
+};
@@ -65,7 +65,6 @@ class HpeAssociativeEmbedding : public ImageModel {
     int targetSize;
     float confidenceThreshold;
     float delta;
-    RESIZE_MODE resizeMode;
 
     std::string embeddingsTensorName;
     std::string heatmapsTensorName;
 
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "models/model_base.h"
+#include "utils/image_utils.h"
 
 namespace ov {
 class InferRequest;
@@ -43,4 +44,6 @@ class ImageModel : public ModelBase {
 
     size_t netInputHeight = 0;
     size_t netInputWidth = 0;
+    cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR;
+    RESIZE_MODE resizeMode = RESIZE_FILL;
 };
@@ -36,7 +36,6 @@ class StyleTransferModel : public ImageModel {
     /// @param layout - model input layout
     StyleTransferModel(const std::string& modelFileName, const std::string& layout = "");
 
-    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
     std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
 
 protected:
 
@@ -100,7 +100,7 @@ void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
     priorBoxes(featureMaps);
 }
 
-void calculateAnchors(std::vector<ModelFaceBoxes::Anchor>& anchors,
+void calculateAnchors(std::vector<Anchor>& anchors,
                       const std::vector<float>& vx,
                       const std::vector<float>& vy,
                       const int minSize,
@@ -126,7 +126,7 @@ void calculateAnchors(std::vector<ModelFaceBoxes::Anchor>& anchors,
     }
 }
 
-void calculateAnchorsZeroLevel(std::vector<ModelFaceBoxes::Anchor>& anchors,
+void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors,
                                const int fx,
                                const int fy,
                                const std::vector<int>& minSizes,
@@ -193,14 +193,14 @@ std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor
     return {indices, scores};
 }
 
-std::vector<ModelFaceBoxes::Anchor> filterBoxes(const ov::Tensor& boxesTensor,
-                                                const std::vector<ModelFaceBoxes::Anchor>& anchors,
-                                                const std::vector<size_t>& validIndices,
-                                                const std::vector<float>& variance) {
+std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor,
+                                const std::vector<Anchor>& anchors,
+                                const std::vector<size_t>& validIndices,
+                                const std::vector<float>& variance) {
     auto shape = boxesTensor.get_shape();
     const float* boxesPtr = boxesTensor.data<float>();
 
-    std::vector<ModelFaceBoxes::Anchor> boxes;
+    std::vector<Anchor> boxes;
     boxes.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
     for (auto i : validIndices) {
         auto objStart = shape[2] * i;
 
@@ -129,7 +129,7 @@ void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
         auto s = anchorCfg[idx].stride;
         auto anchorNum = anchorsFpn[s].size();
 
-        anchors.push_back(std::vector<ModelRetinaFace::Anchor>(height * width * anchorNum));
+        anchors.push_back(std::vector<Anchor>(height * width * anchorNum));
         for (size_t iw = 0; iw < width; ++iw) {
             size_t sw = iw * s;
             for (size_t ih = 0; ih < height; ++ih) {
@@ -146,8 +146,8 @@ void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
     }
 }
 
-std::vector<ModelRetinaFace::Anchor> ratioEnum(const ModelRetinaFace::Anchor& anchor, const std::vector<int>& ratios) {
-    std::vector<ModelRetinaFace::Anchor> retVal;
+std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {
+    std::vector<Anchor> retVal;
     const auto w = anchor.getWidth();
     const auto h = anchor.getHeight();
     const auto xCtr = anchor.getXCenter();
@@ -166,8 +166,8 @@ std::vector<ModelRetinaFace::Anchor> ratioEnum(const ModelRetinaFace::Anchor& an
     return retVal;
 }
 
-std::vector<ModelRetinaFace::Anchor> scaleEnum(const ModelRetinaFace::Anchor& anchor, const std::vector<int>& scales) {
-    std::vector<ModelRetinaFace::Anchor> retVal;
+std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {
+    std::vector<Anchor> retVal;
     const auto w = anchor.getWidth();
     const auto h = anchor.getHeight();
     const auto xCtr = anchor.getXCenter();
@@ -184,12 +184,12 @@ std::vector<ModelRetinaFace::Anchor> scaleEnum(const ModelRetinaFace::Anchor& an
     return retVal;
 }
 
-std::vector<ModelRetinaFace::Anchor> generateAnchors(const int baseSize,
+std::vector<Anchor> generateAnchors(const int baseSize,
                                                      const std::vector<int>& ratios,
                                                      const std::vector<int>& scales) {
-    ModelRetinaFace::Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
+    Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
     auto ratioAnchors = ratioEnum(baseAnchor, ratios);
-    std::vector<ModelRetinaFace::Anchor> retVal;
+    std::vector<Anchor> retVal;
 
     for (const auto& ra : ratioAnchors) {
         auto addon = scaleEnum(ra, scales);
@@ -245,11 +245,11 @@ void filterScores(std::vector<float>& scores,
     }
 }
 
-void filterBoxes(std::vector<ModelRetinaFace::Anchor>& boxes,
+void filterBoxes(std::vector<Anchor>& boxes,
                  const std::vector<size_t>& indices,
                  const ov::Tensor& boxesTensor,
                  int anchorNum,
-                 const std::vector<ModelRetinaFace::Anchor>& anchors) {
+                 const std::vector<Anchor>& anchors) {
     const auto& shape = boxesTensor.get_shape();
     const float* boxesPtr = boxesTensor.data<float>();
     const auto boxPredLen = shape[1] / anchorNum;
@@ -279,7 +279,7 @@ void filterLandmarks(std::vector<cv::Point2f>& landmarks,
                      const std::vector<size_t>& indices,
                      const ov::Tensor& landmarksTensor,
                      int anchorNum,
-                     const std::vector<ModelRetinaFace::Anchor>& anchors,
+                     const std::vector<Anchor>& anchors,
                      const float landmarkStd) {
     const auto& shape = landmarksTensor.get_shape();
     const float* landmarksPtr = landmarksTensor.data<float>();