review updates

akorobeinikov · akorobeinikov · commit a948ea607ecd · 2022-11-10T19:23:28.000+03:00
diff --git a/demos/common/cpp/models/include/models/detection_model_faceboxes.h b/demos/common/cpp/models/include/models/detection_model_faceboxes.h
@@ -22,6 +22,8 @@
 #include <utility>
 #include <vector>
 
+#include <utils/nms.hpp>
+
 #include "models/detection_model.h"
 
 namespace ov {
@@ -32,25 +34,6 @@ struct ResultBase;
 
 class ModelFaceBoxes : public DetectionModel {
 public:
-    struct Anchor {
-        float left;
-        float top;
-        float right;
-        float bottom;
-
-        float getWidth() const {
-            return (right - left) + 1.0f;
-        }
-        float getHeight() const {
-            return (bottom - top) + 1.0f;
-        }
-        float getXCenter() const {
-            return left + (getWidth() - 1.0f) / 2.0f;
-        }
-        float getYCenter() const {
-            return top + (getHeight() - 1.0f) / 2.0f;
-        }
-    };
     static const int INIT_VECTOR_SIZE = 200;
 
     ModelFaceBoxes(const std::string& modelFileName,
diff --git a/demos/common/cpp/models/include/models/detection_model_retinaface.h b/demos/common/cpp/models/include/models/detection_model_retinaface.h
@@ -20,6 +20,8 @@
 #include <string>
 #include <vector>
 
+#include <utils/nms.hpp>
+
 #include "models/detection_model.h"
 
 namespace ov {
@@ -30,26 +32,6 @@ struct ResultBase;
 
 class ModelRetinaFace : public DetectionModel {
 public:
-    struct Anchor {
-        float left;
-        float top;
-        float right;
-        float bottom;
-
-        float getWidth() const {
-            return (right - left) + 1.0f;
-        }
-        float getHeight() const {
-            return (bottom - top) + 1.0f;
-        }
-        float getXCenter() const {
-            return left + (getWidth() - 1.0f) / 2.0f;
-        }
-        float getYCenter() const {
-            return top + (getHeight() - 1.0f) / 2.0f;
-        }
-    };
-
     static const int LANDMARKS_NUM = 5;
     static const int INIT_VECTOR_SIZE = 200;
     /// Loads model and performs required initialization
diff --git a/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h b/demos/common/cpp/models/include/models/detection_model_retinaface_pt.h
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include <opencv2/core/types.hpp>
+#include <utils/nms.hpp>
 
 #include "models/detection_model.h"
 
@@ -41,26 +42,6 @@ class ModelRetinaFacePT : public DetectionModel {
         float height;
     };
 
-    struct Rect {
-        float left;
-        float top;
-        float right;
-        float bottom;
-
-        float getWidth() const {
-            return (right - left) + 1.0f;
-        }
-        float getHeight() const {
-            return (bottom - top) + 1.0f;
-        }
-        float getXCenter() const {
-            return left + (getWidth() - 1.0f) / 2.0f;
-        }
-        float getYCenter() const {
-            return top + (getHeight() - 1.0f) / 2.0f;
-        }
-    };
-
     /// Loads model and performs required initialization
     /// @param model_name name of model to load
     /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
@@ -91,10 +72,10 @@ class ModelRetinaFacePT : public DetectionModel {
                                                   int imgWidth,
                                                   int imgHeight);
     std::vector<ModelRetinaFacePT::Box> generatePriorData();
-    std::vector<ModelRetinaFacePT::Rect> getFilteredProposals(const ov::Tensor& boxesTensor,
-                                                              const std::vector<size_t>& indicies,
-                                                              int imgWidth,
-                                                              int imgHeight);
+    std::vector<Anchor> getFilteredProposals(const ov::Tensor& boxesTensor,
+                                             const std::vector<size_t>& indicies,
+                                             int imgWidth,
+                                             int imgHeight);
 
     void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
 };
diff --git a/demos/common/cpp/models/include/models/detection_model_yolov3_onnx.h b/demos/common/cpp/models/include/models/detection_model_yolov3_onnx.h
@@ -42,7 +42,6 @@ class ModelYoloV3ONNX: public DetectionModel {
 
 protected:
     void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
-    float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd);
 
     std::string boxesOutputName;
     std::string scoresOutputName;
diff --git a/demos/common/cpp/models/include/models/detection_model_yolox.h b/demos/common/cpp/models/include/models/detection_model_yolox.h
@@ -45,7 +45,7 @@ class ModelYoloX: public DetectionModel {
 
 protected:
     void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
-    void prepareGridsAndStrides();
+    void setStridesGrids();
 
     double boxIOUThreshold;
     std::vector<std::pair<size_t, size_t>> grids;
diff --git a/demos/common/cpp/models/include/models/image_model.h b/demos/common/cpp/models/include/models/image_model.h
@@ -44,6 +44,6 @@ class ImageModel : public ModelBase {
 
     size_t netInputHeight = 0;
     size_t netInputWidth = 0;
-    INTERPOLATION_MODE interpolationMode = LINEAR;
+    cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR;
     RESIZE_MODE resizeMode = RESIZE_FILL;
 };
diff --git a/demos/common/cpp/models/src/detection_model_faceboxes.cpp b/demos/common/cpp/models/src/detection_model_faceboxes.cpp
@@ -100,7 +100,7 @@ void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
     priorBoxes(featureMaps);
 }
 
-void calculateAnchors(std::vector<ModelFaceBoxes::Anchor>& anchors,
+void calculateAnchors(std::vector<Anchor>& anchors,
                       const std::vector<float>& vx,
                       const std::vector<float>& vy,
                       const int minSize,
@@ -126,7 +126,7 @@ void calculateAnchors(std::vector<ModelFaceBoxes::Anchor>& anchors,
     }
 }
 
-void calculateAnchorsZeroLevel(std::vector<ModelFaceBoxes::Anchor>& anchors,
+void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors,
                                const int fx,
                                const int fy,
                                const std::vector<int>& minSizes,
@@ -193,14 +193,14 @@ std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor
     return {indices, scores};
 }
 
-std::vector<ModelFaceBoxes::Anchor> filterBoxes(const ov::Tensor& boxesTensor,
-                                                const std::vector<ModelFaceBoxes::Anchor>& anchors,
-                                                const std::vector<size_t>& validIndices,
-                                                const std::vector<float>& variance) {
+std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor,
+                                const std::vector<Anchor>& anchors,
+                                const std::vector<size_t>& validIndices,
+                                const std::vector<float>& variance) {
     auto shape = boxesTensor.get_shape();
     const float* boxesPtr = boxesTensor.data<float>();
 
-    std::vector<ModelFaceBoxes::Anchor> boxes;
+    std::vector<Anchor> boxes;
     boxes.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
     for (auto i : validIndices) {
         auto objStart = shape[2] * i;
diff --git a/demos/common/cpp/models/src/detection_model_retinaface.cpp b/demos/common/cpp/models/src/detection_model_retinaface.cpp
@@ -129,7 +129,7 @@ void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
         auto s = anchorCfg[idx].stride;
         auto anchorNum = anchorsFpn[s].size();
 
-        anchors.push_back(std::vector<ModelRetinaFace::Anchor>(height * width * anchorNum));
+        anchors.push_back(std::vector<Anchor>(height * width * anchorNum));
         for (size_t iw = 0; iw < width; ++iw) {
             size_t sw = iw * s;
             for (size_t ih = 0; ih < height; ++ih) {
@@ -146,8 +146,8 @@ void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
     }
 }
 
-std::vector<ModelRetinaFace::Anchor> ratioEnum(const ModelRetinaFace::Anchor& anchor, const std::vector<int>& ratios) {
-    std::vector<ModelRetinaFace::Anchor> retVal;
+std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {
+    std::vector<Anchor> retVal;
     const auto w = anchor.getWidth();
     const auto h = anchor.getHeight();
     const auto xCtr = anchor.getXCenter();
@@ -166,8 +166,8 @@ std::vector<ModelRetinaFace::Anchor> ratioEnum(const ModelRetinaFace::Anchor& an
     return retVal;
 }
 
-std::vector<ModelRetinaFace::Anchor> scaleEnum(const ModelRetinaFace::Anchor& anchor, const std::vector<int>& scales) {
-    std::vector<ModelRetinaFace::Anchor> retVal;
+std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {
+    std::vector<Anchor> retVal;
     const auto w = anchor.getWidth();
     const auto h = anchor.getHeight();
     const auto xCtr = anchor.getXCenter();
@@ -184,12 +184,12 @@ std::vector<ModelRetinaFace::Anchor> scaleEnum(const ModelRetinaFace::Anchor& an
     return retVal;
 }
 
-std::vector<ModelRetinaFace::Anchor> generateAnchors(const int baseSize,
+std::vector<Anchor> generateAnchors(const int baseSize,
                                                      const std::vector<int>& ratios,
                                                      const std::vector<int>& scales) {
-    ModelRetinaFace::Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
+    Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
     auto ratioAnchors = ratioEnum(baseAnchor, ratios);
-    std::vector<ModelRetinaFace::Anchor> retVal;
+    std::vector<Anchor> retVal;
 
     for (const auto& ra : ratioAnchors) {
         auto addon = scaleEnum(ra, scales);
@@ -245,11 +245,11 @@ void filterScores(std::vector<float>& scores,
     }
 }
 
-void filterBoxes(std::vector<ModelRetinaFace::Anchor>& boxes,
+void filterBoxes(std::vector<Anchor>& boxes,
                  const std::vector<size_t>& indices,
                  const ov::Tensor& boxesTensor,
                  int anchorNum,
-                 const std::vector<ModelRetinaFace::Anchor>& anchors) {
+                 const std::vector<Anchor>& anchors) {
     const auto& shape = boxesTensor.get_shape();
     const float* boxesPtr = boxesTensor.data<float>();
     const auto boxPredLen = shape[1] / anchorNum;
@@ -279,7 +279,7 @@ void filterLandmarks(std::vector<cv::Point2f>& landmarks,
                      const std::vector<size_t>& indices,
                      const ov::Tensor& landmarksTensor,
                      int anchorNum,
-                     const std::vector<ModelRetinaFace::Anchor>& anchors,
+                     const std::vector<Anchor>& anchors,
                      const float landmarkStd) {
     const auto& shape = landmarksTensor.get_shape();
     const float* landmarksPtr = landmarksTensor.data<float>();
diff --git a/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp b/demos/common/cpp/models/src/detection_model_retinaface_pt.cpp
@@ -197,11 +197,11 @@ std::vector<ModelRetinaFacePT::Box> ModelRetinaFacePT::generatePriorData() {
     return anchors;
 }
 
-std::vector<ModelRetinaFacePT::Rect> ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor,
+std::vector<Anchor> ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor,
                                                                              const std::vector<size_t>& indicies,
                                                                              int imgWidth,
                                                                              int imgHeight) {
-    std::vector<ModelRetinaFacePT::Rect> rects;
+    std::vector<Anchor> rects;
     rects.reserve(indicies.size());
 
     const auto& shape = boxesTensor.get_shape();
@@ -218,10 +218,10 @@ std::vector<ModelRetinaFacePT::Rect> ModelRetinaFacePT::getFilteredProposals(con
         const float cY = priors[i].cY + pRawBox->cY * variance[0] * prior.height;
         const float width = prior.width * exp(pRawBox->width * variance[1]);
         const float height = prior.height * exp(pRawBox->height * variance[1]);
-        rects.push_back(Rect{clamp(cX - width / 2, 0.f, 1.f) * imgWidth,
-                             clamp(cY - height / 2, 0.f, 1.f) * imgHeight,
-                             clamp(cX + width / 2, 0.f, 1.f) * imgWidth,
-                             clamp(cY + height / 2, 0.f, 1.f) * imgHeight});
+        rects.push_back(Anchor{clamp(cX - width / 2, 0.f, 1.f) * imgWidth,
+                               clamp(cY - height / 2, 0.f, 1.f) * imgHeight,
+                               clamp(cX + width / 2, 0.f, 1.f) * imgWidth,
+                               clamp(cY + height / 2, 0.f, 1.f) * imgHeight});
     }
 
     return rects;
diff --git a/demos/common/cpp/models/src/detection_model_yolov3_onnx.cpp b/demos/common/cpp/models/src/detection_model_yolov3_onnx.cpp
@@ -39,60 +39,50 @@ ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName,
                                  const std::vector<std::string>& labels,
                                  const std::string& layout)
     : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {
-        interpolationMode = CUBIC;
+        interpolationMode = cv::INTER_CUBIC;
         resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
     }
 
 
 void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
     // --------------------------- Configure input & output -------------------------------------------------
-    // --------------------------- Prepare input  ------------------------------------------------------
+    // --------------------------- Prepare inputs ------------------------------------------------------
     const ov::OutputVector& inputs = model->inputs();
     if (inputs.size() != 2) {
         throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs");
     }
 
-    // Check first image input
-    std::string imageInputName = inputs.begin()->get_any_name();
-    inputsNames.push_back(imageInputName);
-
-    const ov::Shape& imageShape = inputs.begin()->get_shape();
-    const ov::Layout& imageLayout = getInputLayout(inputs.front());
-
-    if (imageShape.size() != 4 && imageShape[ov::layout::channels_idx(imageLayout)] != 3) {
-        throw std::logic_error("Expected 4D image input with 3 channels");
-    }
-
     ov::preprocess::PrePostProcessor ppp(model);
-    ppp.input(imageInputName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
-
-    ppp.input(imageInputName).model().set_layout(imageLayout);
-
-    // Check second info input
-    std::string infoInputName = (++inputs.begin())->get_any_name();
-    inputsNames.push_back(infoInputName);
-
-    const ov::Shape infoShape = (++inputs.begin())->get_shape();
-    const ov::Layout& infoLayout = getInputLayout(inputs.at(1));
-
-    if (infoShape.size() != 2 && infoShape[ov::layout::channels_idx(infoLayout)] != 2) {
-        throw std::logic_error("Expected 2D image info input with 2 channels");
+    inputsNames.resize(inputs.size());  // resize, not reserve: slots [0] and [1] are assigned by index below
+    for (auto& input : inputs) {
+        const ov::Shape& currentShape = input.get_shape();
+        std::string currentName = input.get_any_name();
+        const ov::Layout& currentLayout = getInputLayout(input);
+
+        if (currentShape.size() == 4) {  // 4D input: handled as the image tensor
+            if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) {
+                throw std::logic_error("Expected 4D image input with 3 channels");
+            }
+            inputsNames[0] = currentName;  // image input name always goes to slot 0, whatever the model's input order
+            netInputWidth = currentShape[ov::layout::width_idx(currentLayout)];
+            netInputHeight = currentShape[ov::layout::height_idx(currentLayout)];
+            ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+        } else if (currentShape.size() == 2) {  // 2D input: handled as the image-info tensor
+            if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) {
+                throw std::logic_error("Expected 2D image info input with 2 channels");
+            }
+            inputsNames[1] = currentName;  // image-info input name always goes to slot 1
+            ppp.input(currentName).tensor().set_element_type(ov::element::i32);
+        }
+        ppp.input(currentName).model().set_layout(currentLayout);
     }
 
-    ppp.input(infoInputName).tensor().set_element_type(ov::element::i32);
-
-    ppp.input(infoInputName).model().set_layout(infoLayout);
-
-    // --------------------------- Reading image input parameters -------------------------------------------
-    netInputWidth = imageShape[ov::layout::width_idx(imageLayout)];
-    netInputHeight = imageShape[ov::layout::height_idx(imageLayout)];
-
-    // --------------------------- Prepare output  -----------------------------------------------------
-    if (model->outputs().size() != 3) {
+    // --------------------------- Prepare outputs -----------------------------------------------------
+    const ov::OutputVector& outputs = model->outputs();
+    if (outputs.size() != 3) {
         throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs");
     }
 
-    const ov::OutputVector& outputs = model->outputs();
     for (auto& output : outputs) {
         const ov::Shape& currentShape = output.get_partial_shape().get_max_shape();
         std::string currentName = output.get_any_name();
@@ -129,7 +119,7 @@ std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData&
     return ImageModel::preprocess(inputData, request);
 }
 
-float ModelYoloV3ONNX::getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
+float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
     const float* scoresPtr = scoresTensor.data<float>();
     const auto shape = scoresTensor.get_shape();
     int N = shape[2];
diff --git a/demos/common/cpp/models/src/detection_model_yolox.cpp b/demos/common/cpp/models/src/detection_model_yolox.cpp
@@ -71,7 +71,7 @@ void ModelYoloX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
     inputsNames.push_back(input.get_any_name());
     netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
     netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
-    prepareGridsAndStrides();
+    setStridesGrids();
 
     // --------------------------- Prepare output  -----------------------------------------------------
     if (model->outputs().size() != 1) {
@@ -89,7 +89,7 @@ void ModelYoloX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
     model = ppp.build();
 }
 
-void ModelYoloX::prepareGridsAndStrides() {
+void ModelYoloX::setStridesGrids() {
     std::vector<size_t> strides = {8, 16, 32};
     std::vector<size_t> hsizes(3);
     std::vector<size_t> wsizes(3);
diff --git a/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp b/demos/common/cpp/models/src/hpe_model_associative_embedding.cpp
diff --git a/demos/common/cpp/models/src/hpe_model_openpose.cpp b/demos/common/cpp/models/src/hpe_model_openpose.cpp
diff --git a/demos/common/cpp/utils/include/utils/image_utils.h b/demos/common/cpp/utils/include/utils/image_utils.h
diff --git a/demos/common/cpp/utils/src/image_utils.cpp b/demos/common/cpp/utils/src/image_utils.cpp