openvinotoolkit
diff --git a/‎ci/requirements-conversion.txt
Lines changed: 1 addition & 0 deletions b/‎ci/requirements-conversion.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎demos/CMakeLists.txt
Lines changed: 9 additions & 0 deletions b/‎demos/CMakeLists.txt
Lines changed: 9 additions & 0 deletions
diff --git a/‎demos/classification_demo/cpp/README.md
Lines changed: 1 addition & 1 deletion b/‎demos/classification_demo/cpp/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎demos/classification_demo/cpp/grid_mat.hpp
Lines changed: 3 additions & 0 deletions b/‎demos/classification_demo/cpp/grid_mat.hpp
Lines changed: 3 additions & 0 deletions
diff --git a/‎demos/classification_demo/cpp/main.cpp
Lines changed: 9 additions & 0 deletions b/‎demos/classification_demo/cpp/main.cpp
Lines changed: 9 additions & 0 deletions
diff --git a/‎demos/common/cpp/models/include/models/detection_model_retinaface_pt.h
Lines changed: 76 additions & 0 deletions b/‎demos/common/cpp/models/include/models/detection_model_retinaface_pt.h
Lines changed: 76 additions & 0 deletions
diff --git a/‎demos/common/cpp/models/include/models/detection_model_yolo.h
Lines changed: 21 additions & 5 deletions b/‎demos/common/cpp/models/include/models/detection_model_yolo.h
Lines changed: 21 additions & 5 deletions
@@ -52,6 +52,7 @@ keras-preprocessing==1.1.2
     # via tensorflow
 markdown==3.3.4
     # via tensorboard
+mxnet==1.2.0 ; sys_platform == 'win32'
 mxnet==1.7.0.post2 ; sys_platform != "win32"
     # via -r ${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/requirements_mxnet.txt
 networkx==2.5
 
@@ -60,6 +60,15 @@ else()
     set(COMPILER_IS_GCC_LIKE FALSE)
 endif()
 
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")  # tested first: the arm.* pattern below would also match "arm64"
+  set(AARCH64 ON)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")  # any remaining processor name that starts with arm/ARM
+  set(ARM ON)
+endif()
+if(ARM AND NOT CMAKE_CROSSCOMPILING)  # native (non-cross) builds with ARM set; AARCH64 is left untouched
+    add_compile_options(-march=armv7-a)  # directory-wide option: used for targets from this directory and below
+endif()
+
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
@@ -6,7 +6,7 @@ The demo visualize OpenVINO performance on inference of neural networks for imag
 
 On startup, the application reads command line parameters and loads a classification network to the Inference Engine for execution. It might take some time for the demo to read all input images. Then the demo performs inference to classify the images and places them on a grid.
 
-The demo starts in "Testing mode" with fixed grid size. After calculating the average FPS result, it will switch to normal mode and grid will be readjusted depending on model performance. Bigger grid means higher performance.
+The demo starts in "Testing mode" with a fixed grid size. After calculating the average FPS result, it switches to normal mode and the grid is readjusted depending on model performance. A bigger grid means higher performance. You can repeat the testing by pressing the "Space" or "R" key.
 
 When "ground truth" data is applied, the color coding for the text drawn above each image shows whether the classification was correct: green means a correct class prediction, red means a wrong one.
 
 
@@ -31,6 +31,9 @@ class GridMat {
                      currSourceId{0} {
         cv::Size size(static_cast<int>(std::round(sqrt(1. * targetFPS * aspectRatio.width / aspectRatio.height))),
                       static_cast<int>(std::round(sqrt(1. * targetFPS * aspectRatio.height / aspectRatio.width))));
+        if (size.width == 0 || size.height == 0) {
+            size = {1, 1};  // set minimum possible grid size
+        }
         int minCellSize = std::min(maxDisp.width / size.width, maxDisp.height / size.height);
         cellSize = cv::Size(minCellSize, minCellSize);
 
 
@@ -311,6 +311,15 @@ int main(int argc, char *argv[]) {
                     if (27 == key || 'q' == key || 'Q' == key) {  // Esc
                         keepRunning = false;
                     }
+                    else if (32 == key || 'r' == key || 'R' == key) {  // press space or r to restart testing if needed
+                        isTestMode = true;
+                        framesNum = 0;
+                        framesNumOnCalculationStart = 0;
+                        correctPredictionsCount = 0;
+                        accuracy = 0;
+                        elapsedSeconds = std::chrono::steady_clock::duration(0);
+                        startTime = std::chrono::steady_clock::now();
+                    }
                     else {
                         presenter.handleKey(key);
                     }
 
@@ -0,0 +1,76 @@
+/*
+// Copyright (C) 2020-2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <vector>
+#include "detection_model.h"
+#include <utils/nms.hpp>
+#include <string>
+
+class ModelRetinaFacePT : public DetectionModel {  // DetectionModel subclass; "PT" presumably denotes the PyTorch RetinaFace export - TODO confirm
+public:
+    struct Box {  // box given by cX/cY plus a size; element type of `priors` below
+        float cX;  // presumably the box center X - confirm against generatePriorData()
+        float cY;  // presumably the box center Y - confirm against generatePriorData()
+        float width;
+        float height;
+    };
+
+    struct Rect {  // rectangle stored as its four edge coordinates
+        float left;
+        float top;
+        float right;
+        float bottom;
+
+        float getWidth() const { return (right - left) + 1.0f; }  // +1: both edge coordinates are counted (inclusive extent)
+        float getHeight() const { return (bottom - top) + 1.0f; }  // same inclusive convention as getWidth()
+        float getXCenter() const { return left + (getWidth() - 1.0f) / 2.0f; }  // the -1 undoes the inclusive +1, so this is the midpoint of left/right
+        float getYCenter() const { return top + (getHeight() - 1.0f) / 2.0f; }  // midpoint of top/bottom, same reasoning as getXCenter()
+    };
+
+    /// Loads model and performs required initialization
+    /// @param modelFileName name of model to load
+    /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+    /// Any detected object with confidence lower than this threshold will be ignored.
+    /// @param useAutoResize - if true, image will be resized by IE.
+    /// @param boxIOUThreshold - threshold for NMS boxes filtering, varies in [0.0, 1.0] range.
+    ModelRetinaFacePT(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize, float boxIOUThreshold);
+    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;  // overrides the base-class hook; builds the result object from raw inference output
+
+protected:
+    size_t landmarksNum;  // no in-class initializer; presumably assigned in the constructor or prepareInputsOutputs() - TODO confirm
+    const float boxIOUThreshold;  // const member, so it has to be set in the constructor's initializer list (NMS IOU threshold per the constructor docs)
+    float variance[2] = { 0.1f, 0.2f };  // presumably the prior-box decoding variances - TODO confirm meaning/order in the implementation
+
+    enum EOutputType {  // kinds of network output this model distinguishes
+        OT_BBOX,
+        OT_SCORES,
+        OT_LANDMARK,
+        OT_MAX  // last enumerator, so its value equals the number of output kinds listed above
+    };
+
+    std::vector<ModelRetinaFacePT::Box> priors;  // presumably filled from generatePriorData() - TODO confirm where
+
+    std::vector<size_t> filterByScore(const InferenceEngine::MemoryBlob::Ptr& rawData, const float confidenceThreshold);  // presumably returns indices of entries passing confidenceThreshold - TODO confirm
+    std::vector<float> getFilteredScores(const InferenceEngine::MemoryBlob::Ptr& rawData, const std::vector<size_t>& indicies);  // presumably the scores at the given indices ("indicies" [sic])
+    std::vector<cv::Point2f> getFilteredLandmarks(const InferenceEngine::MemoryBlob::Ptr& rawData,
+        const std::vector<size_t>& indicies, int imgWidth, int imgHeight);  // presumably landmark points for the given indices, scaled by the image size - TODO confirm
+    std::vector<ModelRetinaFacePT::Box> generatePriorData();  // returns the same Box vector type that `priors` holds
+    std::vector<ModelRetinaFacePT::Rect> getFilteredProposals(const InferenceEngine::MemoryBlob::Ptr& rawData,
+        const std::vector<size_t>& indicies, int imgWidth, int imgHeight);  // presumably decoded face rectangles for the given indices - TODO confirm
+
+    void prepareInputsOutputs(InferenceEngine::CNNNetwork& cnnNetwork) override;  // overrides the base-class hook that configures the network's inputs and outputs
+};
@@ -26,19 +26,29 @@ namespace ngraph {
     }
 }
 
-class ModelYolo3 : public DetectionModel {
+class ModelYolo : public DetectionModel {
 protected:
     class Region {
     public:
         int num = 0;
         int classes = 0;
         int coords = 0;
         std::vector<float> anchors;
+        int outputWidth = 0;
+        int outputHeight = 0;
 
         Region(const std::shared_ptr<ngraph::op::RegionYolo>& regionYolo);
+        Region(int classes, int coords, const std::vector<float>& anchors, const std::vector<int64_t>& masks, int outputWidth, int outputHeight);
     };
 
 public:
+    enum YoloVersion {
+        YOLO_V1V2,
+        YOLO_V3,
+        YOLO_V4,
+        YOLO_V4_TINY
+    };
+
     /// Constructor.
     /// @param modelFileName name of model to load
     /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
@@ -51,22 +61,28 @@ class ModelYolo3 : public DetectionModel {
     /// during postprocessing (only one of them should stay). The default value is 0.5
     /// @param labels - array of labels for every class. If this array is empty or contains less elements
     /// than actual classes number, default "Label #N" will be shown for missing items.
-    ModelYolo3(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize,
-        bool useAdvancedPostprocessing = true, float boxIOUThreshold = 0.5, const std::vector<std::string>& labels = std::vector<std::string>());
+    /// @param anchors - vector of anchors coordinates. Required for YOLOv4, for other versions it may be omitted.
+    /// @param masks - vector of masks values. Required for YOLOv4, for other versions it may be omitted.
+    ModelYolo(const std::string& modelFileName, float confidenceThreshold, bool useAutoResize,
+        bool useAdvancedPostprocessing = true, float boxIOUThreshold = 0.5, const std::vector<std::string>& labels = std::vector<std::string>(),
+        const std::vector<float>& anchors = std::vector<float>(), const std::vector<int64_t>& masks = std::vector<int64_t>());
 
     std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
 
 protected:
     void prepareInputsOutputs(InferenceEngine::CNNNetwork& cnnNetwork) override;
 
-    void parseYOLOV3Output(const std::string& output_name, const InferenceEngine::Blob::Ptr& blob,
+    void parseYOLOOutput(const std::string& output_name, const InferenceEngine::Blob::Ptr& blob,
         const unsigned long resized_im_h, const unsigned long resized_im_w, const unsigned long original_im_h,
         const unsigned long original_im_w, std::vector<DetectedObject>& objects);
 
-    static int calculateEntryIndex(int side, int lcoords, int lclasses, int location, int entry);
+    static int calculateEntryIndex(int entriesNum, int lcoords, int lclasses, int location, int entry);
     static double intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2);
 
     std::map<std::string, Region> regions;
     double boxIOUThreshold;
     bool useAdvancedPostprocessing;
+    YoloVersion yoloVersion;
+    const std::vector<float> presetAnchors;
+    const std::vector<int64_t> presetMasks;
 };