- // Copyright (C) 2018-2022 Intel Corporation
+ // Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

* @file mask_rcnn_demo/main.cpp
* @example mask_rcnn_demo/main.cpp
*/
- #include <algorithm>
- #include <iomanip>
+ #include <gflags/gflags.h>
#include <iostream>
#include <memory>
#include <map>
+ #include <algorithm>
#include <string>
#include <vector>
+ #include <iomanip>

- #include "openvino/openvino.hpp"
+ #include <inference_engine.hpp>

- #include "gflags/gflags.h"
- #include "utils/args_helper.hpp"
- #include "utils/ocv_common.hpp"
- #include "utils/performance_metrics.hpp"
- #include "utils/slog.hpp"
+ #include <utils/args_helper.hpp>
+ #include <utils/ocv_common.hpp>
+ #include <utils/performance_metrics.hpp>
+ #include <utils/slog.hpp>

#include "mask_rcnn_demo.h"

- using namespace ov::preprocess;
-
- bool ParseAndCheckCommandLine(int argc, char* argv[]) {
-     // Parsing and validation of input args
+ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
+     // ---------------------------Parsing and validation of input args--------------------------------------
    gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
    if (FLAGS_h) {
        showUsage();
@@ -47,77 +45,84 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
    return true;
}

- int main(int argc, char* argv[]) {
+ int main(int argc, char *argv[]) {
    try {
        PerformanceMetrics metrics;

-         // Parsing and validation of input args
+         // ------------------------------ Parsing and validation of input args ---------------------------------
        if (!ParseAndCheckCommandLine(argc, argv)) {
            return 0;
        }

-         // This vector stores paths to the processed images
+         /** This vector stores paths to the processed images **/
        std::vector<std::string> imagePaths;
        parseInputFilesArguments(imagePaths);
-         if (imagePaths.empty())
-             throw std::logic_error("No suitable images were found");
+         if (imagePaths.empty()) throw std::logic_error("No suitable images were found");
+         // -----------------------------------------------------------------------------------------------------
+
+         // ---------------------Load inference engine------------------------------------------------
+         slog::info << *InferenceEngine::GetInferenceEngineVersion() << slog::endl;
+         InferenceEngine::Core ie;

-         // Load inference engine
-         slog::info << ov::get_openvino_version() << slog::endl;
-         ov::Core core;
+         if (!FLAGS_l.empty()) {
+             // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension
+             auto extension_ptr = std::make_shared<InferenceEngine::Extension>(FLAGS_l);
+             ie.AddExtension(extension_ptr, "CPU");
+         }
+         if (!FLAGS_c.empty()) {
+             // clDNN Extensions are loaded from an .xml description and OpenCL kernel files
+             ie.SetConfig({{InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}, "GPU");
+         }

-         // Load network (Generated xml/bin files)
+         // -----------------------------------------------------------------------------------------------------

-         // Read network model
-         slog::info << "Reading model: " << FLAGS_m << slog::endl;
-         std::shared_ptr<ov::Model> model = core.read_model(FLAGS_m);
-         logBasicModelInfo(model);
+         // --------------------Load network (Generated xml/bin files)-------------------------------------------

-         // Prepare input blobs
+         /** Read network model **/
+         auto network = ie.ReadNetwork(FLAGS_m);

-         // Taking information about all topology inputs
-         ov::OutputVector inputs = model->inputs();
-         ov::OutputVector outputs = model->outputs();
+         // add DetectionOutput layer as output so we can get detected boxes and their probabilities
+         network.addOutput(FLAGS_detection_output_name.c_str(), 0);
+         // -----------------------------------------------------------------------------------------------------

-         if (inputs.size() != 2 || outputs.size() != 2)
-             throw std::logic_error("Expected network with 2 inputs and 2 outputs");
+         // -----------------------------Prepare input blobs-----------------------------------------------------

-         size_t modelBatchSize = 0;
-         size_t modelInputHeight = 0;
-         size_t modelInputWidth = 0;
+         /** Taking information about all topology inputs **/
+         InferenceEngine::InputsDataMap inputInfo(network.getInputsInfo());

-         const ov::Layout layout_nchw{"NCHW"};
+         std::string imageInputName;

-         // network dimensions for image input
-         auto it = std::find_if(inputs.begin(), inputs.end(), [](const ov::Output<ov::Node>& input) {return input.get_shape().size() == 4;});
-         if (it != inputs.end()) {
-             // ov::set_batch() should know input layout
-             model->get_parameters()[it->get_index()]->set_layout("NCHW");
-             modelBatchSize = it->get_shape()[ov::layout::batch_idx(layout_nchw)];
-             modelInputHeight = it->get_shape()[ov::layout::height_idx(layout_nchw)];
-             modelInputWidth = it->get_shape()[ov::layout::width_idx(layout_nchw)];
-         } else {
-             throw std::logic_error("Couldn't find model image input");
+         for (const auto& inputInfoItem : inputInfo) {
+             if (inputInfoItem.second->getTensorDesc().getDims().size() == 4) {  // first input contains images
+                 imageInputName = inputInfoItem.first;
+                 inputInfoItem.second->setPrecision(InferenceEngine::Precision::U8);
+             } else if (inputInfoItem.second->getTensorDesc().getDims().size() == 2) {  // second input contains image info
+                 inputInfoItem.second->setPrecision(InferenceEngine::Precision::FP32);
+             } else {
+                 throw std::logic_error("Unsupported input shape with size = " + std::to_string(inputInfoItem.second->getTensorDesc().getDims().size()));
+             }
        }

-         // Collect images
+         /** network dimensions for image input **/
+         const InferenceEngine::TensorDesc& inputDesc = inputInfo[imageInputName]->getTensorDesc();
+         IE_ASSERT(inputDesc.getDims().size() == 4);
+         size_t netBatchSize = getTensorBatch(inputDesc);
+         size_t netInputHeight = getTensorHeight(inputDesc);
+         size_t netInputWidth = getTensorWidth(inputDesc);
+
+         /** Collect images **/
        std::vector<cv::Mat> images;

-         if (modelBatchSize > imagePaths.size()) {
-             slog::warn << "Model batch size is greater than number of images (" << imagePaths.size() <<
+         if (netBatchSize > imagePaths.size()) {
+             slog::warn << "Network batch size is greater than number of images (" << imagePaths.size() <<
                "), some input files will be duplicated" << slog::endl;
-         } else if (modelBatchSize < imagePaths.size()) {
-             modelBatchSize = imagePaths.size();
-             slog::warn << "Model batch size is less than number of images (" << imagePaths.size() <<
-                 "), model will be reshaped" << slog::endl;
+         } else if (netBatchSize < imagePaths.size()) {
+             slog::warn << "Network batch size is less than number of images (" << imagePaths.size() <<
+                 "), some input files will be ignored" << slog::endl;
        }

-         // set batch size
-         ov::set_batch(model, modelBatchSize);
-         slog::info << "\tBatch size is set to " << modelBatchSize << slog::endl;
-
        auto startTime = std::chrono::steady_clock::now();
-         for (size_t i = 0, inputIndex = 0; i < modelBatchSize; i++, inputIndex++) {
+         for (size_t i = 0, inputIndex = 0; i < netBatchSize; i++, inputIndex++) {
            if (inputIndex >= imagePaths.size()) {
                inputIndex = 0;
            }
@@ -131,70 +136,82 @@ int main(int argc, char* argv[]) {

            images.push_back(image);
        }
-         if (images.empty())
-             throw std::logic_error("Valid input images were not found!");
+         if (images.empty()) throw std::logic_error("Valid input images were not found!");
+
+         // -----------------------------------------------------------------------------------------------------
+
+         // ---------------------------Prepare output blobs------------------------------------------------------
+         InferenceEngine::OutputsDataMap outputInfo(network.getOutputsInfo());
+         for (auto& item : outputInfo) {
+             item.second->setPrecision(InferenceEngine::Precision::FP32);
+         }
+
+         // -----------------------------------------------------------------------------------------------------

-         // Load model to the device
-         ov::CompiledModel compiled_model = core.compile_model(model, FLAGS_d);
-         logCompiledModelInfo(compiled_model, FLAGS_m, FLAGS_d);
+         // -------------------------Load model to the device----------------------------------------------------
+         auto executableNetwork = ie.LoadNetwork(network, FLAGS_d);
+         logExecNetworkInfo(executableNetwork, FLAGS_m, FLAGS_d);
+         slog::info << "\tBatch size is set to " << netBatchSize << slog::endl;

-         // Create Infer Request
-         ov::InferRequest infer_request = compiled_model.create_infer_request();
+         // ------------------------- Create Infer Request--------------------------------------------------------
+         auto infer_request = executableNetwork.CreateInferRequest();

-         // Set input data
-         // Iterate over all the input blobs
-         for (size_t idx = 0; idx < inputs.size(); idx++) {
-             ov::Tensor tensor = infer_request.get_input_tensor(idx);
-             ov::Shape shape = tensor.get_shape();
+         // -----------------------------------------------------------------------------------------------------

-             if (shape.size() == 4) {
+         // -------------------------------Set input data--------------------------------------------------------
+         /** Iterate over all the input blobs **/
+         for (const auto& inputInfoItem : inputInfo) {
+             InferenceEngine::Blob::Ptr input = infer_request.GetBlob(inputInfoItem.first);
+
+             /** Fill first input tensor with images. First b channel, then g and r channels **/
+             if (inputInfoItem.second->getTensorDesc().getDims().size() == 4) {
+                 /** Iterate over all input images **/
                for (size_t image_id = 0; image_id < images.size(); ++image_id)
-                     matToTensor(images[image_id], tensor, image_id);
+                     matToBlob(images[image_id], input, image_id);
            }

-             if (shape.size() == 2) {
-                 float* data = tensor.data<float>();
-                 data[0] = static_cast<float>(modelInputHeight);  // height
-                 data[1] = static_cast<float>(modelInputWidth);  // width
+             /** Fill second input tensor with image info **/
+             if (inputInfoItem.second->getTensorDesc().getDims().size() == 2) {
+                 InferenceEngine::LockedMemory<void> inputMapped =
+                     InferenceEngine::as<InferenceEngine::MemoryBlob>(input)->wmap();
+                 auto data = inputMapped.as<float*>();
+                 data[0] = static_cast<float>(netInputHeight);  // height
+                 data[1] = static_cast<float>(netInputWidth);  // width
                data[2] = 1;
            }
        }

-         // Do inference
-         infer_request.infer();
+         // -----------------------------------------------------------------------------------------------------

-         // Postprocess output blobs
-         float* do_data = nullptr;
-         float* masks_data = nullptr;

-         size_t BOX_DESCRIPTION_SIZE = 0;
+         // ----------------------------Do inference-------------------------------------------------------------
+         infer_request.Infer();
+         // -----------------------------------------------------------------------------------------------------

-         size_t BOXES = 0;
-         size_t C = 0;
-         size_t H = 0;
-         size_t W = 0;
+         // ---------------------------Postprocess output blobs--------------------------------------------------
+         const auto do_blob = infer_request.GetBlob(FLAGS_detection_output_name.c_str());
+         InferenceEngine::LockedMemory<const void> doBlobMapped =
+             InferenceEngine::as<InferenceEngine::MemoryBlob>(do_blob)->rmap();
+         const auto do_data = doBlobMapped.as<float*>();

-         for (size_t idx = 0; idx < outputs.size(); idx++) {
-             ov::Tensor tensor = infer_request.get_output_tensor(idx);
-             ov::Shape shape = tensor.get_shape();
-             size_t dims = shape.size();
-             if (dims == 2) {
-                 do_data = tensor.data<float>();
-                 // amount of elements in each detected box description (batch, label, prob, x1, y1, x2, y2)
-                 BOX_DESCRIPTION_SIZE = shape[1];
-             }
-             if (dims == 4) {
-                 masks_data = tensor.data<float>();
-                 BOXES = shape[ov::layout::batch_idx(layout_nchw)];
-                 C = shape[ov::layout::channels_idx(layout_nchw)];
-                 H = shape[ov::layout::height_idx(layout_nchw)];
-                 W = shape[ov::layout::width_idx(layout_nchw)];
-             }
-         }
+         const auto masks_blob = infer_request.GetBlob(FLAGS_masks_name.c_str());
+         InferenceEngine::LockedMemory<const void> masksBlobMapped =
+             InferenceEngine::as<InferenceEngine::MemoryBlob>(masks_blob)->rmap();
+         const auto masks_data = masksBlobMapped.as<float*>();

        const float PROBABILITY_THRESHOLD = 0.2f;
-         // threshold used to determine whether mask pixel corresponds to object or to background
-         const float MASK_THRESHOLD = 0.5f;
+         const float MASK_THRESHOLD = 0.5f;  // threshold used to determine whether mask pixel corresponds to object or to background
+         // amount of elements in each detected box description (batch, label, prob, x1, y1, x2, y2)
+         IE_ASSERT(do_blob->getTensorDesc().getDims().size() == 2);
+         size_t BOX_DESCRIPTION_SIZE = do_blob->getTensorDesc().getDims().back();
+
+         const InferenceEngine::TensorDesc& masksDesc = masks_blob->getTensorDesc();
+         IE_ASSERT(masksDesc.getDims().size() == 4);
+         size_t BOXES = getTensorBatch(masksDesc);
+         size_t C = getTensorChannels(masksDesc);
+         size_t H = getTensorHeight(masksDesc);
+         size_t W = getTensorWidth(masksDesc);
+

        size_t box_stride = W * H * C;

@@ -205,28 +222,22 @@ int main(int argc, char* argv[]) {
            output_images.push_back(img.clone());
        }

-         // Iterating over all boxes
+         /** Iterating over all boxes **/
        for (size_t box = 0; box < BOXES; ++box) {
            float* box_info = do_data + box * BOX_DESCRIPTION_SIZE;
            auto batch = static_cast<int>(box_info[0]);
-
            if (batch < 0)
                break;
-             if (batch >= static_cast<int>(modelBatchSize))
+             if (batch >= static_cast<int>(netBatchSize))
                throw std::logic_error("Invalid batch ID within detection output box");
-
            float prob = box_info[2];
-
            float x1 = std::min(std::max(0.0f, box_info[3] * images[batch].cols), static_cast<float>(images[batch].cols));
            float y1 = std::min(std::max(0.0f, box_info[4] * images[batch].rows), static_cast<float>(images[batch].rows));
            float x2 = std::min(std::max(0.0f, box_info[5] * images[batch].cols), static_cast<float>(images[batch].cols));
            float y2 = std::min(std::max(0.0f, box_info[6] * images[batch].rows), static_cast<float>(images[batch].rows));
-
            int box_width = static_cast<int>(x2 - x1);
            int box_height = static_cast<int>(y2 - y1);
-
-             auto class_id = static_cast<size_t>(box_info[1] + 1e-6);
-
+             auto class_id = static_cast<size_t>(box_info[1] + 1e-6f);
            if (prob > PROBABILITY_THRESHOLD && box_width > 0 && box_height > 0) {
                size_t color_index = class_color.emplace(class_id, class_color.size()).first->second;
                auto& color = CITYSCAPES_COLORS[color_index % arraySize(CITYSCAPES_COLORS)];
@@ -246,13 +257,11 @@ int main(int argc, char* argv[]) {
                    cv::Scalar(color.blue(), color.green(), color.red()));
                roi_input_img.copyTo(uchar_resized_mask, resized_mask_mat <= MASK_THRESHOLD);

-                 cv::addWeighted(uchar_resized_mask, alpha, roi_input_img, 1.0 - alpha, 0.0f, roi_input_img);
+                 cv::addWeighted(uchar_resized_mask, alpha, roi_input_img, 1.0f - alpha, 0.0f, roi_input_img);
                cv::rectangle(output_images[batch], roi, cv::Scalar(0, 0, 1), 1);
            }
        }
-
        metrics.update(startTime);
-
        for (size_t i = 0; i < output_images.size(); i++) {
            std::string imgName = "out" + std::to_string(i) + ".png";
            cv::imwrite(imgName, output_images[i]);
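As a reading aid, the synchronous Inference Engine flow that the `+` side of this diff follows can be summarized with the minimal sketch below. It is illustrative only, not code from the demo: the model path ("model.xml"), the device name ("CPU"), and the fact that inputs are left at their default contents are assumptions made for brevity. In the demo itself the input blobs are filled (images via matToBlob plus the image-info blob) before Infer() is called.

// Minimal sketch of the synchronous Inference Engine flow used on the "+" side of this diff.
// "model.xml" and "CPU" are placeholder values, not taken from the demo.
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;                                     // create the Inference Engine core
    auto network = ie.ReadNetwork("model.xml");                   // read the IR (.xml + .bin)
    auto executableNetwork = ie.LoadNetwork(network, "CPU");      // compile the network for a device
    auto inferRequest = executableNetwork.CreateInferRequest();   // create an inference request
    inferRequest.Infer();                                         // run synchronous inference
    auto outputName = network.getOutputsInfo().begin()->first;    // name of the first network output
    InferenceEngine::Blob::Ptr output = inferRequest.GetBlob(outputName);  // fetch the result blob
    return output ? 0 : 1;
}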