Commit 4ada8bd

ov2.0 c++ text detection demo (#3019)
* OV2.0 API for text_detection_demo/cpp and few fixes in docs
* rm blank line
* Update demos/text_detection_demo/cpp/include/text_recognition.hpp

  Co-authored-by: Zlobin Vladimir <[email protected]>

* make message clear

Co-authored-by: Zlobin Vladimir <[email protected]>
1 parent 7032b09 commit 4ada8bd

File tree

11 files changed: +654 -569 lines

demos/common/cpp/utils/include/utils/input_wrappers.hpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -31,7 +31,7 @@ class IInputSource {
     std::mutex sourceLock;
 };
 
-class InputChannel: public std::enable_shared_from_this<InputChannel> { // note: public inheritance
+class InputChannel: public std::enable_shared_from_this<InputChannel> { // note: public inheritance
 public:
     InputChannel(const InputChannel&) = delete;
     InputChannel& operator=(const InputChannel&) = delete;
```
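The `// note: public inheritance` comment on the changed line is load-bearing: `std::enable_shared_from_this` must be an accessible public base for `shared_from_this()` to work, because the owning `shared_ptr` can only wire up the hidden `weak_ptr` through an accessible base. A minimal illustration of that rule, unrelated to the demo sources:

```cpp
// Why the "public inheritance" note matters: shared_from_this() only works
// when enable_shared_from_this is an accessible (public) base, so that the
// owning shared_ptr can initialize the hidden weak_ptr. Illustration only;
// not part of the demo code.
#include <memory>

struct Channel : public std::enable_shared_from_this<Channel> {
    std::shared_ptr<Channel> self() { return shared_from_this(); }
};

int main() {
    auto ch = std::make_shared<Channel>();
    std::shared_ptr<Channel> again = ch->self();  // shares ownership with ch
    return again != nullptr ? 0 : 1;
}
```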

demos/text_detection_demo/cpp/README.md

Lines changed: 2 additions & 4 deletions

```diff
@@ -11,8 +11,8 @@ The demo shows an example of using neural networks to detect and recognize print
 * `text-recognition-0014`, which is a recognition network for recognizing text. You should add option `-tr_pt_first` and specify output layer name via `-tr_o_blb_nm` option for this model (see model [description](../../../models/intel/text-recognition-0014/README.md) for details).
 * `text-recognition-0015`, which is a recognition network for recognizing text. You should add options `-tr_pt_first`, `-m_tr_ss "?0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"` (supported symbols set), `-tr_o_blb_nm "logits"` (to specify output name) and `-dt simple` (to specify decoder type). You can also specify `-lower` option to convert predicted text to lower-case. See model [description](../../../models/intel/text-recognition-0015/README.md) for details.
 * `text-recognition-0016`, which is a recognition network for recognizing text. You should add options `-tr_pt_first`, `-m_tr_ss "?0123456789abcdefghijklmnopqrstuvwxyz"` (supported symbols set), `-tr_o_blb_nm "logits"` (to specify output name) and `-dt simple` (to specify decoder type). You can also specify `-lower` option to convert predicted text to lower-case. See model [description](../../../models/intel/text-recognition-0016/README.md) for details.
-* `text-recognition-resnet-fc`, which is a recognition network for recognizing text. You should add option `-tr_pt_first`.
-* `handwritten-score-recognition-0001`, which is a recognition network for recognizing handwritten score marks like `<digit>` or `<digit>.<digit>`.
+* `text-recognition-resnet-fc`, which is a recognition network for recognizing text. You should add option `-tr_pt_first` and `-dt simple` (to specify decoder type).
+* `handwritten-score-recognition-0003`, which is a recognition network for recognizing handwritten score marks like `<digit>` or `<digit>.<digit>`. You should add options `-m_tr_ss "0123456789._"` (supported symbols set) and `-dt ctc` (to specify decoder type).
 * `vitstr-small-patch16-224`, which is a recognition network for recognizing text. You should add options `-tr_pt_first`, `-m_tr_ss <path to vocab file>/.vocab.txt` (supported symbols set), `-dt simple` (to specify decoder type), `-start_index 1` (to process output from provided index) and `-pad " "` (to use specific pad symbol).
 
 ## How It Works
@@ -96,8 +96,6 @@ Options:
     -max_rect_num "<value>"   Optional. Maximum number of rectangles to recognize. If it is negative, number of rectangles to recognize is not limited.
     -d_td "<device>"          Optional. Specify the target device for the Text Detection model to infer on (the list of available devices is shown below). The demo will look for a suitable plugin for a specified device. By default, it is CPU.
     -d_tr "<device>"          Optional. Specify the target device for the Text Recognition model to infer on (the list of available devices is shown below). The demo will look for a suitable plugin for a specified device. By default, it is CPU.
-    -l "<absolute_path>"      Optional. Absolute path to a shared library with the CPU kernels implementation for custom layers.
-    -c "<absolute_path>"      Optional. Absolute path to the GPU kernels implementation for custom layers.
     -no_show                  Optional. If it is true, then detected text will not be shown on image frame. By default, it is false.
     -r                        Optional. Output Inference results as raw values.
     -u                        Optional. List of monitors to show initially.
```
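Put together, the per-model options documented above form one command line. A hypothetical invocation for `text-recognition-0015`, assuming the demo's usual `-i` input and `-m_td`/`-m_tr` model flags; the input and model paths are placeholders:

```sh
./text_detection_demo \
    -i input_video.mp4 \
    -m_td <path_to_text_detection_model>.xml \
    -m_tr <path_to>/text-recognition-0015.xml \
    -tr_pt_first \
    -m_tr_ss "?0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" \
    -tr_o_blb_nm "logits" \
    -dt simple \
    -lower
```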
demos/text_detection_demo/cpp/include/cnn.hpp

Lines changed: 30 additions & 52 deletions

```diff
@@ -1,65 +1,43 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2019-2022 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
+#include <map>
 #include <string>
 #include <vector>
 
-#include <inference_engine.hpp>
 #include <opencv2/opencv.hpp>
-#include <utils/ocv_common.hpp>
 
-class Cnn {
-public:
-    Cnn(const std::string &model_path, const std::string& model_type, InferenceEngine::Core & ie, const std::string & deviceName,
-        const cv::Size &new_input_resolution = cv::Size());
-
-    virtual InferenceEngine::BlobMap Infer(const cv::Mat &frame);
-
-    size_t ncalls() const {return ncalls_;}
-    double time_elapsed() const {return time_elapsed_;}
-    const cv::Size& input_size() const {return input_size_;}
-    const std::string model_type;
-protected:
-    cv::Size input_size_;
-    int channels_;
-    std::string input_name_;
-    InferenceEngine::InferRequest infer_request_;
-    std::vector<std::string> output_names_;
+#include "openvino/openvino.hpp"
 
-    double time_elapsed_;
-    size_t ncalls_;
-};
 
-class EncoderDecoderCNN : public Cnn {
-public:
-    EncoderDecoderCNN(std::string model_path, std::string model_type,
-                      InferenceEngine::Core &ie, const std::string &deviceName,
-                      const std::string &out_enc_hidden_name,
-                      const std::string &out_dec_hidden_name,
-                      const std::string &in_dec_hidden_name,
-                      const std::string &features_name,
-                      const std::string &in_dec_symbol_name,
-                      const std::string &out_dec_symbol_name,
-                      const std::string &logits_name,
-                      size_t end_token
-                      );
-    InferenceEngine::BlobMap Infer(const cv::Mat &frame) override;
-private:
-    InferenceEngine::InferRequest infer_request_decoder_;
-    std::string features_name_;
-    std::string out_enc_hidden_name_;
-    std::string out_dec_hidden_name_;
-    std::string in_dec_hidden_name_;
-    std::string in_dec_symbol_name_;
-    std::string out_dec_symbol_name_;
-    std::string logits_name_;
-    size_t end_token_;
-    void check_net_names(const InferenceEngine::OutputsDataMap &output_info_decoder,
-                         const InferenceEngine::InputsDataMap &input_info_decoder
-                         ) const;
+class Cnn {
+public:
+    Cnn(const std::string& modelPath, const std::string& modelType, const std::string& deviceName,
+        ov::Core& core, const cv::Size& new_input_resolution = cv::Size());
+
+    virtual std::map<std::string, ov::Tensor> Infer(const cv::Mat& frame) = 0;
+
+    size_t ncalls() const { return m_ncalls; }
+    double time_elapsed() const { return m_time_elapsed; }
+    const cv::Size& input_size() const { return m_input_size; }
+
+protected:
+    int m_channels;
+    cv::Size m_input_size;
+    cv::Size m_new_input_resolution;
+    const std::string m_modelPath;
+    const std::string m_modelType;
+    const std::string m_deviceName;
+    std::string m_input_name;
+    std::vector<std::string> m_output_names;
+    ov::Layout m_modelLayout;
+    ov::Core& m_core;
+    ov::InferRequest m_infer_request;
+    std::shared_ptr<ov::Model> m_model;
+
+    double m_time_elapsed;
+    size_t m_ncalls;
 };
-
-class DecoderNotFound {};
```
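This header carries the core of the port: `InferenceEngine::Core`, `BlobMap` and `InferRequest` give way to `ov::Core`, `std::map<std::string, ov::Tensor>` and `ov::InferRequest`, and `Infer` becomes pure virtual. The constructor body lives in a .cpp file this commit page does not show; below is a minimal sketch of the OpenVINO 2.0 load-and-compile sequence such a constructor typically runs. All names in the sketch are illustrative, not taken from the demo sources:

```cpp
// Minimal OpenVINO 2.0 read/compile sketch; function and variable names are
// hypothetical, not the demo's.
#include <memory>
#include <string>

#include "openvino/openvino.hpp"

ov::InferRequest makeInferRequest(ov::Core& core,
                                  const std::string& modelPath,
                                  const std::string& deviceName,
                                  std::shared_ptr<ov::Model>& modelOut) {
    // Read the model from disk (IR .xml/.bin, ONNX, ...).
    modelOut = core.read_model(modelPath);
    // Compile it for the target device, then create a request to run inference with.
    ov::CompiledModel compiled = core.compile_model(modelOut, deviceName);
    return compiled.create_infer_request();
}
```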
demos/text_detection_demo/cpp/include/text_detection.hpp

Lines changed: 24 additions & 5 deletions

```diff
@@ -1,14 +1,33 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2019-2022 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
+#include <map>
 #include <vector>
+#include <string>
 
-#include <inference_engine.hpp>
 #include <opencv2/opencv.hpp>
 
-std::vector<cv::RotatedRect> postProcess(const InferenceEngine::BlobMap &blobs, const cv::Size& image_size,
-                                         const cv::Size& image_shape, float cls_conf_threshold,
-                                         float link_conf_threshold);
+#include "openvino/openvino.hpp"
+
+#include "cnn.hpp"
+
+class TextDetector : public Cnn {
+public:
+    TextDetector(const std::string& model_path, const std::string& model_type, const std::string& deviceName,
+        ov::Core& core, const cv::Size& new_input_resolution = cv::Size()) :
+        Cnn(model_path, model_type, deviceName, core) {};
+
+    std::map<std::string, ov::Tensor> Infer(const cv::Mat& frame) override;
+
+    std::vector<cv::RotatedRect> postProcess(
+        const std::map<std::string, ov::runtime::Tensor>& output_tensors, const cv::Size& image_size,
+        const cv::Size& image_shape, float cls_conf_threshold, float link_conf_threshold);
+private:
+    cv::Mat decodeImageByJoin(
+        const std::vector<float>& cls_data, const ov::Shape& cls_data_shape,
+        const std::vector<float>& link_data, const ov::Shape& link_data_shape,
+        float cls_conf_threshold, float link_conf_threshold);
+};
```
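The free `postProcess` function has moved into `TextDetector` and now takes the named output tensors that `Infer` returns. A hypothetical helper (not from the demo sources) showing how one such `ov::Tensor` can be unpacked into the `std::vector<float>` plus `ov::Shape` form that `decodeImageByJoin` expects:

```cpp
// Hypothetical tensor-unpacking helper; the demo's actual code may differ.
#include <map>
#include <string>
#include <vector>

#include "openvino/openvino.hpp"

std::vector<float> tensorToVector(const std::map<std::string, ov::Tensor>& outputs,
                                  const std::string& name, ov::Shape& shape) {
    const ov::Tensor& tensor = outputs.at(name);
    shape = tensor.get_shape();                 // e.g. {N, C, H, W} for a conv output
    const float* data = tensor.data<float>();   // typed view of the tensor buffer
    return std::vector<float>(data, data + tensor.get_size());
}
```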
demos/text_detection_demo/cpp/include/text_recognition.hpp

Lines changed: 42 additions & 4 deletions

```diff
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2021 Intel Corporation
+// Copyright (C) 2019-2022 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -7,6 +7,44 @@
 #include <string>
 #include <vector>
 
-std::string CTCGreedyDecoder(const std::vector<float> &data, const std::string& alphabet, char pad_symbol, double *conf);
-std::string CTCBeamSearchDecoder(const std::vector<float> &data, const std::string& alphabet, char pad_symbol, double *conf, int bandwidth);
-std::string SimpleDecoder(const std::vector<float> &data, const std::string& alphabet, char pad_symbol, double *conf, int start_index);
+#include "openvino/openvino.hpp"
+
+#include "cnn.hpp"
+
+std::string CTCGreedyDecoder(const std::vector<float>& data, const std::string& alphabet, char pad_symbol, double* conf);
+std::string CTCBeamSearchDecoder(const std::vector<float>& data, const std::string& alphabet, char pad_symbol, double* conf, int bandwidth);
+std::string SimpleDecoder(const std::vector<float>& data, const std::string& alphabet, char pad_symbol, double* conf, int start_index);
+
+class TextRecognizer : public Cnn {
+public:
+    TextRecognizer(
+        const std::string& model_path, const std::string& model_type, const std::string& deviceName,
+        ov::Core& core,
+        const std::string& out_enc_hidden_name,
+        const std::string& out_dec_hidden_name,
+        const std::string& in_dec_hidden_name,
+        const std::string& features_name,
+        const std::string& in_dec_symbol_name,
+        const std::string& out_dec_symbol_name,
+        const std::string& logits_name,
+        size_t end_token);
+
+    std::map<std::string, ov::runtime::Tensor> Infer(const cv::Mat& frame) override;
+
+    const cv::Size& input_size() const { return m_input_size; }
+
+private:
+    void check_model_names(
+        const ov::OutputVector& input_info_decoder, const ov::OutputVector& output_info_decoder) const;
+
+    bool m_isCompositeModel;
+    ov::InferRequest m_infer_request_decoder;
+    std::string m_features_name;
+    std::string m_out_enc_hidden_name;
+    std::string m_out_dec_hidden_name;
+    std::string m_in_dec_hidden_name;
+    std::string m_in_dec_symbol_name;
+    std::string m_out_dec_symbol_name;
+    std::string m_logits_name;
+    size_t m_end_token;
+};
```
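The decoder free functions keep their signatures across the port. As one plausible reading of `SimpleDecoder` (a per-step argmax over the alphabet; the demo's real implementation, including its exact interpretation of `start_index`, may differ):

```cpp
// Hypothetical argmax decoder matching the SimpleDecoder signature above.
// Assumes `data` holds per-step class probabilities laid out step-major, and
// treats a non-negative start_index as a number of leading steps to skip.
#include <algorithm>
#include <string>
#include <vector>

std::string SimpleDecoderSketch(const std::vector<float>& data, const std::string& alphabet,
                                char pad_symbol, double* conf, int start_index) {
    std::string result;
    *conf = 1.0;
    const size_t num_classes = alphabet.length();
    if (num_classes == 0)
        return result;
    for (size_t t = static_cast<size_t>(start_index) * num_classes;
         t + num_classes <= data.size(); t += num_classes) {
        // Most probable class at this time step.
        auto begin = data.begin() + t;
        auto max_it = std::max_element(begin, begin + num_classes);
        *conf *= *max_it;                 // accumulate sequence confidence
        const char symbol = alphabet[max_it - begin];
        if (symbol == pad_symbol)         // pad terminates the sequence
            break;
        result += symbol;
    }
    return result;
}
```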
