[FIX] Fixed existing comments to account for multi-method model

benITo47 · benITo47 · commit 4e3b7443a72f · 2026-01-14T13:38:04.000+01:00
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp
@@ -10,10 +10,10 @@ Detector::Detector(const std::string &modelSource,
     : BaseModel(modelSource, callInvoker) {}
 
 std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage,
-                                                    const int inputWidth) {
+                                                    int32_t inputWidth) {
   /*
    Detector as an input accepts tensor with a shape of [1, 3, H, H].
-   where H is a constant for model. In our supported models it is currently
+   where H is a constant for model. In our supported model it is currently
    either H=800 or H=1280.
    Due to big influence of resize to quality of recognition the image preserves
    original aspect ratio and the missing parts are filled with padding.
@@ -23,7 +23,7 @@ std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage,
 
   auto inputShapes = getAllInputShapes(methodName);
   if (inputShapes.empty()) {
-    throw std::runtime_error("Detector model has no input shape for method: " +
+    throw std::runtime_error("Detector model: invalid method name " +
                              methodName);
   }
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <cstdint>
 #include <executorch/extension/tensor/tensor_ptr.h>
 #include <opencv2/opencv.hpp>
 #include <rnexecutorch/models/BaseModel.h>
@@ -21,7 +22,7 @@ class Detector final : public BaseModel {
   explicit Detector(const std::string &modelSource,
                     std::shared_ptr<react::CallInvoker> callInvoker);
   std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
-                                            const int inputWidth);
+                                            int32_t inputWidth);
 
 private:
   std::vector<types::DetectorBBox>
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h
@@ -17,8 +17,9 @@ namespace models::ocr {
  2. Recognition - recognizing the text in the bounding boxes, the result is a
  list of strings and corresponding boxes & confidence scores.
 
- Recognition uses three models, each model is resposible for recognizing text
- of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
+ Recognition uses one model with three methods, each method is responsible for
+ recognizing text of different sizes (e.g. large - 512x64, medium - 256x64,
+ small - 128x64).
 */
 
 class OCR final {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp
@@ -13,7 +13,7 @@ Recognizer::Recognizer(const std::string &modelSource,
     : BaseModel(modelSource, callInvoker) {}
 
 std::pair<std::vector<int32_t>, float>
-Recognizer::generate(const cv::Mat &grayImage, int inputWidth) {
+Recognizer::generate(const cv::Mat &grayImage, int32_t inputWidth) {
   /*
    In our pipeline we use three types of Recognizer, each designated to
    handle different image sizes:
@@ -26,8 +26,8 @@ Recognizer::generate(const cv::Mat &grayImage, int inputWidth) {
   std::string method_name = "forward_" + std::to_string(inputWidth);
   auto shapes = getAllInputShapes(method_name);
   if (shapes.empty()) {
-    throw std::runtime_error(
-        "Recognizer model has no input tensors for method " + method_name);
+    throw std::runtime_error("Recognizer model: invalid method name " +
+                             method_name);
   }
   std::vector<int32_t> tensorDims = shapes[0];
   TensorPtr inputTensor =
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h
@@ -26,7 +26,7 @@ class Recognizer final : public BaseModel {
   explicit Recognizer(const std::string &modelSource,
                       std::shared_ptr<react::CallInvoker> callInvoker);
   std::pair<std::vector<int32_t>, float> generate(const cv::Mat &grayImage,
-                                                  int inputWidth);
+                                                  int32_t inputWidth);
 
 private:
   std::pair<std::vector<int32_t>, float>
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp
@@ -9,20 +9,19 @@
 
 namespace rnexecutorch::models::ocr {
 VerticalDetector::VerticalDetector(
-    const std::string &modelSource, bool detectSingleCharacters,
+    const std::string &modelSource,
     std::shared_ptr<react::CallInvoker> callInvoker)
     : BaseModel(modelSource, callInvoker) {
-  this->detectSingleCharacters = detectSingleCharacters;
-  modelSmallImageSize =
+  this->modelSmallImageSize =
       calculateImageSizeForWidth(constants::kSmallDetectorWidth);
-  modelMediumImageSize =
+  this->modelMediumImageSize =
       calculateImageSizeForWidth(constants::kMediumDetectorWidth);
-  modelLargeImageSize =
+  this->modelLargeImageSize =
       calculateImageSizeForWidth(constants::kLargeDetectorWidth);
 }
 
 std::vector<types::DetectorBBox>
-VerticalDetector::generate(const cv::Mat &inputImage, const int inputWidth,
+VerticalDetector::generate(const cv::Mat &inputImage, int32_t inputWidth,
                            bool detectSingleCharacters) {
 
   std::string methodName = "forward_" + std::to_string(inputWidth);
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h
@@ -1,8 +1,8 @@
 #pragma once
 
+#include <cstdint>
 #include <executorch/extension/tensor/tensor_ptr.h>
 #include <opencv2/opencv.hpp>
-
 #include <rnexecutorch/models/BaseModel.h>
 #include <rnexecutorch/models/ocr/Types.h>
 
@@ -15,16 +15,17 @@ namespace rnexecutorch::models::ocr {
 
   In Vertical OCR pipeline we make use of Detector two times:
 
-  1. Large Detector -- The differences between Detector used in standard OCR and
- Large Detector used in Vertical OCR is: a) To obtain detected boxes from heeat
- maps it utilizes `getDetBoxesFromTextMapVertical()` function rather than
+  1. Large Detector through forward_1280 method -- The differences between
+ Detector used in standard OCR and Large Detector used in Vertical OCR is: a) To
+ obtain detected boxes from heat maps it utilizes
+ `getDetBoxesFromTextMapVertical()` function rather than
  'getDetBoxesFromTextMap()`. Other than that, refer to the standard OCR
  Detector.
 
-  2. Narrow Detector -- it is designed to detect a single characters bounding
- boxes. `getDetBoxesFromTextMapVertical()` function acts differently for Narrow
- Detector and different textThreshold Value is passed. Additionally, the
- grouping of detected boxes is completely omited.
+  2. Narrow Detector through forward_320 method -- it is designed to detect
+ single-character bounding boxes. `getDetBoxesFromTextMapVertical()` function
+ acts differently for Narrow Detector and different textThreshold Value is
+ passed. Additionally, the grouping of detected boxes is completely omitted.
 
   Vertical Detector pipeline differentiate the Large Detector and Narrow
  Detector based on `detectSingleCharacters` flag passed to the constructor.
@@ -36,16 +37,14 @@ using executorch::extension::TensorPtr;
 class VerticalDetector final : public BaseModel {
 public:
   explicit VerticalDetector(const std::string &modelSource,
-                            bool detectSingleCharacters,
                             std::shared_ptr<react::CallInvoker> callInvoker);
   std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
-                                            const int inputWidth,
+                                            int32_t inputWidth,
                                             bool detectSingleCharacters);
 
   cv::Size getModelImageSize(int inputWidth) const noexcept;
 
 private:
-  bool detectSingleCharacters;
   std::vector<types::DetectorBBox>
   postprocess(const Tensor &tensor, const cv::Size &modelInputSize,
               bool detectSingleCharacters) const;
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp
@@ -11,9 +11,9 @@ VerticalOCR::VerticalOCR(const std::string &detectorSource,
                          const std::string &recognizerSource,
                          std::string symbols, bool independentChars,
                          std::shared_ptr<react::CallInvoker> invoker)
-    : detector(detectorSource, true, invoker),
-      recognizer(recognizerSource, invoker), converter(symbols),
-      independentCharacters(independentChars), callInvoker(invoker) {}
+    : detector(detectorSource, invoker), recognizer(recognizerSource, invoker),
+      converter(symbols), independentCharacters(independentChars),
+      callInvoker(invoker) {}
 
 std::vector<types::OCRDetection> VerticalOCR::generate(std::string input) {
   cv::Mat image = image_processing::readImage(input);
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h
@@ -21,22 +21,20 @@ namespace models::ocr {
 /*
   Vertical OCR is OCR designed to handle vertical texts.
   Vertical OCR pipeline consists of:
-  1. Large Detector -- detects regions where text is located.
+  1. Detector using forward_1280 method-- detects regions where text is located.
      Almost identical to the Detector in standard OCR.
      The result of this phase is a list of bounding boxes.
   Each detected box is then processed individually through the following steps:
-    2. Narrow Detector -- designed for detecting where single characters
-       are located.
-    There are two different strategies used for vertical recognition:
-      Strategy 1 "Independent Characters":
-        Treating each character region found  by Narrow Detector
-        as compeletely independent.
-        3. Each character is forwarded to Small Recognizer (64 x 64).
+    2. Detector using forward_320 method -- designed for detecting where single
+  characters are located. There are two different strategies used for vertical
+  recognition: Strategy 1 "Independent Characters": Treating each character
+  region found by Narrow Detector as completely independent.
+        3. Each character is forwarded to Recognizer with input size 64 x 64.
       Strategy 2 "Joint Characters":
         The bounding boxes found by Narrow Detector are
         horizontally merged to create one wide image.
-        3. One wide image is forwarded to Large Recognzer (512 x 64).
-    Vertical OCR differentiate between those two strategies based on
+        3. One wide image is forwarded to Recognizer with input size 512 x 64.
+    Vertical OCR differentiates between those two strategies based on
     `independentChars` flag passed to the constructor.
 */