[REFACTOR] VerticalDetector inherits from Detector

benITo47 · benITo47 · commit cbf7e36a6cd3 · 2026-01-15T17:28:00.000+01:00
- Changed error messages in Detector classes
	- Added input width check in generate functions
	- Reverted 'export' keyword from modelUrls.ts
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h
@@ -1,7 +1,9 @@
 #pragma once
 
+#include <array>
 #include <cstdint>
 #include <opencv2/opencv.hpp>
+#include <vector>
 
 namespace rnexecutorch::models::ocr::constants {
 
@@ -30,6 +32,11 @@ inline constexpr int32_t kVerticalLineThreshold = 20;
 inline constexpr int32_t kSmallDetectorWidth = 320;
 inline constexpr int32_t kMediumDetectorWidth = 800;
 inline constexpr int32_t kLargeDetectorWidth = 1280;
+inline constexpr std::array<int32_t, 3> kDetectorInputWidths = {
+    kSmallDetectorWidth, kMediumDetectorWidth, kLargeDetectorWidth};
+inline constexpr std::array<int32_t, 4> kRecognizerInputWidths = {
+    kSmallVerticalRecognizerWidth, kSmallRecognizerWidth,
+    kMediumRecognizerWidth, kLargeRecognizerWidth};
 
 /*
  Mean and variance values for image normalization were used in EASYOCR pipeline
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp
@@ -1,13 +1,28 @@
 #include "Detector.h"
+#include "Constants.h"
+#include <cstdint>
 #include <rnexecutorch/data_processing/ImageProcessing.h>
 #include <rnexecutorch/models/ocr/Constants.h>
 #include <rnexecutorch/models/ocr/utils/DetectorUtils.h>
+#include <stdexcept>
 #include <string>
-
 namespace rnexecutorch::models::ocr {
 Detector::Detector(const std::string &modelSource,
                    std::shared_ptr<react::CallInvoker> callInvoker)
-    : BaseModel(modelSource, callInvoker) {}
+    : BaseModel(modelSource, callInvoker) {
+  for (auto input_size : constants::kDetectorInputWidths) {
+    std::string methodName = "forward_" + std::to_string(input_size);
+    auto inputShapes = getAllInputShapes(methodName);
+    // No reported shapes counts as rank 0 (avoids indexing an empty vector).
+    const std::size_t rank = inputShapes.empty() ? 0 : inputShapes[0].size();
+    if (rank < 2) {
+      throw std::runtime_error(
+          "Unexpected detector model input size for method:" + methodName +
+          ", expected at least 2 dimensions but got: " +
+          std::to_string(rank) + ".");
+    }
+  }
+}
 
 std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage,
                                                     int32_t inputWidth) {
@@ -19,43 +34,46 @@ std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage,
    original aspect ratio and the missing parts are filled with padding.
    */
 
-  std::string methodName = "forward_" + std::to_string(inputWidth);
+  utils::validateInputWidth(inputWidth, constants::kDetectorInputWidths,
+                            "Detector");
 
+  std::string methodName = "forward_" + std::to_string(inputWidth);
   auto inputShapes = getAllInputShapes(methodName);
-  if (inputShapes.empty()) {
-    throw std::runtime_error("Detector model: invalid method name " +
-                             methodName);
-  }
-
-  std::vector<int32_t> modelInputShape = inputShapes[0];
 
-  if (modelInputShape.size() < 2) {
-    throw std::runtime_error("Detector model: invalid method name: " +
-                             methodName);
-  }
-
-  cv::Size modelInputSize =
-      cv::Size(modelInputShape[modelInputShape.size() - 1],
-               modelInputShape[modelInputShape.size() - 2]);
+  cv::Size modelInputSize = calculateModelImageSize(inputWidth);
 
   cv::Mat resizedInputImage =
       image_processing::resizePadded(inputImage, modelInputSize);
   TensorPtr inputTensor = image_processing::getTensorFromMatrix(
       inputShapes[0], resizedInputImage, constants::kNormalizationMean,
       constants::kNormalizationVariance);
   auto forwardResult = BaseModel::execute(methodName, {inputTensor});
+
   if (!forwardResult.ok()) {
     throw std::runtime_error(
-        "Failed to forward, error: " +
+        "Failed to " + methodName + " error: " +
         std::to_string(static_cast<uint32_t>(forwardResult.error())));
   }
 
   return postprocess(forwardResult->at(0).toTensor(), modelInputSize);
 }
 
+// Image size taken from the last two dims of forward_<width>'s first input.
+cv::Size Detector::calculateModelImageSize(int32_t methodInputWidth) {
+  utils::validateInputWidth(methodInputWidth, constants::kDetectorInputWidths,
+                            "Detector");
+  const std::string methodName = "forward_" + std::to_string(methodInputWidth);
+  const auto inputShapes = getAllInputShapes(methodName);
+  if (inputShapes.empty() || inputShapes[0].size() < 2) {
+    throw std::runtime_error(
+        "Detector model: missing or malformed input shape for " + methodName);
+  }
+  const auto &shape = inputShapes[0];
+  return cv::Size(shape[shape.size() - 1], shape[shape.size() - 2]);
+}
+
 std::vector<types::DetectorBBox>
-Detector::postprocess(const Tensor &tensor,
-                      const cv::Size &modelInputSize) const {
+Detector::postprocess(const Tensor &tensor, const cv::Size &modelInputSize) {
   /*
    The output of the model consists of two matrices (heat maps):
    1. ScoreText(Score map) - The probability of a region containing character.
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h
@@ -17,15 +17,20 @@ namespace rnexecutorch::models::ocr {
 using executorch::aten::Tensor;
 using executorch::extension::TensorPtr;
 
-class Detector final : public BaseModel {
+class Detector : public BaseModel {
 public:
   explicit Detector(const std::string &modelSource,
                     std::shared_ptr<react::CallInvoker> callInvoker);
-  std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
-                                            int32_t inputWidth);
+  virtual std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
+                                                    int32_t inputWidth);
 
-private:
-  std::vector<types::DetectorBBox>
-  postprocess(const Tensor &tensor, const cv::Size &modelInputSize) const;
+  cv::Size calculateModelImageSize(int32_t methodInputWidth);
+
+protected:
+  TensorPtr runInference(const cv::Mat &inputImage, int32_t inputWidth,
+                         const std::string &detectorName);
+
+  std::vector<types::DetectorBBox> postprocess(const Tensor &tensor,
+                                               const cv::Size &modelInputSize);
 };
 } // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp
@@ -5,7 +5,8 @@
 
 namespace rnexecutorch::models::ocr {
 OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource,
-         std::string symbols, std::shared_ptr<react::CallInvoker> callInvoker)
+         const std::string &symbols,
+         std::shared_ptr<react::CallInvoker> callInvoker)
     : detector(detectorSource, callInvoker),
       recognitionHandler(recognizerSource, symbols, callInvoker) {}
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h
@@ -25,7 +25,7 @@ namespace models::ocr {
 class OCR final {
 public:
   explicit OCR(const std::string &detectorSource,
-               const std::string &recognizerSource, std::string symbols,
+               const std::string &recognizerSource, const std::string &symbols,
                std::shared_ptr<react::CallInvoker> callInvoker);
   std::vector<types::OCRDetection> generate(std::string input);
   std::size_t getMemoryLowerBound() const noexcept;
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp
@@ -5,7 +5,7 @@
 
 namespace rnexecutorch::models::ocr {
 RecognitionHandler::RecognitionHandler(
-    const std::string &recognizerSource, std::string symbols,
+    const std::string &recognizerSource, const std::string &symbols,
     std::shared_ptr<react::CallInvoker> callInvoker)
     : converter(symbols), recognizer(recognizerSource, callInvoker) {
   memorySizeLowerBound = recognizer.getMemoryLowerBound();
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h
@@ -18,7 +18,7 @@ namespace rnexecutorch::models::ocr {
 class RecognitionHandler final {
 public:
   explicit RecognitionHandler(const std::string &recognizer,
-                              std::string symbols,
+                              const std::string &symbols,
                               std::shared_ptr<react::CallInvoker> callInvoker);
   std::vector<types::OCRDetection>
   recognize(std::vector<types::DetectorBBox> bboxesList, cv::Mat &imgGray,
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp
@@ -1,9 +1,11 @@
 #include "Recognizer.h"
+#include "Constants.h"
 #include <numeric>
 #include <rnexecutorch/data_processing/ImageProcessing.h>
 #include <rnexecutorch/data_processing/Numerical.h>
 #include <rnexecutorch/models/ocr/Constants.h>
 #include <rnexecutorch/models/ocr/Types.h>
+#include <rnexecutorch/models/ocr/utils/DetectorUtils.h>
 #include <rnexecutorch/models/ocr/utils/RecognizerUtils.h>
 #include <string>
 
@@ -23,11 +25,14 @@ Recognizer::generate(const cv::Mat &grayImage, int32_t inputWidth) {
    The `generate` function as an argument accepts an image in grayscale
    already resized to the expected size.
   */
+  utils::validateInputWidth(inputWidth, constants::kRecognizerInputWidths,
+                            "Recognizer");
+
   std::string method_name = "forward_" + std::to_string(inputWidth);
   auto shapes = getAllInputShapes(method_name);
   if (shapes.empty()) {
-    throw std::runtime_error("Recognizer model: invalid method name " +
-                             method_name);
+    throw std::runtime_error("Recognizer model: Input shapes for  " +
+                             method_name + " not found");
   }
   std::vector<int32_t> tensorDims = shapes[0];
   TensorPtr inputTensor =
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp
@@ -707,4 +707,21 @@ groupTextBoxes(std::vector<types::DetectorBBox> &boxes, float centerThreshold,
   return orderedSortedBoxes;
 }
 
+void validateInputWidth(int32_t inputWidth, std::span<const int32_t> constants,
+                        std::string modelName) {
+  // Nothing to report when the requested width is one of the allowed values.
+  if (std::ranges::find(constants, inputWidth) != constants.end()) {
+    return;
+  }
+
+  // Render the allowed widths as "a, b, c" for the error message.
+  std::string allowed;
+  for (const int32_t width : constants) {
+    allowed += (allowed.empty() ? "" : ", ") + std::to_string(width);
+  }
+  throw std::runtime_error("Unexpected input width for " + modelName +
+                           "! Expected [" + allowed + "] but got " +
+                           std::to_string(inputWidth) + ".");
+}
+
 } // namespace rnexecutorch::models::ocr::utils
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h
@@ -78,4 +78,19 @@ groupTextBoxes(std::vector<types::DetectorBBox> &boxes, float centerThreshold,
                float distanceThreshold, float heightThreshold,
                int32_t minSideThreshold, int32_t maxSideThreshold,
                int32_t maxWidth);
+
+/**
+ * Validates that the provided image width is supported by the model.
+ * This function checks the input width against the allowed widths passed
+ * in the `constants` span. If the width is not found, it constructs a
+ * descriptive error message listing all valid options.
+ *
+ * @param inputWidth The width of the input image to be validated.
+ * @param constants Span of allowed input widths.
+ * @param modelName Model name used when generating the error message.
+ * @throws std::runtime_error If inputWidth is not present in the allowed
+ * input widths passed in `constants`.
+ */
+void validateInputWidth(int32_t inputWidth, std::span<const int32_t> constants,
+                        std::string modelName);
 } // namespace rnexecutorch::models::ocr::utils
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp
@@ -11,36 +11,37 @@ namespace rnexecutorch::models::ocr {
 VerticalDetector::VerticalDetector(
     const std::string &modelSource,
     std::shared_ptr<react::CallInvoker> callInvoker)
-    : BaseModel(modelSource, callInvoker) {
-  this->modelSmallImageSize =
-      calculateImageSizeForWidth(constants::kSmallDetectorWidth);
-  this->modelMediumImageSize =
-      calculateImageSizeForWidth(constants::kMediumDetectorWidth);
-  this->modelLargeImageSize =
-      calculateImageSizeForWidth(constants::kLargeDetectorWidth);
-}
+    : Detector(modelSource, callInvoker) {}
 
 std::vector<types::DetectorBBox>
-VerticalDetector::generate(const cv::Mat &inputImage, int32_t inputWidth,
-                           bool detectSingleCharacters) {
+VerticalDetector::generate(const cv::Mat &inputImage, int32_t inputWidth) {
 
-  std::string methodName = "forward_" + std::to_string(inputWidth);
+  bool detectSingleCharacters =
+      !(inputWidth >= constants::kMediumDetectorWidth);
+
+  utils::validateInputWidth(inputWidth, constants::kDetectorInputWidths,
+                            "VerticalDetector");
 
+  std::string methodName = "forward_" + std::to_string(inputWidth);
   auto inputShapes = getAllInputShapes(methodName);
 
+  cv::Size modelInputSize = calculateModelImageSize(inputWidth);
+
   cv::Mat resizedInputImage =
-      image_processing::resizePadded(inputImage, getModelImageSize(inputWidth));
+      image_processing::resizePadded(inputImage, modelInputSize);
   TensorPtr inputTensor = image_processing::getTensorFromMatrix(
       inputShapes[0], resizedInputImage, constants::kNormalizationMean,
       constants::kNormalizationVariance);
   auto forwardResult = BaseModel::execute(methodName, {inputTensor});
+
   if (!forwardResult.ok()) {
     throw std::runtime_error(
-        "Failed to forward, error: " +
+        "Failed to " + methodName + " error: " +
         std::to_string(static_cast<uint32_t>(forwardResult.error())));
   }
   return postprocess(forwardResult->at(0).toTensor(),
-                     getModelImageSize(inputWidth), detectSingleCharacters);
+                     modelInputSize, // already computed above; avoid a re-query
+                     detectSingleCharacters);
 }
 
 std::vector<types::DetectorBBox>
@@ -87,45 +88,4 @@ VerticalDetector::postprocess(const Tensor &tensor,
   return bBoxesList;
 }
 
-cv::Size
-VerticalDetector::calculateImageSizeForWidth(const int methoInputWidth) {
-
-  std::string methodName = "forward_" + std::to_string(methoInputWidth);
-
-  auto inputShapes = getAllInputShapes(methodName);
-
-  if (inputShapes.empty()) {
-    throw std::runtime_error("Detector model has no input shape for method: " +
-                             methodName);
-  }
-  std::vector<int32_t> modelInputShape = inputShapes[0];
-
-  if (modelInputShape.size() < 2) {
-    throw std::runtime_error("Unexpected detector model input size, expected "
-                             "at least 2 dimensions but got: " +
-                             std::to_string(modelInputShape.size()) + ".");
-  }
-
-  cv::Size modelInputSize =
-      cv::Size(modelInputShape[modelInputShape.size() - 1],
-               modelInputShape[modelInputShape.size() - 2]);
-  return modelInputSize;
-}
-
-cv::Size VerticalDetector::getModelImageSize(int inputWidth) const noexcept {
-  switch (inputWidth) {
-  case constants::kSmallDetectorWidth:
-    return modelSmallImageSize;
-    break;
-  case constants::kMediumDetectorWidth:
-    return modelMediumImageSize;
-    break;
-  case constants::kLargeDetectorWidth:
-    return modelLargeImageSize;
-    break;
-  default:
-    return modelMediumImageSize;
-  }
-}
-
 } // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h
@@ -4,6 +4,7 @@
 #include <executorch/extension/tensor/tensor_ptr.h>
 #include <opencv2/opencv.hpp>
 #include <rnexecutorch/models/BaseModel.h>
+#include <rnexecutorch/models/ocr/Detector.h>
 #include <rnexecutorch/models/ocr/Types.h>
 
 namespace rnexecutorch::models::ocr {
@@ -34,24 +35,17 @@ namespace rnexecutorch::models::ocr {
 using executorch::aten::Tensor;
 using executorch::extension::TensorPtr;
 
-class VerticalDetector final : public BaseModel {
+class VerticalDetector final : public Detector {
 public:
   explicit VerticalDetector(const std::string &modelSource,
                             std::shared_ptr<react::CallInvoker> callInvoker);
-  std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
-                                            int32_t inputWidth,
-                                            bool detectSingleCharacters);
 
-  cv::Size getModelImageSize(int inputWidth) const noexcept;
+  std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
+                                            int32_t inputWidth) override;
 
 private:
   std::vector<types::DetectorBBox>
   postprocess(const Tensor &tensor, const cv::Size &modelInputSize,
               bool detectSingleCharacters) const;
-  cv::Size calculateImageSizeForWidth(const int methoInputWidth);
-
-  cv::Size modelSmallImageSize;
-  cv::Size modelMediumImageSize;
-  cv::Size modelLargeImageSize;
 };
 } // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
diff --git a/packages/react-native-executorch/src/constants/ocr/models.ts b/packages/react-native-executorch/src/constants/ocr/models.ts
diff --git a/packages/react-native-executorch/src/constants/versions.ts b/packages/react-native-executorch/src/constants/versions.ts