Skip to content

Commit 4e3b744

Browse files
committed
[FIX] Fixed existing comments to account for multi-method model
1 parent bdd64bd commit 4e3b744

File tree

9 files changed

+38
-40
lines changed

9 files changed

+38
-40
lines changed

packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ Detector::Detector(const std::string &modelSource,
1010
: BaseModel(modelSource, callInvoker) {}
1111

1212
std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage,
13-
const int inputWidth) {
13+
int32_t inputWidth) {
1414
/*
1515
Detector as an input accepts tensor with a shape of [1, 3, H, H].
16-
where H is a constant for model. In our supported models it is currently
16+
where H is a constant for model. In our supported model it is currently
1717
either H=800 or H=1280.
1818
Due to big influence of resize to quality of recognition the image preserves
1919
original aspect ratio and the missing parts are filled with padding.
@@ -23,7 +23,7 @@ std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage,
2323

2424
auto inputShapes = getAllInputShapes(methodName);
2525
if (inputShapes.empty()) {
26-
throw std::runtime_error("Detector model has no input shape for method: " +
26+
throw std::runtime_error("Detector model: invalid method name " +
2727
methodName);
2828
}
2929

packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <cstdint>
34
#include <executorch/extension/tensor/tensor_ptr.h>
45
#include <opencv2/opencv.hpp>
56
#include <rnexecutorch/models/BaseModel.h>
@@ -21,7 +22,7 @@ class Detector final : public BaseModel {
2122
explicit Detector(const std::string &modelSource,
2223
std::shared_ptr<react::CallInvoker> callInvoker);
2324
std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
24-
const int inputWidth);
25+
int32_t inputWidth);
2526

2627
private:
2728
std::vector<types::DetectorBBox>

packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ namespace models::ocr {
1717
2. Recognition - recognizing the text in the bounding boxes, the result is a
1818
list of strings and corresponding boxes & confidence scores.
1919
20-
Recognition uses three models, each model is resposible for recognizing text
21-
of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
20+
Recognition uses one model with three methods, each method is responsible for
21+
recognizing text of different sizes (e.g. large - 512x64, medium - 256x64,
22+
small - 128x64).
2223
*/
2324

2425
class OCR final {

packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Recognizer::Recognizer(const std::string &modelSource,
1313
: BaseModel(modelSource, callInvoker) {}
1414

1515
std::pair<std::vector<int32_t>, float>
16-
Recognizer::generate(const cv::Mat &grayImage, int inputWidth) {
16+
Recognizer::generate(const cv::Mat &grayImage, int32_t inputWidth) {
1717
/*
1818
In our pipeline we use three types of Recognizer, each designated to
1919
handle different image sizes:
@@ -26,8 +26,8 @@ Recognizer::generate(const cv::Mat &grayImage, int inputWidth) {
2626
std::string method_name = "forward_" + std::to_string(inputWidth);
2727
auto shapes = getAllInputShapes(method_name);
2828
if (shapes.empty()) {
29-
throw std::runtime_error(
30-
"Recognizer model has no input tensors for method " + method_name);
29+
throw std::runtime_error("Recognizer model: invalid method name " +
30+
method_name);
3131
}
3232
std::vector<int32_t> tensorDims = shapes[0];
3333
TensorPtr inputTensor =

packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class Recognizer final : public BaseModel {
2626
explicit Recognizer(const std::string &modelSource,
2727
std::shared_ptr<react::CallInvoker> callInvoker);
2828
std::pair<std::vector<int32_t>, float> generate(const cv::Mat &grayImage,
29-
int inputWidth);
29+
int32_t inputWidth);
3030

3131
private:
3232
std::pair<std::vector<int32_t>, float>

packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,19 @@
99

1010
namespace rnexecutorch::models::ocr {
1111
VerticalDetector::VerticalDetector(
12-
const std::string &modelSource, bool detectSingleCharacters,
12+
const std::string &modelSource,
1313
std::shared_ptr<react::CallInvoker> callInvoker)
1414
: BaseModel(modelSource, callInvoker) {
15-
this->detectSingleCharacters = detectSingleCharacters;
16-
modelSmallImageSize =
15+
this->modelSmallImageSize =
1716
calculateImageSizeForWidth(constants::kSmallDetectorWidth);
18-
modelMediumImageSize =
17+
this->modelMediumImageSize =
1918
calculateImageSizeForWidth(constants::kMediumDetectorWidth);
20-
modelLargeImageSize =
19+
this->modelLargeImageSize =
2120
calculateImageSizeForWidth(constants::kLargeDetectorWidth);
2221
}
2322

2423
std::vector<types::DetectorBBox>
25-
VerticalDetector::generate(const cv::Mat &inputImage, const int inputWidth,
24+
VerticalDetector::generate(const cv::Mat &inputImage, int32_t inputWidth,
2625
bool detectSingleCharacters) {
2726

2827
std::string methodName = "forward_" + std::to_string(inputWidth);

packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#pragma once
22

3+
#include <cstdint>
34
#include <executorch/extension/tensor/tensor_ptr.h>
45
#include <opencv2/opencv.hpp>
5-
66
#include <rnexecutorch/models/BaseModel.h>
77
#include <rnexecutorch/models/ocr/Types.h>
88

@@ -15,16 +15,17 @@ namespace rnexecutorch::models::ocr {
1515
1616
In Vertical OCR pipeline we make use of Detector two times:
1717
18-
1. Large Detector -- The differences between Detector used in standard OCR and
19-
Large Detector used in Vertical OCR is: a) To obtain detected boxes from heeat
20-
maps it utilizes `getDetBoxesFromTextMapVertical()` function rather than
18+
1. Large Detector through forward_1280 method -- The differences between
19+
Detector used in standard OCR and Large Detector used in Vertical OCR is: a) To
20+
obtain detected boxes from heat maps it utilizes
21+
`getDetBoxesFromTextMapVertical()` function rather than
2122
`getDetBoxesFromTextMap()`. Other than that, refer to the standard OCR
2223
Detector.
2324
24-
2. Narrow Detector -- it is designed to detect a single characters bounding
25-
boxes. `getDetBoxesFromTextMapVertical()` function acts differently for Narrow
26-
Detector and different textThreshold Value is passed. Additionally, the
27-
grouping of detected boxes is completely omited.
25+
2. Narrow Detector throguh forward_320 method -- it is designed to detect a
26+
single characters bounding boxes. `getDetBoxesFromTextMapVertical()` function
27+
acts differently for Narrow Detector and different textThreshold Value is
28+
passed. Additionally, the grouping of detected boxes is completely omited.
2829
2930
Vertical Detector pipeline differentiates between the Large Detector and Narrow
3031
Detector based on `detectSingleCharacters` flag passed to `generate()`.
@@ -36,16 +37,14 @@ using executorch::extension::TensorPtr;
3637
class VerticalDetector final : public BaseModel {
3738
public:
3839
explicit VerticalDetector(const std::string &modelSource,
39-
bool detectSingleCharacters,
4040
std::shared_ptr<react::CallInvoker> callInvoker);
4141
std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
42-
const int inputWidth,
42+
int32_t inputWidth,
4343
bool detectSingleCharacters);
4444

4545
cv::Size getModelImageSize(int inputWidth) const noexcept;
4646

4747
private:
48-
bool detectSingleCharacters;
4948
std::vector<types::DetectorBBox>
5049
postprocess(const Tensor &tensor, const cv::Size &modelInputSize,
5150
bool detectSingleCharacters) const;

packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ VerticalOCR::VerticalOCR(const std::string &detectorSource,
1111
const std::string &recognizerSource,
1212
std::string symbols, bool independentChars,
1313
std::shared_ptr<react::CallInvoker> invoker)
14-
: detector(detectorSource, true, invoker),
15-
recognizer(recognizerSource, invoker), converter(symbols),
16-
independentCharacters(independentChars), callInvoker(invoker) {}
14+
: detector(detectorSource, invoker), recognizer(recognizerSource, invoker),
15+
converter(symbols), independentCharacters(independentChars),
16+
callInvoker(invoker) {}
1717

1818
std::vector<types::OCRDetection> VerticalOCR::generate(std::string input) {
1919
cv::Mat image = image_processing::readImage(input);

packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,20 @@ namespace models::ocr {
2121
/*
2222
Vertical OCR is OCR designed to handle vertical texts.
2323
Vertical OCR pipeline consists of:
24-
1. Large Detector -- detects regions where text is located.
24+
1. Detector using forward_1280 method -- detects regions where text is located.
2525
Almost identical to the Detector in standard OCR.
2626
The result of this phase is a list of bounding boxes.
2727
Each detected box is then processed individually through the following steps:
28-
2. Narrow Detector -- designed for detecting where single characters
29-
are located.
30-
There are two different strategies used for vertical recognition:
31-
Strategy 1 "Independent Characters":
32-
Treating each character region found by Narrow Detector
33-
as compeletely independent.
34-
3. Each character is forwarded to Small Recognizer (64 x 64).
28+
2. Detector using forward_320 method -- designed for detecting where single
29+
characters are located. There are two different strategies used for vertical
30+
recognition: Strategy 1 "Independent Characters": Treating each character
31+
region found by Narrow Detector as completely independent.
32+
3. Each character is forwarded to Recognizer with input size 64 x 64.
3533
Strategy 2 "Joint Characters":
3634
The bounding boxes found by Narrow Detector are
3735
horizontally merged to create one wide image.
38-
3. One wide image is forwarded to Large Recognzer (512 x 64).
39-
Vertical OCR differentiate between those two strategies based on
36+
3. One wide image is forwarded to Recognizer with input size of 512
37+
x 64. Vertical OCR differentiates between those two strategies based on
4038
`independentChars` flag passed to the constructor.
4139
*/
4240

0 commit comments

Comments
 (0)