diff --git a/apps/computer-vision/app/ocr/index.tsx b/apps/computer-vision/app/ocr/index.tsx
index 1fb282791..546e50a91 100644
--- a/apps/computer-vision/app/ocr/index.tsx
+++ b/apps/computer-vision/app/ocr/index.tsx
@@ -16,7 +16,9 @@ export default function OCRScreen() {
height: number;
}>();
- const model = useOCR({ model: OCR_ENGLISH });
+ const model = useOCR({
+ model: OCR_ENGLISH,
+ });
const { setGlobalGenerating } = useContext(GeneratingContext);
useEffect(() => {
setGlobalGenerating(model.isGenerating);
diff --git a/docs/docs/02-hooks/02-computer-vision/useOCR.md b/docs/docs/02-hooks/02-computer-vision/useOCR.md
index d07efd601..4813b5c1f 100644
--- a/docs/docs/02-hooks/02-computer-vision/useOCR.md
+++ b/docs/docs/02-hooks/02-computer-vision/useOCR.md
@@ -30,12 +30,6 @@ function App() {
Type definitions
```typescript
-interface RecognizerSources {
- recognizerLarge: string | number;
- recognizerMedium: string | number;
- recognizerSmall: string | number;
-}
-
type OCRLanguage =
| 'abq'
| 'ady'
@@ -121,9 +115,7 @@ interface OCRDetection {
-**`model`** - Object containing the detector source, recognizer sources, and language.
+**`model`** - Object containing the detector source, recognizer source, and language.
- **`detectorSource`** - A string that specifies the location of the detector binary.
-- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels.
-- **`recognizerMedium`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 256 pixels.
-- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 128 pixels.
+- **`recognizerSource`** - A string that specifies the location of the recognizer binary.
- **`language`** - A parameter that specifies the language of the text to be recognized by the OCR.
**`preventLoad?`** - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
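+
+For example, a minimal sketch that defers loading until a screen is focused (`isScreenFocused` is an illustrative variable, not part of the API):
+
+```typescript
+const model = useOCR({ model: OCR_ENGLISH, preventLoad: !isScreenFocused });
+```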
@@ -186,23 +178,18 @@ function App() {
}
```
-## Language-Specific Recognizers
+## Alphabet-Specific Recognizers
-Each supported language requires its own set of recognizer models.
-The built-in constants such as `RECOGNIZER_EN_CRNN_512`, `RECOGNIZER_PL_CRNN_256`, etc., point to specific models trained for a particular language.
+Each supported alphabet requires its own recognizer model. The built-in constants, such as `RECOGNIZER_LATIN_CRNN` or `RECOGNIZER_CYRILLIC_CRNN`, point to specific models trained for a particular alphabet.
> For example:
>
-> - To recognize **English** text, use:
-> - `RECOGNIZER_EN_CRNN_512`
-> - `RECOGNIZER_EN_CRNN_256`
-> - `RECOGNIZER_EN_CRNN_128`
-> - To recognize **Polish** text, use:
-> - `RECOGNIZER_PL_CRNN_512`
-> - `RECOGNIZER_PL_CRNN_256`
-> - `RECOGNIZER_PL_CRNN_128`
+> - To recognize text in languages using the **Latin** alphabet (like Polish or German), use:
+> - `RECOGNIZER_LATIN_CRNN`
+> - To recognize text in languages using the **Cyrillic** alphabet (like Russian or Ukrainian), use:
+> - `RECOGNIZER_CYRILLIC_CRNN`
-You need to make sure the recognizer models you pass in `recognizerSources` match the `language` you specify.
+You need to make sure the recognizer model you pass in `recognizerSource` matches the alphabet of the `language` you specify.
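+
+A minimal sketch of a custom configuration (the `DETECTOR_CRAFT` constant and the language code are illustrative, not verified exports):
+
+```typescript
+const model = useOCR({
+  model: {
+    detectorSource: DETECTOR_CRAFT, // hypothetical detector constant
+    recognizerSource: RECOGNIZER_LATIN_CRNN, // Latin-alphabet recognizer
+    language: 'en', // must use the Latin alphabet to match the recognizer
+  },
+});
+```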
## Supported languages
@@ -275,33 +262,27 @@ You need to make sure the recognizer models you pass in `recognizerSources` matc
## Supported models
-| Model | Type |
-| ------------------------------------------------------- | :--------: |
-| [CRAFT_800\*](https://github.com/clovaai/CRAFT-pytorch) | Detector |
-| [CRNN_512\*](https://www.jaided.ai/easyocr/modelhub/) | Recognizer |
-| [CRNN_256\*](https://www.jaided.ai/easyocr/modelhub/) | Recognizer |
-| [CRNN_128\*](https://www.jaided.ai/easyocr/modelhub/) | Recognizer |
-
-\* - The number following the underscore (\_) indicates the input image width used during model export.
+| Model | Type |
+| ------------------------------------------------- | :--------: |
+| [CRAFT](https://github.com/clovaai/CRAFT-pytorch) | Detector |
+| [CRNN](https://www.jaided.ai/easyocr/modelhub/) | Recognizer |
## Benchmarks
### Model size
-| Model | XNNPACK [MB] |
-| ------------------------------ | :----------: |
-| Detector (CRAFT_800_QUANTIZED) | 19.8 |
-| Recognizer (CRNN_512) | 15 - 18\* |
-| Recognizer (CRNN_256) | 16 - 18\* |
-| Recognizer (CRNN_128) | 17 - 19\* |
+| Model | XNNPACK [MB] |
+| -------------------------- | :-----------: |
+| Detector (CRAFT_QUANTIZED) | 20.9 |
+| Recognizer (CRNN) | 18.5 - 25.2\* |
\* - The model weights vary depending on the language.
### Memory usage
-| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| ------------------------------------------------------------------------------------------------------ | :--------------------: | :----------------: |
-| Detector (CRAFT_800_QUANTIZED) + Recognizer (CRNN_512) + Recognizer (CRNN_256) + Recognizer (CRNN_128) | 1400 | 1320 |
+| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1400 | 1320 |
### Inference time
@@ -317,16 +298,13 @@ Times presented in the tables are measured as consecutive runs of the model. Ini
**Time measurements:**
-| Metric | iPhone 17 Pro<br/>[ms] | iPhone 16 Pro<br/>[ms] | iPhone SE 3 | Samsung Galaxy S24<br/>[ms] | OnePlus 12<br/>[ms] |
-| ---------------------------------- | ------------------------- | ------------------------- | ----------- | ------------------------------ | ---------------------- |
-| **Total Inference Time** | 652 | 600 | 2855 | 1092 | 1034 |
-| **Detector (CRAFT_800_QUANTIZED)** | 220 | 221 | 1740 | 521 | 492 |
-| **Recognizer (CRNN_512)** | | | | | |
-| ├─ Average Time | 45 | 38 | 110 | 40 | 38 |
-| ├─ Total Time (3 runs) | 135 | 114 | 330 | 120 | 114 |
-| **Recognizer (CRNN_256)** | | | | | |
-| ├─ Average Time | 21 | 18 | 54 | 20 | 19 |
-| ├─ Total Time (7 runs) | 147 | 126 | 378 | 140 | 133 |
-| **Recognizer (CRNN_128)** | | | | | |
-| ├─ Average Time | 11 | 9 | 27 | 10 | 10 |
-| ├─ Total Time (7 runs) | 77 | 63 | 189 | 70 | 70 |
+Notice that the recognizer model was executed between 3 and 7 times during a single recognition.
+The values below represent the averages across all runs for the benchmark image.
+
+| Model | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------ | ------------------ | ----------- | ----------------------- | --------------- |
+| **Total Inference Time** | 652 | 600 | 2855 | 1092 | 1034 |
+| Detector (CRAFT) `forward_800` | 220 | 221 | 1740 | 521 | 492 |
+| Recognizer (CRNN) `forward_512` | 45 | 38 | 110 | 40 | 38 |
+| Recognizer (CRNN) `forward_256` | 21 | 18 | 54 | 20 | 19 |
+| Recognizer (CRNN) `forward_128` | 11 | 9 | 27 | 10 | 10 |
diff --git a/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md b/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md
index f317d527e..f4840be37 100644
--- a/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md
+++ b/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md
@@ -129,12 +129,10 @@ interface OCRDetection {
### Arguments
-**`model`** - Object containing the detector sources, recognizer sources, and language.
+**`model`** - Object containing the detector source, recognizer source, and language.
-- **`detectorLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 1280 pixels.
-- **`detectorNarrow`** - A string that specifies the location of the detector binary file which accepts input images with a width of 320 pixels.
-- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels.
-- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 64 pixels.
+- **`detectorSource`** - A string that specifies the location of the detector binary.
+- **`recognizerSource`** - A string that specifies the location of the recognizer binary.
- **`language`** - A parameter that specifies the language of the text to be recognized by the OCR.
**`independentCharacters`** – A boolean parameter that indicates whether the text in the image consists of a random sequence of characters. If set to true, the algorithm will scan each character individually instead of reading them as continuous text.
@@ -202,21 +200,18 @@ function App() {
}
```
-## Language-Specific Recognizers
+## Alphabet-Specific Recognizers
-Each supported language requires its own set of recognizer models.
-The built-in constants such as `RECOGNIZER_EN_CRNN_512`, `RECOGNIZER_PL_CRNN_64`, etc., point to specific models trained for a particular language.
+Each supported alphabet requires its own recognizer model. The built-in constants, such as `RECOGNIZER_LATIN_CRNN` or `RECOGNIZER_CYRILLIC_CRNN`, point to specific models trained for a particular alphabet.
> For example:
>
-> - To recognize **English** text, use:
-> - `RECOGNIZER_EN_CRNN_512`
-> - `RECOGNIZER_EN_CRNN_64`
-> - To recognize **Polish** text, use:
-> - `RECOGNIZER_PL_CRNN_512`
-> - `RECOGNIZER_PL_CRNN_64`
+> - To recognize text in languages using the **Latin** alphabet (like Polish or German), use:
+> - `RECOGNIZER_LATIN_CRNN`
+> - To recognize text in languages using the **Cyrillic** alphabet (like Russian or Ukrainian), use:
+> - `RECOGNIZER_CYRILLIC_CRNN`
-You need to make sure the recognizer models you pass in `recognizerSources` match the `language` you specify.
+You need to make sure the recognizer model you pass in `recognizerSource` matches the alphabet of the `language` you specify.
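+
+A minimal sketch (the `DETECTOR_CRAFT` constant and the language code are illustrative; `independentCharacters` is assumed to sit alongside `model` in the hook options):
+
+```typescript
+const model = useVerticalOCR({
+  model: {
+    detectorSource: DETECTOR_CRAFT, // hypothetical detector constant
+    recognizerSource: RECOGNIZER_LATIN_CRNN, // Latin-alphabet recognizer
+    language: 'en',
+  },
+  independentCharacters: true, // scan character by character
+});
+```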
## Supported languages
@@ -289,14 +284,10 @@ You need to make sure the recognizer models you pass in `recognizerSources` matc
## Supported models
-| Model | Type |
-| -------------------------------------------------------- | ---------- |
-| [CRAFT_1280\*](https://github.com/clovaai/CRAFT-pytorch) | Detector |
-| [CRAFT_320\*](https://github.com/clovaai/CRAFT-pytorch) | Detector |
-| [CRNN_512\*](https://www.jaided.ai/easyocr/modelhub/) | Recognizer |
-| [CRNN_64\*](https://www.jaided.ai/easyocr/modelhub/) | Recognizer |
-
-\* - The number following the underscore (\_) indicates the input image width used during model export.
+| Model | Type |
+| ------------------------------------------------- | :--------: |
+| [CRAFT](https://github.com/clovaai/CRAFT-pytorch) | Detector |
+| [CRNN](https://www.jaided.ai/easyocr/modelhub/) | Recognizer |
## Benchmarks
@@ -313,10 +304,9 @@ You need to make sure the recognizer models you pass in `recognizerSources` matc
### Memory usage
-| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| -------------------------------------------------------------------- | :--------------------: | :----------------: |
-| Detector (CRAFT_1280) + Detector (CRAFT_320) + Recognizer (CRNN_512) | 1540 | 1470 |
-| Detector(CRAFT_1280) + Detector(CRAFT_320) + Recognizer (CRNN_64) | 1070 | 1000 |
+| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1000 - 1600 | 1000 - 1500 |
### Inference time
@@ -332,16 +322,13 @@ Times presented in the tables are measured as consecutive runs of the model. Ini
**Time measurements:**
-| Metric | iPhone 17 Pro<br/>[ms] | iPhone 16 Pro<br/>[ms] | iPhone SE 3 | Samsung Galaxy S24<br/>[ms] | OnePlus 12<br/>[ms] |
-| -------------------------------------------------------------------------- | ------------------------- | ------------------------- | ----------- | ------------------------------ | ---------------------- |
-| **Total Inference Time** | 1104 | 1113 | 8840 | 2845 | 2640 |
-| **Detector (CRAFT_1280_QUANTIZED)** | 501 | 507 | 4317 | 1405 | 1275 |
-| **Detector (CRAFT_320_QUANTIZED)** | | | | | |
-| ├─ Average Time | 125 | 121 | 1060 | 338 | 299 |
-| ├─ Total Time (4 runs) | 500 | 484 | 4240 | 1352 | 1196 |
-| **Recognizer (CRNN_64)**<br/>(_With Flag `independentChars == true`_) | | | | | |
-| ├─ Average Time | 5 | 6 | 14 | 7 | 6 |
-| ├─ Total Time (21 runs) | 105 | 126 | 294 | 147 | 126 |
-| **Recognizer (CRNN_512)**<br/>(_With Flag `independentChars == false`_) | | | | | |
-| ├─ Average Time | 46 | 42 | 109 | 47 | 37 |
-| ├─ Total Time (4 runs) | 184 | 168 | 436 | 188 | 148 |
+Notice that the recognizer model, as well as the detector's `forward_320` method, was executed between 4 and 21 times during a single recognition.
+The values below represent the averages across all runs for the benchmark image.
+
+| Model | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------ | ------------------ | ----------- | ----------------------- | --------------- |
+| **Total Inference Time** | 1104 | 1113 | 8840 | 2845 | 2640 |
+| Detector (CRAFT) `forward_1280` | 501 | 507 | 4317 | 1405 | 1275 |
+| Detector (CRAFT) `forward_320` | 125 | 121 | 1060 | 338 | 299 |
+| Recognizer (CRNN) `forward_512` | 46 | 42 | 109 | 47 | 37 |
+| Recognizer (CRNN) `forward_64` | 5 | 6 | 14 | 7 | 6 |
diff --git a/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md b/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md
index c46e65970..f8a42fdf2 100644
--- a/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md
+++ b/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md
@@ -22,11 +22,11 @@ const detections = await ocrModule.forward(imageUri);
### Methods
-| Method | Type | Description |
-| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `load`    | `(model: { detectorSource: ResourceSource; recognizerLarge: ResourceSource; recognizerMedium: ResourceSource; recognizerSmall: ResourceSource; language: OCRLanguage }, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorSource` is a string that specifies the location of the detector binary, `recognizerLarge` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels, `recognizerMedium` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 256 pixels, `recognizerSmall` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 128 pixels, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
-| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
-| `delete` | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete model while it's generating. |
+| Method | Type | Description |
+| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `load`    | `(model: { detectorSource: ResourceSource; recognizerSource: ResourceSource; language: OCRLanguage }, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorSource` is a string that specifies the location of the detector binary, `recognizerSource` is a string that specifies the location of the recognizer binary, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
+| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
+| `delete` | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete model while it's generating. |
Type definitions
@@ -116,12 +116,10 @@ interface OCRDetection {
To load the model, use the `load` method. It accepts an object:
-**`model`** - Object containing the detector source, recognizer sources, and language.
+**`model`** - Object containing the detector source, recognizer source, and language.
- **`detectorSource`** - A string that specifies the location of the detector binary.
-- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels.
-- **`recognizerMedium`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 256 pixels.
-- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 128 pixels.
+- **`recognizerSource`** - A string that specifies the location of the recognizer binary.
- **`language`** - A parameter that specifies the language of the text to be recognized by the OCR.
**`onDownloadProgressCallback`** - (Optional) Function called on download progress.
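+
+A minimal sketch of loading and running the module (the `DETECTOR_CRAFT` constant is illustrative, and `ocrModule` is assumed to be an `OCRModule` instance):
+
+```typescript
+await ocrModule.load(
+  {
+    detectorSource: DETECTOR_CRAFT, // hypothetical detector constant
+    recognizerSource: RECOGNIZER_LATIN_CRNN,
+    language: 'en',
+  },
+  (progress) => console.log('download progress:', progress)
+);
+const detections = await ocrModule.forward(imageUri);
+ocrModule.delete(); // release memory once done
+```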
diff --git a/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md b/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md
index ecbe5d9d9..c60a95178 100644
--- a/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md
+++ b/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md
@@ -26,11 +26,11 @@ const detections = await verticalOCRModule.forward(imageUri);
### Methods
-| Method | Type | Description |
-| --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `load`    | `(model: { detectorLarge: ResourceSource; detectorNarrow: ResourceSource; recognizerLarge: ResourceSource; recognizerSmall: ResourceSource; language: OCRLanguage }, independentCharacters: boolean, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorLarge` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 1280 pixels, `detectorNarrow` is a string that specifies the location of the detector binary file which accepts input images with a width of 320 pixels, `recognizerLarge` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels, `recognizerSmall` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 64 pixels, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
-| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
-| `delete` | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete model while it's generating. |
+| Method | Type | Description |
+| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `load`    | `(model: { detectorSource: ResourceSource; recognizerSource: ResourceSource; language: OCRLanguage }, independentCharacters: boolean, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorSource` is a string that specifies the location of the detector binary, `recognizerSource` is a string that specifies the location of the recognizer binary, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
+| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
+| `delete` | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete model while it's generating. |
Type definitions
@@ -130,12 +130,10 @@ interface OCRDetection {
To load the model, use the `load` method. It accepts:
-**`model`** - Object containing the detector sources, recognizer sources, and language.
+**`model`** - Object containing the detector source, recognizer source, and language.
-- **`detectorLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 1280 pixels.
-- **`detectorNarrow`** - A string that specifies the location of the detector binary file which accepts input images with a width of 320 pixels.
-- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels.
-- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 64 pixels.
+- **`detectorSource`** - A string that specifies the location of the detector binary.
+- **`recognizerSource`** - A string that specifies the location of the recognizer binary.
- **`language`** - A parameter that specifies the language of the text to be recognized by the OCR.
**`independentCharacters`** – A boolean parameter that indicates whether the text in the image consists of a random sequence of characters. If set to true, the algorithm will scan each character individually instead of reading them as continuous text.
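+
+A minimal sketch of the corresponding `load` call (the `DETECTOR_CRAFT` constant is illustrative); `independentCharacters` is passed as the second argument:
+
+```typescript
+await verticalOCRModule.load(
+  {
+    detectorSource: DETECTOR_CRAFT, // hypothetical detector constant
+    recognizerSource: RECOGNIZER_LATIN_CRNN,
+    language: 'en',
+  },
+  true // independentCharacters: read character by character
+);
+```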
diff --git a/docs/docs/04-benchmarks/inference-time.md b/docs/docs/04-benchmarks/inference-time.md
index dbfc2b21d..7777b301b 100644
--- a/docs/docs/04-benchmarks/inference-time.md
+++ b/docs/docs/04-benchmarks/inference-time.md
@@ -32,24 +32,26 @@ Times presented in the tables are measured as consecutive runs of the model. Ini
-Notice that the recognizer models were executed between 3 and 7 times during a single recognition.
+Notice that the recognizer model was executed between 3 and 7 times during a single recognition.
The values below represent the averages across all runs for the benchmark image.
-| Model | iPhone 17 Pro (XNNPACK) [ms] | iPhone 16 Pro (XNNPACK) [ms] | iPhone SE 3 (XNNPACK) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| ------------------------------ | :--------------------------: | :--------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
-| Detector (CRAFT_800_QUANTIZED) | 220 | 221 | 1740 | 521 | 492 |
-| Recognizer (CRNN_512) | 45 | 38 | 110 | 40 | 38 |
-| Recognizer (CRNN_256) | 21 | 18 | 54 | 20 | 19 |
-| Recognizer (CRNN_128) | 11 | 9 | 27 | 10 | 10 |
+| Model | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------ | ------------------ | ----------- | ----------------------- | --------------- |
+| **Total Inference Time** | 652 | 600 | 2855 | 1092 | 1034 |
+| Detector (CRAFT) `forward_800` | 220 | 221 | 1740 | 521 | 492 |
+| Recognizer (CRNN) `forward_512` | 45 | 38 | 110 | 40 | 38 |
+| Recognizer (CRNN) `forward_256` | 21 | 18 | 54 | 20 | 19 |
+| Recognizer (CRNN) `forward_128` | 11 | 9 | 27 | 10 | 10 |
## Vertical OCR
-Notice that the recognizer models, as well as detector CRAFT_320 model, were executed between 4 and 21 times during a single recognition.
+Notice that the recognizer model, as well as the detector's `forward_320` method, was executed between 4 and 21 times during a single recognition.
The values below represent the averages across all runs for the benchmark image.
-| Model | iPhone 17 Pro (XNNPACK) [ms] | iPhone 16 Pro (XNNPACK) [ms] | iPhone SE 3 (XNNPACK) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| ------------------------------- | :--------------------------: | :--------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
-| Detector (CRAFT_1280_QUANTIZED) | 501 | 507 | 4317 | 1405 | 1275 |
-| Detector (CRAFT_320_QUANTIZED) | 125 | 121 | 1060 | 338 | 299 |
-| Recognizer (CRNN_512) | 46 | 42 | 109 | 47 | 37 |
-| Recognizer (CRNN_64) | 5 | 6 | 14 | 7 | 6 |
+| Model | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------------- | ------------------------- | ----------- | ------------------------------ | ---------------------- |
+| **Total Inference Time** | 1104 | 1113 | 8840 | 2845 | 2640 |
+| Detector (CRAFT) `forward_1280` | 501 | 507 | 4317 | 1405 | 1275 |
+| Detector (CRAFT) `forward_320` | 125 | 121 | 1060 | 338 | 299 |
+| Recognizer (CRNN) `forward_512` | 46 | 42 | 109 | 47 | 37 |
+| Recognizer (CRNN) `forward_64` | 5 | 6 | 14 | 7 | 6 |
## LLMs
diff --git a/docs/docs/04-benchmarks/memory-usage.md b/docs/docs/04-benchmarks/memory-usage.md
index a0c5a7b6d..3058b5725 100644
--- a/docs/docs/04-benchmarks/memory-usage.md
+++ b/docs/docs/04-benchmarks/memory-usage.md
@@ -29,16 +29,15 @@ All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12 (A
## OCR
-| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| ------------------------------------------------------------------------------------------------------ | :--------------------: | :----------------: |
-| Detector (CRAFT_800_QUANTIZED) + Recognizer (CRNN_512) + Recognizer (CRNN_256) + Recognizer (CRNN_128) | 1400 | 1320 |
+| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1400 | 1320 |
## Vertical OCR
-| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| ---------------------------------------------------------------------------------------- | :--------------------: | :----------------: |
-| Detector (CRAFT_1280_QUANTIZED) + Detector (CRAFT_320_QUANTIZED) + Recognizer (CRNN_512) | 1540 | 1470 |
-| Detector(CRAFT_1280_QUANTIZED) + Detector(CRAFT_320_QUANTIZED) + Recognizer (CRNN_64) | 1070 | 1000 |
+| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1000 - 1600 | 1000 - 1500 |
## LLMs
diff --git a/docs/docs/04-benchmarks/model-size.md b/docs/docs/04-benchmarks/model-size.md
index 00e819494..54f33e2cb 100644
--- a/docs/docs/04-benchmarks/model-size.md
+++ b/docs/docs/04-benchmarks/model-size.md
@@ -25,23 +25,19 @@ title: Model Size
## OCR
-| Model | XNNPACK [MB] |
-| ------------------------------ | :----------: |
-| Detector (CRAFT_800_QUANTIZED) | 19.8 |
-| Recognizer (CRNN_512) | 15 - 18\* |
-| Recognizer (CRNN_256) | 16 - 18\* |
-| Recognizer (CRNN_128) | 17 - 19\* |
+| Model | XNNPACK [MB] |
+| -------------------------- | :-----------: |
+| Detector (CRAFT_QUANTIZED) | 20.9 |
+| Recognizer (CRNN) | 18.5 - 25.2\* |
\* - The model weights vary depending on the language.
## Vertical OCR
-| Model | XNNPACK [MB] |
-| ------------------------------- | :----------: |
-| Detector (CRAFT_1280_QUANTIZED) | 19.8 |
-| Detector (CRAFT_320_QUANTIZED) | 19.8 |
-| Recognizer (CRNN_EN_512) | 15 - 18\* |
-| Recognizer (CRNN_EN_64) | 15 - 16\* |
+| Model | XNNPACK [MB] |
+| -------------------------- | :-----------: |
+| Detector (CRAFT_QUANTIZED) | 20.9 |
+| Recognizer (CRNN) | 18.5 - 25.2\* |
\* - The model weights vary depending on the language.
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h
index 0cc063379..9b96f1761 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h
@@ -1,7 +1,9 @@
#pragma once
+#include
#include
#include
+#include
namespace rnexecutorch::models::ocr::constants {
@@ -27,6 +29,14 @@ inline constexpr int32_t kMaxWidth =
inline constexpr int32_t kSingleCharacterMinSize = 70;
inline constexpr int32_t kRecognizerImageSize = 1280;
inline constexpr int32_t kVerticalLineThreshold = 20;
+inline constexpr int32_t kSmallDetectorWidth = 320;
+inline constexpr int32_t kMediumDetectorWidth = 800;
+inline constexpr int32_t kLargeDetectorWidth = 1280;
+inline constexpr std::array kDetectorInputWidths = {
+ kSmallDetectorWidth, kMediumDetectorWidth, kLargeDetectorWidth};
+inline constexpr std::array kRecognizerInputWidths = {
+ kSmallVerticalRecognizerWidth, kSmallRecognizerWidth,
+ kMediumRecognizerWidth, kLargeRecognizerWidth};
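+
+// Each width above maps to an exported `forward_<width>` method on the model
+// (e.g. forward_320 / forward_800 / forward_1280 on the detector); callers
+// pick the input resolution by choosing the corresponding method.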
/*
Mean and variance values for image normalization were used in EASYOCR pipeline
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp
index 2b8c46945..cb8baee5f 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp
@@ -1,55 +1,79 @@
#include "Detector.h"
+#include "Constants.h"
+#include
#include
#include
#include
-
+#include
+#include
namespace rnexecutorch::models::ocr {
Detector::Detector(const std::string &modelSource,
                   std::shared_ptr<react::CallInvoker> callInvoker)
: BaseModel(modelSource, callInvoker) {
- auto inputShapes = getAllInputShapes();
- if (inputShapes.empty()) {
- throw std::runtime_error(
- "Detector model seems to not take any input tensors.");
- }
- std::vector modelInputShape = inputShapes[0];
- if (modelInputShape.size() < 2) {
- throw std::runtime_error("Unexpected detector model input size, expected "
- "at least 2 dimensions but got: " +
- std::to_string(modelInputShape.size()) + ".");
+
+  for (auto inputWidth : constants::kDetectorInputWidths) {
+    std::string methodName = "forward_" + std::to_string(inputWidth);
+    auto inputShapes = getAllInputShapes(methodName);
+    if (inputShapes.empty()) {
+      throw std::runtime_error("Detector model takes no input tensors for " +
+                               methodName + ".");
+    }
+    if (inputShapes[0].size() < 2) {
+      throw std::runtime_error(
+          "Unexpected detector model input size for method: " + methodName +
+          ", expected at least 2 dimensions but got: " +
+          std::to_string(inputShapes[0].size()) + ".");
+    }
+  }
- modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1],
- modelInputShape[modelInputShape.size() - 2]);
}
-cv::Size Detector::getModelImageSize() const noexcept { return modelImageSize; }
-
-std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage) {
+std::vector<types::DetectorBBox> Detector::generate(const cv::Mat &inputImage,
+                                                    int32_t inputWidth) {
/*
Detector as an input accepts tensor with a shape of [1, 3, H, H].
- where H is a constant for model. In our supported models it is currently
+  where H is a constant for the model. In our supported model it is currently
either H=800 or H=1280.
Due to big influence of resize to quality of recognition the image preserves
original aspect ratio and the missing parts are filled with padding.
*/
- auto inputShapes = getAllInputShapes();
+
+ utils::validateInputWidth(inputWidth, constants::kDetectorInputWidths,
+ "Detector");
+
+ std::string methodName = "forward_" + std::to_string(inputWidth);
+ auto inputShapes = getAllInputShapes(methodName);
+
+ cv::Size modelInputSize = calculateModelImageSize(inputWidth);
+
cv::Mat resizedInputImage =
- image_processing::resizePadded(inputImage, getModelImageSize());
+ image_processing::resizePadded(inputImage, modelInputSize);
TensorPtr inputTensor = image_processing::getTensorFromMatrix(
inputShapes[0], resizedInputImage, constants::kNormalizationMean,
constants::kNormalizationVariance);
- auto forwardResult = BaseModel::forward(inputTensor);
+ auto forwardResult = BaseModel::execute(methodName, {inputTensor});
+
if (!forwardResult.ok()) {
throw std::runtime_error(
- "Failed to forward, error: " +
+ "Failed to " + methodName + " error: " +
std::to_string(static_cast(forwardResult.error())));
}
- return postprocess(forwardResult->at(0).toTensor());
+ return postprocess(forwardResult->at(0).toTensor(), modelInputSize);
+}
+
+cv::Size Detector::calculateModelImageSize(int32_t methodInputWidth) {
+  utils::validateInputWidth(methodInputWidth, constants::kDetectorInputWidths,
+                            "Detector");
+  std::string methodName = "forward_" + std::to_string(methodInputWidth);
+  auto inputShapes = getAllInputShapes(methodName);
+  std::vector<int32_t> modelInputShape = inputShapes[0];
+  return cv::Size(modelInputShape[modelInputShape.size() - 1],
+                  modelInputShape[modelInputShape.size() - 2]);
+}
std::vector<types::DetectorBBox>
-Detector::postprocess(const Tensor &tensor) const {
+Detector::postprocess(const Tensor &tensor, const cv::Size &modelInputSize) {
/*
The output of the model consists of two matrices (heat maps):
1. ScoreText(Score map) - The probability of a region containing character.
@@ -65,7 +89,7 @@ Detector::postprocess(const Tensor &tensor) const {
*/
auto [scoreTextMat, scoreAffinityMat] = utils::interleavedArrayToMats(
tensorData,
- cv::Size(modelImageSize.width / 2, modelImageSize.height / 2));
+ cv::Size(modelInputSize.width / 2, modelInputSize.height / 2));
/*
Heatmaps are then converted into list of bounding boxes.
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h
index f9fb2a39b..2a3c98ddb 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h
@@ -1,5 +1,6 @@
#pragma once
+#include
#include
#include
#include
@@ -16,15 +17,20 @@ namespace rnexecutorch::models::ocr {
using executorch::aten::Tensor;
using executorch::extension::TensorPtr;
-class Detector final : public BaseModel {
+class Detector : public BaseModel {
public:
explicit Detector(const std::string &modelSource,
                    std::shared_ptr<react::CallInvoker> callInvoker);
- std::vector generate(const cv::Mat &inputImage);
- cv::Size getModelImageSize() const noexcept;
+  virtual std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
+                                                    int32_t inputWidth);
-private:
- std::vector postprocess(const Tensor &tensor) const;
- cv::Size modelImageSize;
+ cv::Size calculateModelImageSize(int32_t methodInputWidth);
+
+protected:
+ TensorPtr runInference(const cv::Mat &inputImage, int32_t inputWidth,
+ const std::string &detectorName);
+
+  std::vector<types::DetectorBBox> postprocess(const Tensor &tensor,
+                                               const cv::Size &modelInputSize);
};
} // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp
index 4045886d3..166e4de8b 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp
@@ -1,16 +1,14 @@
#include "OCR.h"
+#include "Constants.h"
#include
#include
namespace rnexecutorch::models::ocr {
-OCR::OCR(const std::string &detectorSource,
- const std::string &recognizerSourceLarge,
- const std::string &recognizerSourceMedium,
- const std::string &recognizerSourceSmall, std::string symbols,
+OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource,
+ const std::string &symbols,
         std::shared_ptr<react::CallInvoker> callInvoker)
: detector(detectorSource, callInvoker),
- recognitionHandler(recognizerSourceLarge, recognizerSourceMedium,
- recognizerSourceSmall, symbols, callInvoker) {}
+ recognitionHandler(recognizerSource, symbols, callInvoker) {}
std::vector<types::OCRDetection> OCR::generate(std::string input) {
cv::Mat image = image_processing::readImage(input);
@@ -23,7 +21,8 @@ std::vector OCR::generate(std::string input) {
with text. They are corresponding to the image of size 1280x1280, which
is a size later used by Recognition Handler.
*/
- std::vector bboxesList = detector.generate(image);
+  std::vector<types::DetectorBBox> bboxesList =
+      detector.generate(image, constants::kMediumDetectorWidth);
cv::cvtColor(image, image, cv::COLOR_BGR2GRAY);
/*
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h
index c7c7e61c4..8b283a2c0 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h
@@ -17,16 +17,15 @@ namespace models::ocr {
2. Recognition - recognizing the text in the bounding boxes, the result is a
list of strings and corresponding boxes & confidence scores.
- Recognition uses three models, each model is resposible for recognizing text
- of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64).
+  Recognition uses one model with three forward methods; each method is
+  responsible for recognizing text of a different size (e.g. large - 512x64,
+  medium - 256x64, small - 128x64).
*/
class OCR final {
public:
explicit OCR(const std::string &detectorSource,
- const std::string &recognizerSourceLarge,
- const std::string &recognizerSourceMedium,
- const std::string &recognizerSourceSmall, std::string symbols,
+ const std::string &recognizerSource, const std::string &symbols,
               std::shared_ptr<react::CallInvoker> callInvoker);
  std::vector<types::OCRDetection> generate(std::string input);
std::size_t getMemoryLowerBound() const noexcept;
@@ -39,6 +38,5 @@ class OCR final {
} // namespace models::ocr
REGISTER_CONSTRUCTOR(models::ocr::OCR, std::string, std::string, std::string,
- std::string, std::string,
                     std::shared_ptr<react::CallInvoker>);
} // namespace rnexecutorch
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp
index f0427accf..dfde73765 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp
@@ -5,29 +5,18 @@
namespace rnexecutorch::models::ocr {
RecognitionHandler::RecognitionHandler(
- const std::string &recognizerSourceLarge,
- const std::string &recognizerSourceMedium,
- const std::string &recognizerSourceSmall, std::string symbols,
+ const std::string &recognizerSource, const std::string &symbols,
    std::shared_ptr<react::CallInvoker> callInvoker)
- : converter(symbols), recognizerLarge(recognizerSourceLarge, callInvoker),
- recognizerMedium(recognizerSourceMedium, callInvoker),
- recognizerSmall(recognizerSourceSmall, callInvoker) {
- memorySizeLowerBound = recognizerSmall.getMemoryLowerBound() +
- recognizerMedium.getMemoryLowerBound() +
- recognizerLarge.getMemoryLowerBound();
+ : converter(symbols), recognizer(recognizerSource, callInvoker) {
+ memorySizeLowerBound = recognizer.getMemoryLowerBound();
}
std::pair<std::vector<int32_t>, float>
RecognitionHandler::runModel(cv::Mat image) {
// Note that the height of an image is always equal to 64.
- if (image.cols >= constants::kLargeRecognizerWidth) {
- return recognizerLarge.generate(image);
- }
- if (image.cols >= constants::kMediumRecognizerWidth) {
- return recognizerMedium.generate(image);
- }
- return recognizerSmall.generate(image);
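+  // Pick the forward_<width> method that best fits this crop's width; this
+  // replaces the old dispatch across three separate recognizer models.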
+ int32_t desiredWidth = utils::getDesiredWidth(image, false);
+ return recognizer.generate(image, desiredWidth);
}
void RecognitionHandler::processBBox(std::vector<types::DetectorBBox> &boxList,
@@ -100,9 +89,5 @@ std::size_t RecognitionHandler::getMemoryLowerBound() const noexcept {
return memorySizeLowerBound;
}
-void RecognitionHandler::unload() noexcept {
- recognizerSmall.unload();
- recognizerMedium.unload();
- recognizerLarge.unload();
-}
+void RecognitionHandler::unload() noexcept { recognizer.unload(); }
} // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h
index d585b74bc..abdfe5ba9 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h
@@ -17,10 +17,8 @@ namespace rnexecutorch::models::ocr {
class RecognitionHandler final {
public:
- explicit RecognitionHandler(const std::string &recognizerSourceLarge,
- const std::string &recognizerSourceMedium,
- const std::string &recognizerSourceSmall,
- std::string symbols,
+  explicit RecognitionHandler(const std::string &recognizerSource,
+ const std::string &symbols,
                              std::shared_ptr<react::CallInvoker> callInvoker);
  std::vector<types::OCRDetection>
  recognize(std::vector<types::DetectorBBox> bboxesList, cv::Mat &imgGray,
@@ -35,8 +33,6 @@ class RecognitionHandler final {
types::PaddingInfo ratioAndPadding);
std::size_t memorySizeLowerBound{0};
CTCLabelConverter converter;
- Recognizer recognizerLarge;
- Recognizer recognizerMedium;
- Recognizer recognizerSmall;
+ Recognizer recognizer;
};
} // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp
index e076dabed..237c5154e 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp
@@ -1,29 +1,21 @@
#include "Recognizer.h"
+#include "Constants.h"
#include
#include
#include
#include
#include
+#include
#include
+#include
namespace rnexecutorch::models::ocr {
Recognizer::Recognizer(const std::string &modelSource,
                       std::shared_ptr<react::CallInvoker> callInvoker)
- : BaseModel(modelSource, callInvoker) {
- auto inputShapes = getAllInputShapes();
- if (inputShapes.empty()) {
- throw std::runtime_error("Recognizer model has no input tensors.");
- }
- std::vector modelInputShape = inputShapes[0];
- if (modelInputShape.size() < 2) {
- throw std::runtime_error("Unexpected Recognizer model input shape.");
- }
- modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1],
- modelInputShape[modelInputShape.size() - 2]);
-}
+ : BaseModel(modelSource, callInvoker) {}
std::pair<std::vector<int32_t>, float>
-Recognizer::generate(const cv::Mat &grayImage) {
+Recognizer::generate(const cv::Mat &grayImage, int32_t inputWidth) {
/*
-  In our pipeline we use three types of Recognizer, each designated to
-  handle different image sizes:
+  In our pipeline we use one Recognizer model with three forward methods,
+  each designated to handle a different image width:
@@ -33,10 +25,19 @@ Recognizer::generate(const cv::Mat &grayImage) {
The `generate` function as an argument accepts an image in grayscale
already resized to the expected size.
*/
- std::vector tensorDims = getAllInputShapes()[0];
+ utils::validateInputWidth(inputWidth, constants::kRecognizerInputWidths,
+ "Recognizer");
+
+  std::string methodName = "forward_" + std::to_string(inputWidth);
+  auto shapes = getAllInputShapes(methodName);
+  if (shapes.empty()) {
+    throw std::runtime_error("Recognizer model: input shapes for " +
+                             methodName + " not found.");
+  }
+  std::vector<int32_t> tensorDims = shapes[0];
TensorPtr inputTensor =
image_processing::getTensorFromMatrixGray(tensorDims, grayImage);
- auto forwardResult = BaseModel::forward(inputTensor);
+  auto forwardResult = BaseModel::execute(methodName, {inputTensor});
if (!forwardResult.ok()) {
throw std::runtime_error(
"Failed to forward in Recognizer, error: " +
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h
index 50eafe968..337e21483 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h
@@ -25,12 +25,11 @@ class Recognizer final : public BaseModel {
public:
explicit Recognizer(const std::string &modelSource,
                      std::shared_ptr<react::CallInvoker> callInvoker);
-  std::pair<std::vector<int32_t>, float> generate(const cv::Mat &grayImage);
+  std::pair<std::vector<int32_t>, float> generate(const cv::Mat &grayImage,
+                                                  int32_t inputWidth);
private:
  std::pair<std::vector<int32_t>, float>
postprocess(const Tensor &tensor) const;
-
- cv::Size modelImageSize;
};
} // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp
index 25fa160c2..7614e97a1 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp
@@ -707,4 +707,21 @@ groupTextBoxes(std::vector &boxes, float centerThreshold,
return orderedSortedBoxes;
}
+void validateInputWidth(int32_t inputWidth, std::span<const int32_t> constants,
+                        std::string modelName) {
+ auto it = std::ranges::find(constants, inputWidth);
+
+ if (it == constants.end()) {
+ std::string allowed;
+ for (size_t i = 0; i < constants.size(); ++i) {
+ allowed +=
+ std::to_string(constants[i]) + (i < constants.size() - 1 ? ", " : "");
+ }
+
+ throw std::runtime_error("Unexpected input width for " + modelName +
+ "! Expected [" + allowed + "] but got " +
+ std::to_string(inputWidth) + ".");
+ }
+}
+
} // namespace rnexecutorch::models::ocr::utils
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h
index ca0c2676d..0b742a4ce 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h
@@ -78,4 +78,19 @@ groupTextBoxes(std::vector &boxes, float centerThreshold,
float distanceThreshold, float heightThreshold,
int32_t minSideThreshold, int32_t maxSideThreshold,
int32_t maxWidth);
+
+/**
+ * Validates if the provided image width is supported by the model.
+ *
+ * This method checks the input width against the passed allowed
+ * widths in the constants span. If the width is not found, it
+ * constructs a descriptive error message listing all valid options.
+ *
+ * @param inputWidth The width of the input image to be validated.
+ * @param constants Span of allowed input widths.
+ * @param modelName Model name used in the generated error message.
+ * @throws std::runtime_error If inputWidth is not present in the allowed
+ * detector input widths array.
+ */
+void validateInputWidth(int32_t inputWidth, std::span<const int32_t> constants,
+                        std::string modelName);
} // namespace rnexecutorch::models::ocr::utils
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h
index 7e2cfe5bf..e2dea2f7f 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h
@@ -43,6 +43,24 @@ void computeRatioAndResize(cv::Mat &img, cv::Size size, int32_t modelHeight);
*/
cv::Mat cropImage(types::DetectorBBox box, cv::Mat &image, int32_t modelHeight);
void adjustContrastGrey(cv::Mat &img, double target);
+
+/**
+ * @brief Determines the optimal width for an image based on its aspect ratio
+ * and orientation, to fit the requirements of the recognition model.
+ *
+ * This function calculates a `desiredWidth` that, when combined with a fixed
+ * `modelHeight` (from `normalizeForRecognizer`), maintains the image's aspect
+ * ratio and prepares it for input into the recognizer model. It considers
+ * whether the text in the image is `isVertical`, which might influence the
+ * chosen width for better recognition performance.
+ *
+ * @param img The input image matrix.
+ * @param isVertical A boolean indicating if the text in the image is oriented
+ * vertically.
+ * @return The calculated desired width for the image.
+ */
+int32_t getDesiredWidth(const cv::Mat &img, bool isVertical);
+
/**
* @brief Prepares an image for recognition models by standardizing size,
* contrast, and pixel values.
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp
index 3d89f10a1..a0faf43ee 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp
@@ -5,51 +5,49 @@
#include
#include
+#include
namespace rnexecutorch::models::ocr {
VerticalDetector::VerticalDetector(
- const std::string &modelSource, bool detectSingleCharacters,
+ const std::string &modelSource,
    std::shared_ptr<react::CallInvoker> callInvoker)
- : BaseModel(modelSource, callInvoker) {
- this->detectSingleCharacters = detectSingleCharacters;
- auto inputShapes = getAllInputShapes();
- if (inputShapes.empty()) {
- throw std::runtime_error(
- "Detector model seems to not take any input tensors.");
- }
- std::vector modelInputShape = inputShapes[0];
- if (modelInputShape.size() < 2) {
- throw std::runtime_error("Unexpected detector model input size, expected "
- "at least 2 dimensions but got: " +
- std::to_string(modelInputShape.size()) + ".");
- }
- modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1],
- modelInputShape[modelInputShape.size() - 2]);
-}
-
-cv::Size VerticalDetector::getModelImageSize() const noexcept {
- return modelImageSize;
-}
+ : Detector(modelSource, callInvoker) {}
std::vector<types::DetectorBBox>
-VerticalDetector::generate(const cv::Mat &inputImage) {
- auto inputShapes = getAllInputShapes();
+VerticalDetector::generate(const cv::Mat &inputImage, int32_t inputWidth) {
+
+  utils::validateInputWidth(inputWidth, constants::kDetectorInputWidths,
+                            "VerticalDetector");
+
+  // The narrow (320 px) pass detects single characters; wider passes detect
+  // whole lines of text.
+  bool detectSingleCharacters = inputWidth < constants::kMediumDetectorWidth;
+
+ std::string methodName = "forward_" + std::to_string(inputWidth);
+ auto inputShapes = getAllInputShapes(methodName);
+
+ cv::Size modelInputSize = calculateModelImageSize(inputWidth);
+
cv::Mat resizedInputImage =
- image_processing::resizePadded(inputImage, getModelImageSize());
+ image_processing::resizePadded(inputImage, modelInputSize);
TensorPtr inputTensor = image_processing::getTensorFromMatrix(
inputShapes[0], resizedInputImage, constants::kNormalizationMean,
constants::kNormalizationVariance);
- auto forwardResult = BaseModel::forward(inputTensor);
+ auto forwardResult = BaseModel::execute(methodName, {inputTensor});
+
if (!forwardResult.ok()) {
throw std::runtime_error(
- "Failed to forward, error: " +
+ "Failed to " + methodName + " error: " +
std::to_string(static_cast(forwardResult.error())));
}
- return postprocess(forwardResult->at(0).toTensor());
+  return postprocess(forwardResult->at(0).toTensor(), modelInputSize,
+                     detectSingleCharacters);
}
std::vector<types::DetectorBBox>
-VerticalDetector::postprocess(const Tensor &tensor) const {
+VerticalDetector::postprocess(const Tensor &tensor,
+ const cv::Size &modelInputSize,
+ bool detectSingleCharacters) const {
/*
The output of the model consists of two matrices (heat maps):
1. ScoreText(Score map) - The probability of a region containing character.
@@ -67,20 +65,20 @@ VerticalDetector::postprocess(const Tensor &tensor) const {
*/
auto [scoreTextMat, scoreAffinityMat] = utils::interleavedArrayToMats(
tensorData,
- cv::Size(modelImageSize.width / 2, modelImageSize.height / 2));
- float txtThreshold = this->detectSingleCharacters
+ cv::Size(modelInputSize.width / 2, modelInputSize.height / 2));
+ float txtThreshold = detectSingleCharacters
? constants::kTextThreshold
: constants::kTextThresholdVertical;
  std::vector<types::DetectorBBox> bBoxesList =
utils::getDetBoxesFromTextMapVertical(
scoreTextMat, scoreAffinityMat, txtThreshold,
- constants::kLinkThreshold, this->detectSingleCharacters);
+ constants::kLinkThreshold, detectSingleCharacters);
const float restoreRatio = utils::calculateRestoreRatio(
scoreTextMat.rows, constants::kRecognizerImageSize);
utils::restoreBboxRatio(bBoxesList, restoreRatio);
// if this is Narrow Detector, do not group boxes.
- if (!this->detectSingleCharacters) {
+ if (!detectSingleCharacters) {
bBoxesList = utils::groupTextBoxes(
bBoxesList, constants::kCenterThreshold, constants::kDistanceThreshold,
constants::kHeightThreshold, constants::kMinSideThreshold,
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h
index 802ab7329..29e9c86bf 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h
@@ -1,9 +1,10 @@
#pragma once
+#include
#include
#include
-
#include
+#include
#include
namespace rnexecutorch::models::ocr {
@@ -15,16 +16,17 @@ namespace rnexecutorch::models::ocr {
In Vertical OCR pipeline we make use of Detector two times:
- 1. Large Detector -- The differences between Detector used in standard OCR and
- Large Detector used in Vertical OCR is: a) To obtain detected boxes from heeat
- maps it utilizes `getDetBoxesFromTextMapVertical()` function rather than
+  1. Large Detector through the forward_1280 method -- The difference between
+  the Detector used in standard OCR and the Large Detector used in Vertical OCR
+  is: a) To obtain detected boxes from heat maps it utilizes the
+  `getDetBoxesFromTextMapVertical()` function rather than
'getDetBoxesFromTextMap()`. Other than that, refer to the standard OCR
Detector.
- 2. Narrow Detector -- it is designed to detect a single characters bounding
- boxes. `getDetBoxesFromTextMapVertical()` function acts differently for Narrow
- Detector and different textThreshold Value is passed. Additionally, the
- grouping of detected boxes is completely omited.
+  2. Narrow Detector through the forward_320 method -- it is designed to
+  detect bounding boxes of single characters. The
+  `getDetBoxesFromTextMapVertical()` function acts differently for the Narrow
+  Detector and a different textThreshold value is passed. Additionally, the
+  grouping of detected boxes is completely omitted.
-  Vertical Detector pipeline differentiate the Large Detector and Narrow
-  Detector based on `detectSingleCharacters` flag passed to the constructor.
+  The Vertical Detector pipeline differentiates between the Large and Narrow
+  Detector based on the input width passed to `generate()`.
@@ -33,17 +35,17 @@ namespace rnexecutorch::models::ocr {
using executorch::aten::Tensor;
using executorch::extension::TensorPtr;
-class VerticalDetector final : public BaseModel {
+class VerticalDetector final : public Detector {
public:
explicit VerticalDetector(const std::string &modelSource,
- bool detectSingleCharacters,
                            std::shared_ptr<react::CallInvoker> callInvoker);
- std::vector generate(const cv::Mat &inputImage);
- cv::Size getModelImageSize() const noexcept;
+
+  std::vector<types::DetectorBBox> generate(const cv::Mat &inputImage,
+                                            int32_t inputWidth) override;
private:
- bool detectSingleCharacters;
- std::vector<types::DetectorBBox> postprocess(const Tensor &tensor) const;
- cv::Size modelImageSize;
+ std::vector<types::DetectorBBox>
+ postprocess(const Tensor &tensor, const cv::Size &modelInputSize,
+ bool detectSingleCharacters) const;
};
} // namespace rnexecutorch::models::ocr
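
The hunks above collapse the two `VerticalDetector` instances into one `Detector` subclass whose pass is chosen per call. A minimal TypeScript sketch of the dispatch idea, under assumed names (`DetectorModel`, `runDetector`, and the width constant are illustrative stand-ins, not APIs from this package):

```typescript
// One detector model exposes two forward methods; the caller picks the pass
// by input width (1280 for text regions, 320 for single characters).
type Point = { x: number; y: number };
type DetectorBBox = { bbox: Point[] };

interface DetectorModel {
  forward_1280(image: Uint8Array): DetectorBBox[]; // Large pass: text regions
  forward_320(image: Uint8Array): DetectorBBox[]; // Narrow pass: characters
}

const K_LARGE_DETECTOR_WIDTH = 1280; // assumed value of kLargeDetectorWidth

function runDetector(
  model: DetectorModel,
  image: Uint8Array,
  inputWidth: number
): DetectorBBox[] {
  return inputWidth === K_LARGE_DETECTOR_WIDTH
    ? model.forward_1280(image)
    : model.forward_320(image);
}
```
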
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp
index 5bb374dc1..40c0ce26a 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp
@@ -2,19 +2,18 @@
#include
#include
#include
+#include
#include
#include
namespace rnexecutorch::models::ocr {
-VerticalOCR::VerticalOCR(const std::string &detectorLargeSource,
- const std::string &detectorNarrowSource,
+VerticalOCR::VerticalOCR(const std::string &detectorSource,
const std::string &recognizerSource,
std::string symbols, bool independentChars,
std::shared_ptr invoker)
- : detectorLarge(detectorLargeSource, false, invoker),
- detectorNarrow(detectorNarrowSource, true, invoker),
- recognizer(recognizerSource, invoker), converter(symbols),
- independentCharacters(independentChars), callInvoker(invoker) {}
+ : detector(detectorSource, invoker), recognizer(recognizerSource, invoker),
+ converter(symbols), independentCharacters(independentChars),
+ callInvoker(invoker) {}
std::vector<types::OCRDetection> VerticalOCR::generate(std::string input) {
cv::Mat image = image_processing::readImage(input);
@@ -22,9 +21,11 @@ std::vector<types::OCRDetection> VerticalOCR::generate(std::string input) {
throw std::runtime_error("Failed to load image from path: " + input);
}
// 1. Large Detector
- std::vector<types::DetectorBBox> largeBoxes = detectorLarge.generate(image);
+ std::vector<types::DetectorBBox> largeBoxes =
+ detector.generate(image, constants::kLargeDetectorWidth);
- cv::Size largeDetectorSize = detectorLarge.getModelImageSize();
+ cv::Size largeDetectorSize =
+ detector.calculateModelImageSize(constants::kLargeDetectorWidth);
cv::Mat resizedImage =
image_processing::resizePadded(image, largeDetectorSize);
types::PaddingInfo imagePaddings =
@@ -42,9 +43,7 @@ std::vector<types::OCRDetection> VerticalOCR::generate(std::string input) {
}
std::size_t VerticalOCR::getMemoryLowerBound() const noexcept {
- return detectorLarge.getMemoryLowerBound() +
- detectorNarrow.getMemoryLowerBound() +
- recognizer.getMemoryLowerBound();
+ return detector.getMemoryLowerBound() + recognizer.getMemoryLowerBound();
}
// Strategy 1: Recognize each character individually
@@ -76,7 +75,8 @@ std::pair<std::string, float> VerticalOCR::_handleIndependentCharacters(
croppedChar = utils::normalizeForRecognizer(
croppedChar, constants::kRecognizerHeight, 0.0, true);
- const auto &[predIndex, score] = recognizer.generate(croppedChar);
+ const auto &[predIndex, score] =
+ recognizer.generate(croppedChar, constants::kRecognizerHeight);
if (!predIndex.empty()) {
text += converter.decodeGreedy(predIndex, predIndex.size())[0];
}
@@ -118,7 +118,7 @@ std::pair<std::string, float> VerticalOCR::_handleJointCharacters(
mergedCharacters, constants::kRecognizerHeight, 0.0, false);
const auto &[predIndex, confidenceScore] =
- recognizer.generate(mergedCharacters);
+ recognizer.generate(mergedCharacters, constants::kLargeRecognizerWidth);
if (!predIndex.empty()) {
text = converter.decodeGreedy(predIndex, predIndex.size())[0];
}
@@ -138,7 +138,7 @@ types::OCRDetection VerticalOCR::_processSingleTextBox(
// 2. Narrow Detector - detects single characters
std::vector<types::DetectorBBox> characterBoxes =
- detectorNarrow.generate(croppedLargeBox);
+ detector.generate(croppedLargeBox, constants::kSmallDetectorWidth);
std::string text;
float confidenceScore = 0.0;
@@ -148,7 +148,8 @@ types::OCRDetection VerticalOCR::_processSingleTextBox(
static_cast<int32_t>(box.bbox[2].x - box.bbox[0].x);
const int32_t boxHeight =
static_cast<int32_t>(box.bbox[2].y - box.bbox[0].y);
- cv::Size narrowRecognizerSize = detectorNarrow.getModelImageSize();
+ cv::Size narrowRecognizerSize =
+ detector.calculateModelImageSize(constants::kSmallDetectorWidth);
types::PaddingInfo paddingsBox = utils::calculateResizeRatioAndPaddings(
cv::Size(boxWidth, boxHeight), narrowRecognizerSize);
@@ -173,8 +174,7 @@ types::OCRDetection VerticalOCR::_processSingleTextBox(
}
void VerticalOCR::unload() noexcept {
- detectorLarge.unload();
- detectorNarrow.unload();
+ detector.unload();
recognizer.unload();
}
} // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h
index f9f70f2d9..e7654c2f2 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h
@@ -21,22 +21,20 @@ namespace models::ocr {
/*
Vertical OCR is an OCR variant designed to handle vertical text.
The Vertical OCR pipeline consists of:
- 1. Large Detector -- detects regions where text is located.
+ 1. Detector using the forward_1280 method -- detects regions where text is located.
Almost identical to the Detector in standard OCR.
The result of this phase is a list of bounding boxes.
Each detected box is then processed individually through the following steps:
- 2. Narrow Detector -- designed for detecting where single characters
- are located.
- There are two different strategies used for vertical recognition:
- Strategy 1 "Independent Characters":
- Treating each character region found by Narrow Detector
- as compeletely independent.
- 3. Each character is forwarded to Small Recognizer (64 x 64).
+ 2. Detector using forward_320 method -- designed for detecting where single
+ characters are located. There are two different strategies used for vertical
+ recognition: Strategy 1 "Independent Characters": Treating each character
+ region found by the Narrow Detector as completely independent.
+ 3. Each character is forwarded to the Recognizer with an input size of 64 x 64.
Strategy 2 "Joint Characters":
The bounding boxes found by Narrow Detector are
horizontally merged to create one wide image.
- 3. One wide image is forwarded to Large Recognzer (512 x 64).
- Vertical OCR differentiate between those two strategies based on
+ 3. One wide image is forwarded to the Recognizer with an input size of
+ 512 x 64. Vertical OCR differentiates between those two strategies based on
`independentChars` flag passed to the constructor.
*/
@@ -45,8 +43,7 @@ using executorch::extension::TensorPtr;
class VerticalOCR final {
public:
- explicit VerticalOCR(const std::string &detectorLargeSource,
- const std::string &detectorNarrowSource,
+ explicit VerticalOCR(const std::string &detectorSource,
const std::string &recognizerSource, std::string symbols,
bool independentCharacters,
std::shared_ptr callInvoker);
@@ -60,6 +57,7 @@ class VerticalOCR final {
const std::vector &characterBoxes,
const types::PaddingInfo &paddingsBox,
const types::PaddingInfo &imagePaddings);
+
std::pair<std::string, float>
_handleJointCharacters(const types::DetectorBBox &box,
const cv::Mat &originalImage,
@@ -70,8 +68,8 @@ class VerticalOCR final {
_processSingleTextBox(types::DetectorBBox &box, const cv::Mat &originalImage,
const cv::Mat &resizedLargeImage,
const types::PaddingInfo &imagePaddings);
- VerticalDetector detectorLarge;
- VerticalDetector detectorNarrow;
+
+ VerticalDetector detector;
Recognizer recognizer;
CTCLabelConverter converter;
bool independentCharacters;
@@ -80,6 +78,5 @@ class VerticalOCR final {
} // namespace models::ocr
REGISTER_CONSTRUCTOR(models::ocr::VerticalOCR, std::string, std::string,
- std::string, std::string, bool,
- std::shared_ptr);
+ std::string, bool, std::shared_ptr);
} // namespace rnexecutorch
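
The class comment above describes two recognition strategies selected by `independentChars`. A hedged TypeScript sketch of that branch (the real logic is the C++ `_handleIndependentCharacters` / `_handleJointCharacters`; `recognize` and `mergeHorizontally` are illustrative stand-ins):

```typescript
type CharCrop = Uint8Array;

// Stand-ins for the recognizer call and the horizontal merge step.
declare function recognize(image: Uint8Array): string;
declare function mergeHorizontally(crops: CharCrop[]): Uint8Array;

function recognizeVerticalBox(
  charCrops: CharCrop[],
  independentChars: boolean
): string {
  if (independentChars) {
    // Strategy 1: recognize each character crop on its own (64 x 64 input).
    return charCrops.map(recognize).join('');
  }
  // Strategy 2: merge the crops into one wide image (512 x 64 input) and
  // run the recognizer once.
  return recognize(mergeHorizontally(charCrops));
}
```
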
diff --git a/packages/react-native-executorch/src/constants/ocr/models.ts b/packages/react-native-executorch/src/constants/ocr/models.ts
index c6bad4293..f6d4efb13 100644
--- a/packages/react-native-executorch/src/constants/ocr/models.ts
+++ b/packages/react-native-executorch/src/constants/ocr/models.ts
@@ -1,884 +1,426 @@
import { alphabets, symbols } from './symbols';
-const URL_PREFIX =
- 'https://huggingface.co/software-mansion/react-native-executorch';
-const VERSION_TAG = 'resolve/v0.5.0';
+import { VERSION_TAG, URL_PREFIX } from '../versions';
-const DETECTOR_CRAFT_1280_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_1280_quantized.pte`;
-const DETECTOR_CRAFT_800_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_800_quantized.pte`;
-const DETECTOR_CRAFT_320_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_320_quantized.pte`;
+const DETECTOR_CRAFT_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_quantized.pte`;
-type RecognizerSize = 64 | 128 | 256 | 512;
+const createHFRecognizerDownloadUrl = (alphabet: keyof typeof alphabets) =>
+ `${URL_PREFIX}-recognizer-crnn.en/${VERSION_TAG}/xnnpack/${alphabet}/xnnpack_crnn_${alphabet}.pte`;
-const createHFRecognizerDownloadUrl = (
- alphabet: keyof typeof alphabets,
- size: RecognizerSize
-) =>
- `${URL_PREFIX}-recognizer-crnn.en/${VERSION_TAG}/xnnpack/${alphabet}/xnnpack_crnn_${alphabet}_${size}.pte`;
-
-const RECOGNIZER_ENGLISH_CRNN_512 = createHFRecognizerDownloadUrl(
- 'english',
- 512
-);
-const RECOGNIZER_ENGLISH_CRNN_256 = createHFRecognizerDownloadUrl(
- 'english',
- 256
-);
-const RECOGNIZER_ENGLISH_CRNN_128 = createHFRecognizerDownloadUrl(
- 'english',
- 128
-);
-const RECOGNIZER_ENGLISH_CRNN_64 = createHFRecognizerDownloadUrl('english', 64);
-
-const RECOGNIZER_LATIN_CRNN_512 = createHFRecognizerDownloadUrl('latin', 512);
-const RECOGNIZER_LATIN_CRNN_256 = createHFRecognizerDownloadUrl('latin', 256);
-const RECOGNIZER_LATIN_CRNN_128 = createHFRecognizerDownloadUrl('latin', 128);
-const RECOGNIZER_LATIN_CRNN_64 = createHFRecognizerDownloadUrl('latin', 64);
-
-const RECOGNIZER_JAPANESE_CRNN_512 = createHFRecognizerDownloadUrl(
- 'japanese',
- 512
-);
-const RECOGNIZER_JAPANESE_CRNN_256 = createHFRecognizerDownloadUrl(
- 'japanese',
- 256
-);
-const RECOGNIZER_JAPANESE_CRNN_128 = createHFRecognizerDownloadUrl(
- 'japanese',
- 128
-);
-const RECOGNIZER_JAPANESE_CRNN_64 = createHFRecognizerDownloadUrl(
- 'japanese',
- 64
-);
-
-const RECOGNIZER_KANNADA_CRNN_512 = createHFRecognizerDownloadUrl(
- 'kannada',
- 512
-);
-const RECOGNIZER_KANNADA_CRNN_256 = createHFRecognizerDownloadUrl(
- 'kannada',
- 256
-);
-const RECOGNIZER_KANNADA_CRNN_128 = createHFRecognizerDownloadUrl(
- 'kannada',
- 128
-);
-const RECOGNIZER_KANNADA_CRNN_64 = createHFRecognizerDownloadUrl('kannada', 64);
-
-const RECOGNIZER_KOREAN_CRNN_512 = createHFRecognizerDownloadUrl('korean', 512);
-const RECOGNIZER_KOREAN_CRNN_256 = createHFRecognizerDownloadUrl('korean', 256);
-const RECOGNIZER_KOREAN_CRNN_128 = createHFRecognizerDownloadUrl('korean', 128);
-const RECOGNIZER_KOREAN_CRNN_64 = createHFRecognizerDownloadUrl('korean', 64);
-
-const RECOGNIZER_TELUGU_CRNN_512 = createHFRecognizerDownloadUrl('telugu', 512);
-const RECOGNIZER_TELUGU_CRNN_256 = createHFRecognizerDownloadUrl('telugu', 256);
-const RECOGNIZER_TELUGU_CRNN_128 = createHFRecognizerDownloadUrl('telugu', 128);
-const RECOGNIZER_TELUGU_CRNN_64 = createHFRecognizerDownloadUrl('telugu', 64);
-
-const RECOGNIZER_ZH_SIM_CRNN_512 = createHFRecognizerDownloadUrl('zh_sim', 512);
-const RECOGNIZER_ZH_SIM_CRNN_256 = createHFRecognizerDownloadUrl('zh_sim', 256);
-const RECOGNIZER_ZH_SIM_CRNN_128 = createHFRecognizerDownloadUrl('zh_sim', 128);
-const RECOGNIZER_ZH_SIM_CRNN_64 = createHFRecognizerDownloadUrl('zh_sim', 64);
-
-const RECOGNIZER_CYRILLIC_CRNN_512 = createHFRecognizerDownloadUrl(
- 'cyrillic',
- 512
-);
-const RECOGNIZER_CYRILLIC_CRNN_256 = createHFRecognizerDownloadUrl(
- 'cyrillic',
- 256
-);
-const RECOGNIZER_CYRILLIC_CRNN_128 = createHFRecognizerDownloadUrl(
- 'cyrillic',
- 128
-);
-const RECOGNIZER_CYRILLIC_CRNN_64 = createHFRecognizerDownloadUrl(
- 'cyrillic',
- 64
-);
+const RECOGNIZER_ENGLISH_CRNN = createHFRecognizerDownloadUrl('english');
+const RECOGNIZER_LATIN_CRNN = createHFRecognizerDownloadUrl('latin');
+const RECOGNIZER_JAPANESE_CRNN = createHFRecognizerDownloadUrl('japanese');
+const RECOGNIZER_KANNADA_CRNN = createHFRecognizerDownloadUrl('kannada');
+const RECOGNIZER_KOREAN_CRNN = createHFRecognizerDownloadUrl('korean');
+const RECOGNIZER_TELUGU_CRNN = createHFRecognizerDownloadUrl('telugu');
+const RECOGNIZER_ZH_SIM_CRNN = createHFRecognizerDownloadUrl('zh_sim');
+const RECOGNIZER_CYRILLIC_CRNN = createHFRecognizerDownloadUrl('cyrillic');
const createOCRObject = (
- recognizerLarge: string,
- recognizerMedium: string,
- recognizerSmall: string,
+ recognizerSource: string,
language: keyof typeof symbols
) => {
return {
- detectorSource: DETECTOR_CRAFT_800_MODEL,
- recognizerLarge,
- recognizerMedium,
- recognizerSmall,
+ detectorSource: DETECTOR_CRAFT_MODEL,
+ recognizerSource,
language,
};
};
const createVerticalOCRObject = (
- recognizerLarge: string,
- recognizerSmall: string,
+ recognizerSource: string,
language: keyof typeof symbols
) => {
return {
- detectorLarge: DETECTOR_CRAFT_1280_MODEL,
- detectorNarrow: DETECTOR_CRAFT_320_MODEL,
- recognizerLarge,
- recognizerSmall,
+ detectorSource: DETECTOR_CRAFT_MODEL,
+ recognizerSource,
language,
};
};
-export const OCR_ABAZA = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'abq'
-);
+export const OCR_ABAZA = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'abq');
export const VERTICAL_OCR_ABAZA = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'abq'
);
-export const OCR_ADYGHE = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'ady'
-);
+export const OCR_ADYGHE = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'ady');
export const VERTICAL_OCR_ADYGHE = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'ady'
);
-export const OCR_AFRIKAANS = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'af'
-);
+export const OCR_AFRIKAANS = createOCRObject(RECOGNIZER_LATIN_CRNN, 'af');
export const VERTICAL_OCR_AFRIKAANS = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'af'
);
-export const OCR_AVAR = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'ava'
-);
+export const OCR_AVAR = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'ava');
export const VERTICAL_OCR_AVAR = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'ava'
);
-export const OCR_AZERBAIJANI = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'az'
-);
+export const OCR_AZERBAIJANI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'az');
export const VERTICAL_OCR_AZERBAIJANI = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'az'
);
-export const OCR_BELARUSIAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'be'
-);
+export const OCR_BELARUSIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'be');
export const VERTICAL_OCR_BELARUSIAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'be'
);
-export const OCR_BULGARIAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'bg'
-);
+export const OCR_BULGARIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'bg');
export const VERTICAL_OCR_BULGARIAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'bg'
);
-export const OCR_BOSNIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'bs'
-);
+export const OCR_BOSNIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'bs');
export const VERTICAL_OCR_BOSNIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'bs'
);
export const OCR_SIMPLIFIED_CHINESE = createOCRObject(
- RECOGNIZER_ZH_SIM_CRNN_512,
- RECOGNIZER_ZH_SIM_CRNN_256,
- RECOGNIZER_ZH_SIM_CRNN_128,
+ RECOGNIZER_ZH_SIM_CRNN,
'chSim'
);
export const VERTICAL_OCR_SIMPLIFIED_CHINESE = createVerticalOCRObject(
- RECOGNIZER_ZH_SIM_CRNN_512,
- RECOGNIZER_ZH_SIM_CRNN_64,
+ RECOGNIZER_ZH_SIM_CRNN,
'chSim'
);
-export const OCR_CHECHEN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'che'
-);
+export const OCR_CHECHEN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'che');
export const VERTICAL_OCR_CHECHEN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'che'
);
-export const OCR_CZECH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'cs'
-);
+export const OCR_CZECH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'cs');
export const VERTICAL_OCR_CZECH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'cs'
);
-export const OCR_WELSH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'cy'
-);
+export const OCR_WELSH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'cy');
export const VERTICAL_OCR_WELSH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'cy'
);
-export const OCR_DANISH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'da'
-);
+export const OCR_DANISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'da');
export const VERTICAL_OCR_DANISH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'da'
);
-export const OCR_DARGWA = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'dar'
-);
+export const OCR_DARGWA = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'dar');
export const VERTICAL_OCR_DARGWA = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'dar'
);
-export const OCR_GERMAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'de'
-);
+export const OCR_GERMAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'de');
export const VERTICAL_OCR_GERMAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'de'
);
-export const OCR_ENGLISH = createOCRObject(
- RECOGNIZER_ENGLISH_CRNN_512,
- RECOGNIZER_ENGLISH_CRNN_256,
- RECOGNIZER_ENGLISH_CRNN_128,
- 'en'
-);
+export const OCR_ENGLISH = createOCRObject(RECOGNIZER_ENGLISH_CRNN, 'en');
export const VERTICAL_OCR_ENGLISH = createVerticalOCRObject(
- RECOGNIZER_ENGLISH_CRNN_512,
- RECOGNIZER_ENGLISH_CRNN_64,
+ RECOGNIZER_ENGLISH_CRNN,
'en'
);
-export const OCR_SPANISH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'es'
-);
+export const OCR_SPANISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'es');
export const VERTICAL_OCR_SPANISH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'es'
);
-export const OCR_ESTONIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'et'
-);
+export const OCR_ESTONIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'et');
export const VERTICAL_OCR_ESTONIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'et'
);
-export const OCR_FRENCH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'fr'
-);
+export const OCR_FRENCH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'fr');
export const VERTICAL_OCR_FRENCH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'fr'
);
-export const OCR_IRISH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'ga'
-);
+export const OCR_IRISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ga');
export const VERTICAL_OCR_IRISH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'ga'
);
-export const OCR_CROATIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'hr'
-);
+export const OCR_CROATIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'hr');
export const VERTICAL_OCR_CROATIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'hr'
);
-export const OCR_HUNGARIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'hu'
-);
+export const OCR_HUNGARIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'hu');
export const VERTICAL_OCR_HUNGARIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'hu'
);
-export const OCR_INDONESIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'id'
-);
+export const OCR_INDONESIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'id');
export const VERTICAL_OCR_INDONESIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'id'
);
-export const OCR_INGUSH = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'inh'
-);
+export const OCR_INGUSH = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'inh');
export const VERTICAL_OCR_INGUSH = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'inh'
);
-export const OCR_ICELANDIC = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'ic'
-);
+export const OCR_ICELANDIC = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ic');
export const VERTICAL_OCR_ICELANDIC = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'ic'
);
-export const OCR_ITALIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'it'
-);
+export const OCR_ITALIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'it');
export const VERTICAL_OCR_ITALIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'it'
);
-export const OCR_JAPANESE = createOCRObject(
- RECOGNIZER_JAPANESE_CRNN_512,
- RECOGNIZER_JAPANESE_CRNN_256,
- RECOGNIZER_JAPANESE_CRNN_128,
- 'ja'
-);
+export const OCR_JAPANESE = createOCRObject(RECOGNIZER_JAPANESE_CRNN, 'ja');
export const VERTICAL_OCR_JAPANESE = createVerticalOCRObject(
- RECOGNIZER_JAPANESE_CRNN_512,
- RECOGNIZER_JAPANESE_CRNN_64,
+ RECOGNIZER_JAPANESE_CRNN,
'ja'
);
-export const OCR_KARBADIAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'kbd'
-);
+export const OCR_KARBADIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'kbd');
export const VERTICAL_OCR_KARBADIAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'kbd'
);
-export const OCR_KANNADA = createOCRObject(
- RECOGNIZER_KANNADA_CRNN_512,
- RECOGNIZER_KANNADA_CRNN_256,
- RECOGNIZER_KANNADA_CRNN_128,
- 'kn'
-);
+export const OCR_KANNADA = createOCRObject(RECOGNIZER_KANNADA_CRNN, 'kn');
export const VERTICAL_OCR_KANNADA = createVerticalOCRObject(
- RECOGNIZER_KANNADA_CRNN_512,
- RECOGNIZER_KANNADA_CRNN_64,
+ RECOGNIZER_KANNADA_CRNN,
'kn'
);
-export const OCR_KOREAN = createOCRObject(
- RECOGNIZER_KOREAN_CRNN_512,
- RECOGNIZER_KOREAN_CRNN_256,
- RECOGNIZER_KOREAN_CRNN_128,
- 'ko'
-);
+export const OCR_KOREAN = createOCRObject(RECOGNIZER_KOREAN_CRNN, 'ko');
export const VERTICAL_OCR_KOREAN = createVerticalOCRObject(
- RECOGNIZER_KOREAN_CRNN_512,
- RECOGNIZER_KOREAN_CRNN_64,
+ RECOGNIZER_KOREAN_CRNN,
'ko'
);
-export const OCR_KURDISH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'ku'
-);
+export const OCR_KURDISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ku');
export const VERTICAL_OCR_KURDISH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'ku'
);
-export const OCR_LATIN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'la'
-);
+export const OCR_LATIN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'la');
export const VERTICAL_OCR_LATIN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'la'
);
-export const OCR_LAK = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'lbe'
-);
+export const OCR_LAK = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'lbe');
export const VERTICAL_OCR_LAK = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'lbe'
);
-export const OCR_LEZGHIAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'lez'
-);
+export const OCR_LEZGHIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'lez');
export const VERTICAL_OCR_LEZGHIAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'lez'
);
-export const OCR_LITHUANIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'lt'
-);
+export const OCR_LITHUANIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'lt');
export const VERTICAL_OCR_LITHUANIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'lt'
);
-export const OCR_LATVIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'lv'
-);
+export const OCR_LATVIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'lv');
export const VERTICAL_OCR_LATVIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'lv'
);
-export const OCR_MAORI = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'mi'
-);
+export const OCR_MAORI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'mi');
export const VERTICAL_OCR_MAORI = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'mi'
);
-export const OCR_MONGOLIAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'mn'
-);
+export const OCR_MONGOLIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'mn');
export const VERTICAL_OCR_MONGOLIAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'mn'
);
-export const OCR_MALAY = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'ms'
-);
+export const OCR_MALAY = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ms');
export const VERTICAL_OCR_MALAY = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'ms'
);
-export const OCR_MALTESE = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'mt'
-);
+export const OCR_MALTESE = createOCRObject(RECOGNIZER_LATIN_CRNN, 'mt');
export const VERTICAL_OCR_MALTESE = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'mt'
);
-export const OCR_DUTCH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'nl'
-);
+export const OCR_DUTCH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'nl');
export const VERTICAL_OCR_DUTCH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'nl'
);
-export const OCR_NORWEGIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'no'
-);
+export const OCR_NORWEGIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'no');
export const VERTICAL_OCR_NORWEGIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'no'
);
-export const OCR_OCCITAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'oc'
-);
+export const OCR_OCCITAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'oc');
export const VERTICAL_OCR_OCCITAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'oc'
);
-export const OCR_PALI = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'pi'
-);
+export const OCR_PALI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'pi');
export const VERTICAL_OCR_PALI = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'pi'
);
-export const OCR_POLISH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'pl'
-);
+export const OCR_POLISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'pl');
export const VERTICAL_OCR_POLISH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'pl'
);
-export const OCR_PORTUGUESE = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'pt'
-);
+export const OCR_PORTUGUESE = createOCRObject(RECOGNIZER_LATIN_CRNN, 'pt');
export const VERTICAL_OCR_PORTUGUESE = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'pt'
);
-export const OCR_ROMANIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'ro'
-);
+export const OCR_ROMANIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ro');
export const VERTICAL_OCR_ROMANIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'ro'
);
-export const OCR_RUSSIAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'ru'
-);
+export const OCR_RUSSIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'ru');
export const VERTICAL_OCR_RUSSIAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'ru'
);
export const OCR_SERBIAN_CYRILLIC = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
+ RECOGNIZER_CYRILLIC_CRNN,
'rsCyrillic'
);
export const VERTICAL_OCR_SERBIAN_CYRILLIC = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'rsCyrillic'
);
export const OCR_SERBIAN_LATIN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
+ RECOGNIZER_LATIN_CRNN,
'rsLatin'
);
export const VERTICAL_OCR_SERBIAN_LATIN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'rsLatin'
);
-export const OCR_SLOVAK = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'sk'
-);
+export const OCR_SLOVAK = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sk');
export const VERTICAL_OCR_SLOVAK = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'sk'
);
-export const OCR_SLOVENIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'sl'
-);
+export const OCR_SLOVENIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sl');
export const VERTICAL_OCR_SLOVENIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'sl'
);
-export const OCR_ALBANIAN = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'sq'
-);
+export const OCR_ALBANIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sq');
export const VERTICAL_OCR_ALBANIAN = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'sq'
);
-export const OCR_SWEDISH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'sv'
-);
+export const OCR_SWEDISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sv');
export const VERTICAL_OCR_SWEDISH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'sv'
);
-export const OCR_SWAHILI = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'sw'
-);
+export const OCR_SWAHILI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sw');
export const VERTICAL_OCR_SWAHILI = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'sw'
);
-export const OCR_TABASSARAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'tab'
-);
+export const OCR_TABASSARAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'tab');
export const VERTICAL_OCR_TABASSARAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'tab'
);
-export const OCR_TELUGU = createOCRObject(
- RECOGNIZER_TELUGU_CRNN_512,
- RECOGNIZER_TELUGU_CRNN_256,
- RECOGNIZER_TELUGU_CRNN_128,
- 'te'
-);
+export const OCR_TELUGU = createOCRObject(RECOGNIZER_TELUGU_CRNN, 'te');
export const VERTICAL_OCR_TELUGU = createVerticalOCRObject(
- RECOGNIZER_TELUGU_CRNN_512,
- RECOGNIZER_TELUGU_CRNN_64,
+ RECOGNIZER_TELUGU_CRNN,
'te'
);
-export const OCR_TAJIK = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'tjk'
-);
+export const OCR_TAJIK = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'tjk');
export const VERTICAL_OCR_TAJIK = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'tjk'
);
-export const OCR_TAGALOG = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'tl'
-);
+export const OCR_TAGALOG = createOCRObject(RECOGNIZER_LATIN_CRNN, 'tl');
export const VERTICAL_OCR_TAGALOG = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'tl'
);
-export const OCR_TURKISH = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'tr'
-);
+export const OCR_TURKISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'tr');
export const VERTICAL_OCR_TURKISH = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'tr'
);
-export const OCR_UKRAINIAN = createOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_256,
- RECOGNIZER_CYRILLIC_CRNN_128,
- 'uk'
-);
+export const OCR_UKRAINIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'uk');
export const VERTICAL_OCR_UKRAINIAN = createVerticalOCRObject(
- RECOGNIZER_CYRILLIC_CRNN_512,
- RECOGNIZER_CYRILLIC_CRNN_64,
+ RECOGNIZER_CYRILLIC_CRNN,
'uk'
);
-export const OCR_UZBEK = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'uz'
-);
+export const OCR_UZBEK = createOCRObject(RECOGNIZER_LATIN_CRNN, 'uz');
export const VERTICAL_OCR_UZBEK = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'uz'
);
-export const OCR_VIETNAMESE = createOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_256,
- RECOGNIZER_LATIN_CRNN_128,
- 'vi'
-);
+export const OCR_VIETNAMESE = createOCRObject(RECOGNIZER_LATIN_CRNN, 'vi');
export const VERTICAL_OCR_VIETNAMESE = createVerticalOCRObject(
- RECOGNIZER_LATIN_CRNN_512,
- RECOGNIZER_LATIN_CRNN_64,
+ RECOGNIZER_LATIN_CRNN,
'vi'
);
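
After this rewrite every exported constant carries a single recognizer. For illustration (the import path assumes the package root re-exports the constants):

```typescript
import { OCR_POLISH } from 'react-native-executorch'; // assumed re-export

// OCR_POLISH now has the shape:
// {
//   detectorSource: DETECTOR_CRAFT_MODEL,    // one CRAFT detector for all
//   recognizerSource: RECOGNIZER_LATIN_CRNN, // one recognizer per alphabet
//   language: 'pl',
// }
console.log(OCR_POLISH.recognizerSource);
```
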
diff --git a/packages/react-native-executorch/src/constants/versions.ts b/packages/react-native-executorch/src/constants/versions.ts
new file mode 100644
index 000000000..e71504b59
--- /dev/null
+++ b/packages/react-native-executorch/src/constants/versions.ts
@@ -0,0 +1,4 @@
+export const URL_PREFIX =
+ 'https://huggingface.co/software-mansion/react-native-executorch';
+export const VERSION_TAG = 'resolve/v0.7.0';
+export const NEXT_VERSION_TAG = 'resolve/v0.8.0';
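
With these shared constants, the model URLs become plain string substitution. For example, `DETECTOR_CRAFT_MODEL` from `models.ts` above expands to:

```typescript
// `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_quantized.pte`
const detectorUrl =
  'https://huggingface.co/software-mansion/react-native-executorch' +
  '-detector-craft/resolve/v0.7.0/xnnpack_quantized/xnnpack_craft_quantized.pte';
```
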
diff --git a/packages/react-native-executorch/src/controllers/OCRController.ts b/packages/react-native-executorch/src/controllers/OCRController.ts
index c37903614..f2e81188d 100644
--- a/packages/react-native-executorch/src/controllers/OCRController.ts
+++ b/packages/react-native-executorch/src/controllers/OCRController.ts
@@ -25,17 +25,12 @@ export class OCRController {
public load = async (
detectorSource: ResourceSource,
- recognizerSources: {
- recognizerLarge: ResourceSource;
- recognizerMedium: ResourceSource;
- recognizerSmall: ResourceSource;
- },
+ recognizerSource: ResourceSource,
language: OCRLanguage,
onDownloadProgressCallback?: (downloadProgress: number) => void
) => {
try {
- if (!detectorSource || Object.keys(recognizerSources).length !== 3)
- return;
+ if (!detectorSource || !recognizerSource) return;
if (!symbols[language]) {
throw new Error(getError(ETError.LanguageNotSupported));
@@ -47,18 +42,14 @@ export class OCRController {
const paths = await ResourceFetcher.fetch(
onDownloadProgressCallback,
detectorSource,
- recognizerSources.recognizerLarge,
- recognizerSources.recognizerMedium,
- recognizerSources.recognizerSmall
+ recognizerSource
);
- if (paths === null || paths?.length < 4) {
+ if (paths === null || paths.length < 2) {
throw new Error('Download interrupted!');
}
this.nativeModule = global.loadOCR(
paths[0]!,
paths[1]!,
- paths[2]!,
- paths[3]!,
symbols[language]
);
this.isReady = true;
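
The simplified `load` above relies on the fetcher returning one local path per source, in argument order. A sketch of that contract under assumed names (`fetchAll` stands in for `ResourceFetcher.fetch`):

```typescript
declare function fetchAll(
  onProgress: (progress: number) => void,
  ...sources: string[]
): Promise<string[] | null>;

async function loadOcrAssets(detectorSource: string, recognizerSource: string) {
  const paths = await fetchAll(() => {}, detectorSource, recognizerSource);
  // Two sources in, two paths out: paths[0] is the detector .pte,
  // paths[1] the recognizer .pte; anything shorter means an aborted download.
  if (paths === null || paths.length < 2) {
    throw new Error('Download interrupted!');
  }
  return paths;
}
```
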
diff --git a/packages/react-native-executorch/src/controllers/VerticalOCRController.ts b/packages/react-native-executorch/src/controllers/VerticalOCRController.ts
index a303de161..3e0223b5c 100644
--- a/packages/react-native-executorch/src/controllers/VerticalOCRController.ts
+++ b/packages/react-native-executorch/src/controllers/VerticalOCRController.ts
@@ -24,24 +24,14 @@ export class VerticalOCRController {
}
public load = async (
- detectorSources: {
- detectorLarge: ResourceSource;
- detectorNarrow: ResourceSource;
- },
- recognizerSources: {
- recognizerLarge: ResourceSource;
- recognizerSmall: ResourceSource;
- },
+ detectorSource: ResourceSource,
+ recognizerSource: ResourceSource,
language: OCRLanguage,
independentCharacters: boolean,
onDownloadProgressCallback: (downloadProgress: number) => void
) => {
try {
- if (
- Object.keys(detectorSources).length !== 2 ||
- Object.keys(recognizerSources).length !== 2
- )
- return;
+ if (!detectorSource || !recognizerSource) return;
if (!symbols[language]) {
throw new Error(getError(ETError.LanguageNotSupported));
@@ -52,19 +42,15 @@ export class VerticalOCRController {
const paths = await ResourceFetcher.fetch(
onDownloadProgressCallback,
- detectorSources.detectorLarge,
- detectorSources.detectorNarrow,
- independentCharacters
- ? recognizerSources.recognizerSmall
- : recognizerSources.recognizerLarge
+ detectorSource,
+ recognizerSource
);
- if (paths === null || paths.length < 3) {
+ if (paths === null || paths.length < 2) {
throw new Error('Download interrupted');
}
this.ocrNativeModule = global.loadVerticalOCR(
paths[0]!,
paths[1]!,
- paths[2]!,
symbols[language],
independentCharacters
);
diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts
index 90e3c0b43..b7aaec32e 100644
--- a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts
+++ b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts
@@ -17,9 +17,7 @@ export const useOCR = ({
}: {
model: {
detectorSource: ResourceSource;
- recognizerLarge: ResourceSource;
- recognizerMedium: ResourceSource;
- recognizerSmall: ResourceSource;
+ recognizerSource: ResourceSource;
language: OCRLanguage;
};
preventLoad?: boolean;
@@ -44,11 +42,7 @@ export const useOCR = ({
(async () => {
await controllerInstance.load(
model.detectorSource,
- {
- recognizerLarge: model.recognizerLarge,
- recognizerMedium: model.recognizerMedium,
- recognizerSmall: model.recognizerSmall,
- },
+ model.recognizerSource,
model.language,
setDownloadProgress
);
@@ -60,9 +54,7 @@ export const useOCR = ({
}, [
controllerInstance,
model.detectorSource,
- model.recognizerLarge,
- model.recognizerMedium,
- model.recognizerSmall,
+ model.recognizerSource,
model.language,
preventLoad,
]);
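
With the hook's model type reduced to a single `recognizerSource`, a custom configuration looks like this sketch (the URLs are placeholders, not real assets):

```typescript
import { useOCR } from 'react-native-executorch';

function CustomOCRScreen() {
  const model = useOCR({
    model: {
      detectorSource: 'https://example.com/xnnpack_craft_quantized.pte',
      recognizerSource: 'https://example.com/xnnpack_crnn_latin.pte',
      language: 'pl',
    },
  });
  return null; // rendering omitted in this sketch
}
```
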
diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts
index 1a6e1d270..c033d3721 100644
--- a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts
+++ b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts
@@ -17,10 +17,8 @@ export const useVerticalOCR = ({
preventLoad = false,
}: {
model: {
- detectorLarge: ResourceSource;
- detectorNarrow: ResourceSource;
- recognizerLarge: ResourceSource;
- recognizerSmall: ResourceSource;
+ detectorSource: ResourceSource;
+ recognizerSource: ResourceSource;
language: OCRLanguage;
};
independentCharacters?: boolean;
@@ -45,14 +43,8 @@ export const useVerticalOCR = ({
(async () => {
await controllerInstance.load(
- {
- detectorLarge: model.detectorLarge,
- detectorNarrow: model.detectorNarrow,
- },
- {
- recognizerLarge: model.recognizerLarge,
- recognizerSmall: model.recognizerSmall,
- },
+ model.detectorSource,
+ model.recognizerSource,
model.language,
independentCharacters,
setDownloadProgress
@@ -64,10 +56,8 @@ export const useVerticalOCR = ({
};
}, [
controllerInstance,
- model.detectorLarge,
- model.detectorNarrow,
- model.recognizerLarge,
- model.recognizerSmall,
+ model.detectorSource,
+ model.recognizerSource,
model.language,
independentCharacters,
preventLoad,
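
`useVerticalOCR` follows the same shape; a usage sketch with one of the rewritten constants (import path assumed):

```typescript
import { useVerticalOCR, VERTICAL_OCR_ENGLISH } from 'react-native-executorch';

function VerticalOCRScreen() {
  const model = useVerticalOCR({
    model: VERTICAL_OCR_ENGLISH, // { detectorSource, recognizerSource, language }
    independentCharacters: true, // Strategy 1: per-character recognition
  });
  return null; // rendering omitted in this sketch
}
```
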
diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts
index cddc6f595..af278812a 100644
--- a/packages/react-native-executorch/src/index.ts
+++ b/packages/react-native-executorch/src/index.ts
@@ -29,14 +29,11 @@ declare global {
) => any;
var loadOCR: (
detectorSource: string,
- recognizerLarge: string,
- recognizerMedium: string,
- recognizerSmall: string,
+ recognizer: string,
symbols: string
) => any;
var loadVerticalOCR: (
- detectorLarge: string,
- detectorNarrow: string,
+ detectorSource: string,
recognizer: string,
symbols: string,
independentCharacters?: boolean
diff --git a/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts b/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts
index 158b227ae..ac4e2e2ff 100644
--- a/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts
+++ b/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts
@@ -12,20 +12,14 @@ export class OCRModule {
async load(
model: {
detectorSource: ResourceSource;
- recognizerLarge: ResourceSource;
- recognizerMedium: ResourceSource;
- recognizerSmall: ResourceSource;
+ recognizerSource: ResourceSource;
language: OCRLanguage;
},
onDownloadProgressCallback: (progress: number) => void = () => {}
) {
await this.controller.load(
model.detectorSource,
- {
- recognizerLarge: model.recognizerLarge,
- recognizerMedium: model.recognizerMedium,
- recognizerSmall: model.recognizerSmall,
- },
+ model.recognizerSource,
model.language,
onDownloadProgressCallback
);
diff --git a/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts
index 303ace04a..3eebec716 100644
--- a/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts
+++ b/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts
@@ -11,24 +11,16 @@ export class VerticalOCRModule {
async load(
model: {
- detectorLarge: ResourceSource;
- detectorNarrow: ResourceSource;
- recognizerLarge: ResourceSource;
- recognizerSmall: ResourceSource;
+ detectorSource: ResourceSource;
+ recognizerSource: ResourceSource;
language: OCRLanguage;
},
independentCharacters: boolean,
onDownloadProgressCallback: (progress: number) => void = () => {}
) {
await this.controller.load(
- {
- detectorLarge: model.detectorLarge,
- detectorNarrow: model.detectorNarrow,
- },
- {
- recognizerLarge: model.recognizerLarge,
- recognizerSmall: model.recognizerSmall,
- },
+ model.detectorSource,
+ model.recognizerSource,
model.language,
independentCharacters,
onDownloadProgressCallback
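
The imperative module mirrors the hook; a hedged sketch of loading with the new two-argument model (import path and a no-argument constructor are assumed):

```typescript
import {
  VerticalOCRModule,
  VERTICAL_OCR_ENGLISH,
} from 'react-native-executorch';

async function loadVerticalOcr() {
  const module = new VerticalOCRModule();
  await module.load(
    VERTICAL_OCR_ENGLISH, // { detectorSource, recognizerSource, language }
    false, // independentCharacters: false selects Strategy 2 (joint)
    (progress) => console.log(`download: ${Math.round(progress * 100)}%`)
  );
  return module;
}
```
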