diff --git a/apps/computer-vision/app/ocr/index.tsx b/apps/computer-vision/app/ocr/index.tsx index 1fb282791..546e50a91 100644 --- a/apps/computer-vision/app/ocr/index.tsx +++ b/apps/computer-vision/app/ocr/index.tsx @@ -16,7 +16,9 @@ export default function OCRScreen() { height: number; }>(); - const model = useOCR({ model: OCR_ENGLISH }); + const model = useOCR({ + model: OCR_ENGLISH, + }); const { setGlobalGenerating } = useContext(GeneratingContext); useEffect(() => { setGlobalGenerating(model.isGenerating); diff --git a/docs/docs/02-hooks/02-computer-vision/useOCR.md b/docs/docs/02-hooks/02-computer-vision/useOCR.md index d07efd601..4813b5c1f 100644 --- a/docs/docs/02-hooks/02-computer-vision/useOCR.md +++ b/docs/docs/02-hooks/02-computer-vision/useOCR.md @@ -30,12 +30,6 @@ function App() { Type definitions ```typescript -interface RecognizerSources { - recognizerLarge: string | number; - recognizerMedium: string | number; - recognizerSmall: string | number; -} - type OCRLanguage = | 'abq' | 'ady' @@ -121,9 +115,7 @@ interface OCRDetection { **`model`** - Object containing the detector source, recognizer sources, and language. - **`detectorSource`** - A string that specifies the location of the detector binary. -- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels. -- **`recognizerMedium`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 256 pixels. -- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 128 pixels. +- **`recognizerSource`** - A string that specifies the location of the recognizer binary. - **`language`** - A parameter that specifies the language of the text to be recognized by the OCR. **`preventLoad?`** - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook. @@ -186,23 +178,18 @@ function App() { } ``` -## Language-Specific Recognizers +## Alphabet-Specific Recognizers -Each supported language requires its own set of recognizer models. -The built-in constants such as `RECOGNIZER_EN_CRNN_512`, `RECOGNIZER_PL_CRNN_256`, etc., point to specific models trained for a particular language. +Each supported alphabet requires its own recognizer model. The built-in constants, such as `RECOGNIZER_LATIN_CRNN` or `RECOGNIZER_CYRILLIC_CRNN`, point to specific models trained for a particular alphabet. > For example: > -> - To recognize **English** text, use: -> - `RECOGNIZER_EN_CRNN_512` -> - `RECOGNIZER_EN_CRNN_256` -> - `RECOGNIZER_EN_CRNN_128` -> - To recognize **Polish** text, use: -> - `RECOGNIZER_PL_CRNN_512` -> - `RECOGNIZER_PL_CRNN_256` -> - `RECOGNIZER_PL_CRNN_128` +> - To recognize text in languages using the **Latin** alphabet (like Polish, or German), use: +> - `RECOGNIZER_LATIN_CRNN` +> - To recognize text in languages using the **Cyrillic** alphabet (like Russian or Ukrainian), use: +> - `RECOGNIZER_CYRILLIC_CRNN` -You need to make sure the recognizer models you pass in `recognizerSources` match the `language` you specify. +You need to make sure the recognizer model you pass in `recognizerSource` matches the alphabet of the `language` you specify. 
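For illustration, here is a minimal sketch of calling the hook with the new single-recognizer API. The `OCR_ENGLISH` constant and the `isGenerating` field appear elsewhere in this PR; the import path and the exact return shape of the hook are assumptions, not guaranteed API:

```typescript
import { useOCR, OCR_ENGLISH } from 'react-native-executorch';

function App() {
  // OCR_ENGLISH is assumed to be a bundled config object supplying the
  // detector source, a Latin-alphabet recognizer source, and the 'en' tag.
  const model = useOCR({ model: OCR_ENGLISH });

  const runOcr = async (imageUri: string) => {
    // `forward` accepts a fetchable resource or a Base64-encoded string.
    const detections = await model.forward(imageUri);
    console.log(detections);
  };

  // `model.isGenerating` can drive a loading indicator while inference runs.
  return null; // render your UI here
}
```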
## Supported languages

@@ -275,33 +262,27 @@ You need to make sure the recognizer models you pass in `recognizerSources` matc

## Supported models

-| Model                                                   |    Type    |
-| ------------------------------------------------------- | :--------: |
-| [CRAFT_800\*](https://github.com/clovaai/CRAFT-pytorch) | Detector   |
-| [CRNN_512\*](https://www.jaided.ai/easyocr/modelhub/)   | Recognizer |
-| [CRNN_256\*](https://www.jaided.ai/easyocr/modelhub/)   | Recognizer |
-| [CRNN_128\*](https://www.jaided.ai/easyocr/modelhub/)   | Recognizer |
-
-\* - The number following the underscore (\_) indicates the input image width used during model export.
+| Model                                             |    Type    |
+| ------------------------------------------------- | :--------: |
+| [CRAFT](https://github.com/clovaai/CRAFT-pytorch) |  Detector  |
+| [CRNN](https://www.jaided.ai/easyocr/modelhub/)   | Recognizer |

## Benchmarks

### Model size

-| Model                          | XNNPACK [MB] |
-| ------------------------------ | :----------: |
-| Detector (CRAFT_800_QUANTIZED) | 19.8         |
-| Recognizer (CRNN_512)          | 15 - 18\*    |
-| Recognizer (CRNN_256)          | 16 - 18\*    |
-| Recognizer (CRNN_128)          | 17 - 19\*    |
+| Model                      | XNNPACK [MB]  |
+| -------------------------- | :-----------: |
+| Detector (CRAFT_QUANTIZED) | 20.9          |
+| Recognizer (CRNN)          | 18.5 - 25.2\* |

\* - The model weights vary depending on the language.

### Memory usage

-| Model                                                                                                  | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| ------------------------------------------------------------------------------------------------------ | :--------------------: | :----------------: |
-| Detector (CRAFT_800_QUANTIZED) + Recognizer (CRNN_512) + Recognizer (CRNN_256) + Recognizer (CRNN_128) | 1400                   | 1320               |
+| Model                                | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1400                   | 1320               |

### Inference time

@@ -317,16 +298,13 @@ Times presented in the tables are measured as consecutive runs of the model. Ini

**Time measurements:**

-| Metric                             | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
-| ---------------------------------- | ------------------ | ------------------ | ----------- | ----------------------- | --------------- |
-| **Total Inference Time**           | 652                | 600                | 2855        | 1092                    | 1034            |
-| **Detector (CRAFT_800_QUANTIZED)** | 220                | 221                | 1740        | 521                     | 492             |
-| **Recognizer (CRNN_512)**          |                    |                    |             |                         |                 |
-| ├─ Average Time                    | 45                 | 38                 | 110         | 40                      | 38              |
-| ├─ Total Time (3 runs)             | 135                | 114                | 330         | 120                     | 114             |
-| **Recognizer (CRNN_256)**          |                    |                    |             |                         |                 |
-| ├─ Average Time                    | 21                 | 18                 | 54          | 20                      | 19              |
-| ├─ Total Time (7 runs)             | 147                | 126                | 378         | 140                     | 133             |
-| **Recognizer (CRNN_128)**          |                    |                    |             |                         |                 |
-| ├─ Average Time                    | 11                 | 9                  | 27          | 10                      | 10              |
-| ├─ Total Time (7 runs)             | 77                 | 63                 | 189         | 70                      | 70              |
+Notice that the recognizer models were executed between 3 and 7 times during a single recognition.
+The values below represent the averages across all runs for the benchmark image.
+
+| Model                           | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------ | ------------------ | ---------------- | ----------------------- | --------------- |
+| **Total Inference Time**        | 652                | 600                | 2855             | 1092                    | 1034            |
+| Detector (CRAFT) `forward_800`  | 220                | 221                | 1740             | 521                     | 492             |
+| Recognizer (CRNN) `forward_512` | 45                 | 38                 | 110              | 40                      | 38              |
+| Recognizer (CRNN) `forward_256` | 21                 | 18                 | 54               | 20                      | 19              |
+| Recognizer (CRNN) `forward_128` | 11                 | 9                  | 27               | 10                      | 10              |

diff --git a/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md b/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md
index f317d527e..f4840be37 100644
--- a/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md
+++ b/docs/docs/02-hooks/02-computer-vision/useVerticalOCR.md
@@ -129,12 +129,10 @@ interface OCRDetection {

 ### Arguments

-**`model`** - Object containing the detector sources, recognizer sources, and language.
+**`model`** - Object containing the detector source, recognizer source, and language.

-- **`detectorLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 1280 pixels.
-- **`detectorNarrow`** - A string that specifies the location of the detector binary file which accepts input images with a width of 320 pixels.
-- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels.
-- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 64 pixels.
+- **`detectorSource`** - A string that specifies the location of the detector binary.
+- **`recognizerSource`** - A string that specifies the location of the recognizer binary.
 - **`language`** - A parameter that specifies the language of the text to be recognized by the OCR.

 **`independentCharacters`** – A boolean parameter that indicates whether the text in the image consists of a random sequence of characters. If set to true, the algorithm will scan each character individually instead of reading them as continuous text.

@@ -202,21 +200,18 @@ function App() {
 }
 ```

-## Language-Specific Recognizers
+## Alphabet-Specific Recognizers

-Each supported language requires its own set of recognizer models.
-The built-in constants such as `RECOGNIZER_EN_CRNN_512`, `RECOGNIZER_PL_CRNN_64`, etc., point to specific models trained for a particular language.
+Each supported alphabet requires its own recognizer model.
+The built-in constants, such as `RECOGNIZER_LATIN_CRNN` or `RECOGNIZER_CYRILLIC_CRNN`, point to specific models trained for a particular alphabet.

 > For example:
 >
-> - To recognize **English** text, use:
->   - `RECOGNIZER_EN_CRNN_512`
->   - `RECOGNIZER_EN_CRNN_64`
-> - To recognize **Polish** text, use:
->   - `RECOGNIZER_PL_CRNN_512`
->   - `RECOGNIZER_PL_CRNN_64`
+> - To recognize text in languages using the **Latin** alphabet (like Polish or German), use:
+>   - `RECOGNIZER_LATIN_CRNN`
+> - To recognize text in languages using the **Cyrillic** alphabet (like Russian or Ukrainian), use:
+>   - `RECOGNIZER_CYRILLIC_CRNN`

-You need to make sure the recognizer models you pass in `recognizerSources` match the `language` you specify.
+You need to make sure the recognizer model you pass in `recognizerSource` matches the alphabet of the `language` you specify.

## Supported languages

@@ -289,14 +284,10 @@ You need to make sure the recognizer models you pass in `recognizerSources` matc

## Supported models

-| Model                                                    | Type       |
-| -------------------------------------------------------- | ---------- |
-| [CRAFT_1280\*](https://github.com/clovaai/CRAFT-pytorch) | Detector   |
-| [CRAFT_320\*](https://github.com/clovaai/CRAFT-pytorch)  | Detector   |
-| [CRNN_512\*](https://www.jaided.ai/easyocr/modelhub/)    | Recognizer |
-| [CRNN_64\*](https://www.jaided.ai/easyocr/modelhub/)     | Recognizer |
-
-\* - The number following the underscore (\_) indicates the input image width used during model export.
+| Model                                             |    Type    |
+| ------------------------------------------------- | :--------: |
+| [CRAFT](https://github.com/clovaai/CRAFT-pytorch) |  Detector  |
+| [CRNN](https://www.jaided.ai/easyocr/modelhub/)   | Recognizer |

## Benchmarks

@@ -313,10 +304,9 @@ You need to make sure the recognizer models you pass in `recognizerSources` matc

### Memory usage

-| Model                                                                 | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| --------------------------------------------------------------------- | :--------------------: | :----------------: |
-| Detector (CRAFT_1280) + Detector (CRAFT_320) + Recognizer (CRNN_512)  | 1540                   | 1470               |
-| Detector(CRAFT_1280) + Detector(CRAFT_320) + Recognizer (CRNN_64)     | 1070                   | 1000               |
+| Model                                | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1000-1600              | 1000-1500          |

### Inference time

@@ -332,16 +322,13 @@ Times presented in the tables are measured as consecutive runs of the model. Ini

**Time measurements:**

-| Metric                                                               | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
-| -------------------------------------------------------------------- | ------------------ | ------------------ | ----------- | ----------------------- | --------------- |
-| **Total Inference Time**                                             | 1104               | 1113               | 8840        | 2845                    | 2640            |
-| **Detector (CRAFT_1280_QUANTIZED)**                                  | 501                | 507                | 4317        | 1405                    | 1275            |
-| **Detector (CRAFT_320_QUANTIZED)**                                   |                    |                    |             |                         |                 |
-| ├─ Average Time                                                      | 125                | 121                | 1060        | 338                     | 299             |
-| ├─ Total Time (4 runs)                                               | 500                | 484                | 4240        | 1352                    | 1196            |
-| **Recognizer (CRNN_64)** (_With Flag `independentChars == true`_)    |                    |                    |             |                         |                 |
-| ├─ Average Time                                                      | 5                  | 6                  | 14          | 7                       | 6               |
-| ├─ Total Time (21 runs)                                              | 105                | 126                | 294         | 147                     | 126             |
-| **Recognizer (CRNN_512)** (_With Flag `independentChars == false`_)  |                    |                    |             |                         |                 |
-| ├─ Average Time                                                      | 46                 | 42                 | 109         | 47                      | 37              |
-| ├─ Total Time (4 runs)                                               | 184                | 168                | 436         | 188                     | 148             |
+Notice that the recognizer models, as well as the detector's `forward_320` method, were executed between 4 and 21 times during a single recognition.
+The values below represent the averages across all runs for the benchmark image.
+
+| Model                           | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------ | ------------------ | ---------------- | ----------------------- | --------------- |
+| **Total Inference Time**        | 1104               | 1113               | 8840             | 2845                    | 2640            |
+| Detector (CRAFT) `forward_1280` | 501                | 507                | 4317             | 1405                    | 1275            |
+| Detector (CRAFT) `forward_320`  | 125                | 121                | 1060             | 338                     | 299             |
+| Recognizer (CRNN) `forward_512` | 46                 | 42                 | 109              | 47                      | 37              |
+| Recognizer (CRNN) `forward_64`  | 5                  | 6                  | 14               | 7                       | 6               |

diff --git a/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md b/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md
index c46e65970..f8a42fdf2 100644
--- a/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md
+++ b/docs/docs/03-typescript-api/02-computer-vision/OCRModule.md
@@ -22,11 +22,11 @@ const detections = await ocrModule.forward(imageUri);

 ### Methods

-| Method    | Type | Description |
-| --------- | ---- | ----------- |
-| `load`    | `(model: { detectorSource: ResourceSource; recognizerLarge: ResourceSource; recognizerMedium: ResourceSource; recognizerSmall: ResourceSource; language: OCRLanguage }, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorSource` is a string that specifies the location of the detector binary, `recognizerLarge` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels, `recognizerMedium` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 256 pixels, `recognizerSmall` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 128 pixels, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
-| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
-| `delete`  | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete model while it's generating. |
+| Method    | Type | Description |
+| --------- | ---- | ----------- |
+| `load`    | `(model: { detectorSource: ResourceSource; recognizerSource: ResourceSource; language: OCRLanguage }, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorSource` is a string that specifies the location of the detector binary, `recognizerSource` is a string that specifies the location of the recognizer binary, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
+| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
+| `delete`  | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete the model while it's generating. |
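To make the new `load` signature concrete, here is a hedged sketch. The source URLs are placeholders (substitute your own hosted `.pte` files), and `ocrModule` is assumed to be the instance used in the snippet at the top of this page:

```typescript
// Placeholder sources -- point these at your actual detector/recognizer binaries.
await ocrModule.load(
  {
    detectorSource: 'https://example.com/craft.pte',
    recognizerSource: 'https://example.com/crnn_latin.pte',
    language: 'en',
  },
  (progress) => console.log('Download progress:', progress)
);

// `forward` accepts a fetchable resource or a Base64-encoded string.
const detections = await ocrModule.forward(imageUri);

// Release native memory once done (not while the model is generating).
ocrModule.delete();
```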
Type definitions

@@ -116,12 +116,10 @@ interface OCRDetection {

To load the model, use the `load` method. It accepts an object:

-**`model`** - Object containing the detector source, recognizer sources, and language.
+**`model`** - Object containing the detector source, recognizer source, and language.

 - **`detectorSource`** - A string that specifies the location of the detector binary.
-- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels.
-- **`recognizerMedium`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 256 pixels.
-- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 128 pixels.
+- **`recognizerSource`** - A string that specifies the location of the recognizer binary.
 - **`language`** - A parameter that specifies the language of the text to be recognized by the OCR.

 **`onDownloadProgressCallback`** - (Optional) Function called on download progress.

diff --git a/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md b/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md
index ecbe5d9d9..c60a95178 100644
--- a/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md
+++ b/docs/docs/03-typescript-api/02-computer-vision/VerticalOCRModule.md
@@ -26,11 +26,11 @@ const detections = await verticalOCRModule.forward(imageUri);

 ### Methods

-| Method    | Type | Description |
-| --------- | ---- | ----------- |
-| `load`    | `(model: { detectorLarge: ResourceSource; detectorNarrow: ResourceSource; recognizerLarge: ResourceSource; recognizerSmall: ResourceSource; language: OCRLanguage }, independentCharacters: boolean, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorLarge` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 1280 pixels, `detectorNarrow` is a string that specifies the location of the detector binary file which accepts input images with a width of 320 pixels, `recognizerLarge` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels, `recognizerSmall` is a string that specifies the location of the recognizer binary file which accepts input images with a width of 64 pixels, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
-| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
-| `delete`  | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete model while it's generating. |
+| Method    | Type | Description |
+| --------- | ---- | ----------- |
+| `load`    | `(model: { detectorSource: ResourceSource; recognizerSource: ResourceSource; language: OCRLanguage }, independentCharacters: boolean, onDownloadProgressCallback?: (progress: number) => void): Promise<void>` | Loads the model, where `detectorSource` is a string that specifies the location of the detector binary, `recognizerSource` is a string that specifies the location of the recognizer binary, and `language` is a parameter that specifies the language of the text to be recognized by the OCR. |
+| `forward` | `(imageSource: string): Promise<OCRDetection[]>` | Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. |
+| `delete`  | `(): void` | Release the memory held by the module. Calling `forward` afterwards is invalid. Note that you cannot delete the model while it's generating. |
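As a sketch of the vertical variant (placeholder sources again; `verticalOCRModule` is assumed to be the instance from the snippet at the top of this page), note the extra positional `independentCharacters` argument:

```typescript
// `independentCharacters: true` makes the pipeline scan each character
// individually (e.g. vertical serial numbers) instead of reading them
// as continuous text.
await verticalOCRModule.load(
  {
    detectorSource: 'https://example.com/craft.pte',
    recognizerSource: 'https://example.com/crnn_latin.pte',
    language: 'en',
  },
  true, // independentCharacters
  (progress) => console.log('Download progress:', progress)
);

const detections = await verticalOCRModule.forward(imageUri);
```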
Type definitions

@@ -130,12 +130,10 @@ interface OCRDetection {

To load the model, use the `load` method. It accepts:

-**`model`** - Object containing the detector sources, recognizer sources, and language.
+**`model`** - Object containing the detector source, recognizer source, and language.

-- **`detectorLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 1280 pixels.
-- **`detectorNarrow`** - A string that specifies the location of the detector binary file which accepts input images with a width of 320 pixels.
-- **`recognizerLarge`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 512 pixels.
-- **`recognizerSmall`** - A string that specifies the location of the recognizer binary file which accepts input images with a width of 64 pixels.
+- **`detectorSource`** - A string that specifies the location of the detector binary.
+- **`recognizerSource`** - A string that specifies the location of the recognizer binary.
 - **`language`** - A parameter that specifies the language of the text to be recognized by the OCR.

 **`independentCharacters`** – A boolean parameter that indicates whether the text in the image consists of a random sequence of characters. If set to true, the algorithm will scan each character individually instead of reading them as continuous text.

diff --git a/docs/docs/04-benchmarks/inference-time.md b/docs/docs/04-benchmarks/inference-time.md
index dbfc2b21d..7777b301b 100644
--- a/docs/docs/04-benchmarks/inference-time.md
+++ b/docs/docs/04-benchmarks/inference-time.md
@@ -32,24 +32,26 @@ Times presented in the tables are measured as consecutive runs of the model. Ini

 Notice that the recognizer models were executed between 3 and 7 times during a single recognition.
 The values below represent the averages across all runs for the benchmark image.

-| Model                          | iPhone 17 Pro (XNNPACK) [ms] | iPhone 16 Pro (XNNPACK) [ms] | iPhone SE 3 (XNNPACK) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| ------------------------------ | :--------------------------: | :--------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
-| Detector (CRAFT_800_QUANTIZED) | 220                          | 221                          | 1740                       | 521                               | 492                       |
-| Recognizer (CRNN_512)          | 45                           | 38                           | 110                        | 40                                | 38                        |
-| Recognizer (CRNN_256)          | 21                           | 18                           | 54                         | 20                                | 19                        |
-| Recognizer (CRNN_128)          | 11                           | 9                            | 27                         | 10                                | 10                        |
+| Model                           | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------ | ------------------ | ---------------- | ----------------------- | --------------- |
+| **Total Inference Time**        | 652                | 600                | 2855             | 1092                    | 1034            |
+| Detector (CRAFT) `forward_800`  | 220                | 221                | 1740             | 521                     | 492             |
+| Recognizer (CRNN) `forward_512` | 45                 | 38                 | 110              | 40                      | 38              |
+| Recognizer (CRNN) `forward_256` | 21                 | 18                 | 54               | 20                      | 19              |
+| Recognizer (CRNN) `forward_128` | 11                 | 9                  | 27               | 10                      | 10              |

 ## Vertical OCR

-Notice that the recognizer models, as well as detector CRAFT_320 model, were executed between 4 and 21 times during a single recognition.
+Notice that the recognizer models, as well as the detector's `forward_320` method, were executed between 4 and 21 times during a single recognition.
 The values below represent the averages across all runs for the benchmark image.
-| Model                           | iPhone 17 Pro (XNNPACK) [ms] | iPhone 16 Pro (XNNPACK) [ms] | iPhone SE 3 (XNNPACK) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| ------------------------------- | :--------------------------: | :--------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
-| Detector (CRAFT_1280_QUANTIZED) | 501                          | 507                          | 4317                       | 1405                              | 1275                      |
-| Detector (CRAFT_320_QUANTIZED)  | 125                          | 121                          | 1060                       | 338                               | 299                       |
-| Recognizer (CRNN_512)           | 46                           | 42                           | 109                        | 47                                | 37                        |
-| Recognizer (CRNN_64)            | 5                            | 6                            | 14                         | 7                                 | 6                         |
+| Model                           | iPhone 17 Pro [ms] | iPhone 16 Pro [ms] | iPhone SE 3 [ms] | Samsung Galaxy S24 [ms] | OnePlus 12 [ms] |
+| ------------------------------- | ------------------ | ------------------ | ---------------- | ----------------------- | --------------- |
+| **Total Inference Time**        | 1104               | 1113               | 8840             | 2845                    | 2640            |
+| Detector (CRAFT) `forward_1280` | 501                | 507                | 4317             | 1405                    | 1275            |
+| Detector (CRAFT) `forward_320`  | 125                | 121                | 1060             | 338                     | 299             |
+| Recognizer (CRNN) `forward_512` | 46                 | 42                 | 109              | 47                      | 37              |
+| Recognizer (CRNN) `forward_64`  | 5                  | 6                  | 14               | 7                       | 6               |

## LLMs

diff --git a/docs/docs/04-benchmarks/memory-usage.md b/docs/docs/04-benchmarks/memory-usage.md
index a0c5a7b6d..3058b5725 100644
--- a/docs/docs/04-benchmarks/memory-usage.md
+++ b/docs/docs/04-benchmarks/memory-usage.md
@@ -29,16 +29,15 @@ All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12 (A

 ## OCR

-| Model                                                                                                  | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| ------------------------------------------------------------------------------------------------------ | :--------------------: | :----------------: |
-| Detector (CRAFT_800_QUANTIZED) + Recognizer (CRNN_512) + Recognizer (CRNN_256) + Recognizer (CRNN_128) | 1400                   | 1320               |
+| Model                                | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1400                   | 1320               |

 ## Vertical OCR

-| Model                                                                                     | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| ------------------------------------------------------------------------------------------ | :--------------------: | :----------------: |
-| Detector (CRAFT_1280_QUANTIZED) + Detector (CRAFT_320_QUANTIZED) + Recognizer (CRNN_512)  | 1540                   | 1470               |
-| Detector(CRAFT_1280_QUANTIZED) + Detector(CRAFT_320_QUANTIZED) + Recognizer (CRNN_64)     | 1070                   | 1000               |
+| Model                                | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ------------------------------------ | :--------------------: | :----------------: |
+| Detector (CRAFT) + Recognizer (CRNN) | 1000-1600              | 1000-1500          |

 ## LLMs

diff --git a/docs/docs/04-benchmarks/model-size.md b/docs/docs/04-benchmarks/model-size.md
index 00e819494..54f33e2cb 100644
--- a/docs/docs/04-benchmarks/model-size.md
+++ b/docs/docs/04-benchmarks/model-size.md
@@ -25,23 +25,19 @@ title: Model Size

 ## OCR

-| Model                          | XNNPACK [MB] |
-| ------------------------------ | :----------: |
-| Detector (CRAFT_800_QUANTIZED) | 19.8         |
-| Recognizer (CRNN_512)          | 15 - 18\*    |
-| Recognizer (CRNN_256)          | 16 - 18\*    |
-| Recognizer (CRNN_128)          | 17 - 19\*    |
+| Model                      | XNNPACK [MB]  |
+| -------------------------- | :-----------: |
+| Detector (CRAFT_QUANTIZED) | 20.9          |
+| Recognizer (CRNN)          | 18.5 - 25.2\* |

 \* - The model weights vary depending on the language.

 ## Vertical OCR

-| Model                           | XNNPACK [MB] |
-| ------------------------------- | :----------: |
-| Detector (CRAFT_1280_QUANTIZED) | 19.8         |
-| Detector (CRAFT_320_QUANTIZED)  | 19.8         |
-| Recognizer (CRNN_EN_512)        | 15 - 18\*    |
-| Recognizer (CRNN_EN_64)         | 15 - 16\*    |
+| Model                      | XNNPACK [MB]  |
+| -------------------------- | :-----------: |
+| Detector (CRAFT_QUANTIZED) | 20.9          |
+| Recognizer (CRNN)          | 18.5 - 25.2\* |

 \* - The model weights vary depending on the language.
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h index 0cc063379..9b96f1761 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Constants.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include namespace rnexecutorch::models::ocr::constants { @@ -27,6 +29,14 @@ inline constexpr int32_t kMaxWidth = inline constexpr int32_t kSingleCharacterMinSize = 70; inline constexpr int32_t kRecognizerImageSize = 1280; inline constexpr int32_t kVerticalLineThreshold = 20; +inline constexpr int32_t kSmallDetectorWidth = 320; +inline constexpr int32_t kMediumDetectorWidth = 800; +inline constexpr int32_t kLargeDetectorWidth = 1280; +inline constexpr std::array kDetectorInputWidths = { + kSmallDetectorWidth, kMediumDetectorWidth, kLargeDetectorWidth}; +inline constexpr std::array kRecognizerInputWidths = { + kSmallVerticalRecognizerWidth, kSmallRecognizerWidth, + kMediumRecognizerWidth, kLargeRecognizerWidth}; /* Mean and variance values for image normalization were used in EASYOCR pipeline diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp index 2b8c46945..cb8baee5f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.cpp @@ -1,55 +1,79 @@ #include "Detector.h" +#include "Constants.h" +#include #include #include #include - +#include +#include namespace rnexecutorch::models::ocr { Detector::Detector(const std::string &modelSource, std::shared_ptr callInvoker) : BaseModel(modelSource, callInvoker) { - auto inputShapes = getAllInputShapes(); - if (inputShapes.empty()) { - throw std::runtime_error( - "Detector model seems to not take any input tensors."); - } - std::vector modelInputShape = inputShapes[0]; - if (modelInputShape.size() < 2) { - throw std::runtime_error("Unexpected detector model input size, expected " - "at least 2 dimensions but got: " + - std::to_string(modelInputShape.size()) + "."); + + for (auto input_size : constants::kDetectorInputWidths) { + std::string methodName = "forward_" + std::to_string(input_size); + auto inputShapes = getAllInputShapes(methodName); + if (inputShapes[0].size() < 2) { + throw std::runtime_error( + "Unexpected detector model input size for method:" + methodName + + ", expected " + "at least 2 dimensions but got: " + + std::to_string(inputShapes[0].size()) + "."); + } } - modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1], - modelInputShape[modelInputShape.size() - 2]); } -cv::Size Detector::getModelImageSize() const noexcept { return modelImageSize; } - -std::vector Detector::generate(const cv::Mat &inputImage) { +std::vector Detector::generate(const cv::Mat &inputImage, + int32_t inputWidth) { /* Detector as an input accepts tensor with a shape of [1, 3, H, H]. - where H is a constant for model. In our supported models it is currently + where H is a constant for model. In our supported model it is currently either H=800 or H=1280. Due to big influence of resize to quality of recognition the image preserves original aspect ratio and the missing parts are filled with padding. 
*/ - auto inputShapes = getAllInputShapes(); + + utils::validateInputWidth(inputWidth, constants::kDetectorInputWidths, + "Detector"); + + std::string methodName = "forward_" + std::to_string(inputWidth); + auto inputShapes = getAllInputShapes(methodName); + + cv::Size modelInputSize = calculateModelImageSize(inputWidth); + cv::Mat resizedInputImage = - image_processing::resizePadded(inputImage, getModelImageSize()); + image_processing::resizePadded(inputImage, modelInputSize); TensorPtr inputTensor = image_processing::getTensorFromMatrix( inputShapes[0], resizedInputImage, constants::kNormalizationMean, constants::kNormalizationVariance); - auto forwardResult = BaseModel::forward(inputTensor); + auto forwardResult = BaseModel::execute(methodName, {inputTensor}); + if (!forwardResult.ok()) { throw std::runtime_error( - "Failed to forward, error: " + + "Failed to " + methodName + " error: " + std::to_string(static_cast(forwardResult.error()))); } - return postprocess(forwardResult->at(0).toTensor()); + return postprocess(forwardResult->at(0).toTensor(), modelInputSize); +} + +cv::Size Detector::calculateModelImageSize(int32_t methodInputWidth) { + + utils::validateInputWidth(methodInputWidth, constants::kDetectorInputWidths, + "Detector"); + std::string methodName = "forward_" + std::to_string(methodInputWidth); + + auto inputShapes = getAllInputShapes(methodName); + std::vector modelInputShape = inputShapes[0]; + cv::Size modelInputSize = + cv::Size(modelInputShape[modelInputShape.size() - 1], + modelInputShape[modelInputShape.size() - 2]); + return modelInputSize; } std::vector -Detector::postprocess(const Tensor &tensor) const { +Detector::postprocess(const Tensor &tensor, const cv::Size &modelInputSize) { /* The output of the model consists of two matrices (heat maps): 1. ScoreText(Score map) - The probability of a region containing character. @@ -65,7 +89,7 @@ Detector::postprocess(const Tensor &tensor) const { */ auto [scoreTextMat, scoreAffinityMat] = utils::interleavedArrayToMats( tensorData, - cv::Size(modelImageSize.width / 2, modelImageSize.height / 2)); + cv::Size(modelInputSize.width / 2, modelInputSize.height / 2)); /* Heatmaps are then converted into list of bounding boxes. 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h index f9fb2a39b..2a3c98ddb 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Detector.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -16,15 +17,20 @@ namespace rnexecutorch::models::ocr { using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class Detector final : public BaseModel { +class Detector : public BaseModel { public: explicit Detector(const std::string &modelSource, std::shared_ptr callInvoker); - std::vector generate(const cv::Mat &inputImage); - cv::Size getModelImageSize() const noexcept; + virtual std::vector generate(const cv::Mat &inputImage, + int32_t inputWidth); -private: - std::vector postprocess(const Tensor &tensor) const; - cv::Size modelImageSize; + cv::Size calculateModelImageSize(int32_t methodInputWidth); + +protected: + TensorPtr runInference(const cv::Mat &inputImage, int32_t inputWidth, + const std::string &detectorName); + + std::vector postprocess(const Tensor &tensor, + const cv::Size &modelInputSize); }; } // namespace rnexecutorch::models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp index 4045886d3..166e4de8b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp @@ -1,16 +1,14 @@ #include "OCR.h" +#include "Constants.h" #include #include namespace rnexecutorch::models::ocr { -OCR::OCR(const std::string &detectorSource, - const std::string &recognizerSourceLarge, - const std::string &recognizerSourceMedium, - const std::string &recognizerSourceSmall, std::string symbols, +OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource, + const std::string &symbols, std::shared_ptr callInvoker) : detector(detectorSource, callInvoker), - recognitionHandler(recognizerSourceLarge, recognizerSourceMedium, - recognizerSourceSmall, symbols, callInvoker) {} + recognitionHandler(recognizerSource, symbols, callInvoker) {} std::vector OCR::generate(std::string input) { cv::Mat image = image_processing::readImage(input); @@ -23,7 +21,8 @@ std::vector OCR::generate(std::string input) { with text. They are corresponding to the image of size 1280x1280, which is a size later used by Recognition Handler. */ - std::vector bboxesList = detector.generate(image); + std::vector bboxesList = + detector.generate(image, constants::kMediumDetectorWidth); cv::cvtColor(image, image, cv::COLOR_BGR2GRAY); /* diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h index c7c7e61c4..8b283a2c0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h @@ -17,16 +17,15 @@ namespace models::ocr { 2. Recognition - recognizing the text in the bounding boxes, the result is a list of strings and corresponding boxes & confidence scores. - Recognition uses three models, each model is resposible for recognizing text - of different sizes (e.g. large - 512x64, medium - 256x64, small - 128x64). 
+ Recognition uses one model with three methods, each method is resposible for + recognizing text of different sizes (e.g. large - 512x64, medium - 256x64, + small - 128x64). */ class OCR final { public: explicit OCR(const std::string &detectorSource, - const std::string &recognizerSourceLarge, - const std::string &recognizerSourceMedium, - const std::string &recognizerSourceSmall, std::string symbols, + const std::string &recognizerSource, const std::string &symbols, std::shared_ptr callInvoker); std::vector generate(std::string input); std::size_t getMemoryLowerBound() const noexcept; @@ -39,6 +38,5 @@ class OCR final { } // namespace models::ocr REGISTER_CONSTRUCTOR(models::ocr::OCR, std::string, std::string, std::string, - std::string, std::string, std::shared_ptr); } // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp index f0427accf..dfde73765 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp @@ -5,29 +5,18 @@ namespace rnexecutorch::models::ocr { RecognitionHandler::RecognitionHandler( - const std::string &recognizerSourceLarge, - const std::string &recognizerSourceMedium, - const std::string &recognizerSourceSmall, std::string symbols, + const std::string &recognizerSource, const std::string &symbols, std::shared_ptr callInvoker) - : converter(symbols), recognizerLarge(recognizerSourceLarge, callInvoker), - recognizerMedium(recognizerSourceMedium, callInvoker), - recognizerSmall(recognizerSourceSmall, callInvoker) { - memorySizeLowerBound = recognizerSmall.getMemoryLowerBound() + - recognizerMedium.getMemoryLowerBound() + - recognizerLarge.getMemoryLowerBound(); + : converter(symbols), recognizer(recognizerSource, callInvoker) { + memorySizeLowerBound = recognizer.getMemoryLowerBound(); } std::pair, float> RecognitionHandler::runModel(cv::Mat image) { // Note that the height of an image is always equal to 64. 
- if (image.cols >= constants::kLargeRecognizerWidth) { - return recognizerLarge.generate(image); - } - if (image.cols >= constants::kMediumRecognizerWidth) { - return recognizerMedium.generate(image); - } - return recognizerSmall.generate(image); + int32_t desiredWidth = utils::getDesiredWidth(image, false); + return recognizer.generate(image, desiredWidth); } void RecognitionHandler::processBBox(std::vector &boxList, @@ -100,9 +89,5 @@ std::size_t RecognitionHandler::getMemoryLowerBound() const noexcept { return memorySizeLowerBound; } -void RecognitionHandler::unload() noexcept { - recognizerSmall.unload(); - recognizerMedium.unload(); - recognizerLarge.unload(); -} +void RecognitionHandler::unload() noexcept { recognizer.unload(); } } // namespace rnexecutorch::models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h index d585b74bc..abdfe5ba9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.h @@ -17,10 +17,8 @@ namespace rnexecutorch::models::ocr { class RecognitionHandler final { public: - explicit RecognitionHandler(const std::string &recognizerSourceLarge, - const std::string &recognizerSourceMedium, - const std::string &recognizerSourceSmall, - std::string symbols, + explicit RecognitionHandler(const std::string &recognizer, + const std::string &symbols, std::shared_ptr callInvoker); std::vector recognize(std::vector bboxesList, cv::Mat &imgGray, @@ -35,8 +33,6 @@ class RecognitionHandler final { types::PaddingInfo ratioAndPadding); std::size_t memorySizeLowerBound{0}; CTCLabelConverter converter; - Recognizer recognizerLarge; - Recognizer recognizerMedium; - Recognizer recognizerSmall; + Recognizer recognizer; }; } // namespace rnexecutorch::models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp index e076dabed..237c5154e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.cpp @@ -1,29 +1,21 @@ #include "Recognizer.h" +#include "Constants.h" #include #include #include #include #include +#include #include +#include namespace rnexecutorch::models::ocr { Recognizer::Recognizer(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { - auto inputShapes = getAllInputShapes(); - if (inputShapes.empty()) { - throw std::runtime_error("Recognizer model has no input tensors."); - } - std::vector modelInputShape = inputShapes[0]; - if (modelInputShape.size() < 2) { - throw std::runtime_error("Unexpected Recognizer model input shape."); - } - modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1], - modelInputShape[modelInputShape.size() - 2]); -} + : BaseModel(modelSource, callInvoker) {} std::pair, float> -Recognizer::generate(const cv::Mat &grayImage) { +Recognizer::generate(const cv::Mat &grayImage, int32_t inputWidth) { /* In our pipeline we use three types of Recognizer, each designated to handle different image sizes: @@ -33,10 +25,19 @@ Recognizer::generate(const cv::Mat &grayImage) { The `generate` function as an argument accepts an image in grayscale already resized to the expected size. 
*/
-  std::vector<int32_t> tensorDims = getAllInputShapes()[0];
+  utils::validateInputWidth(inputWidth, constants::kRecognizerInputWidths,
+                            "Recognizer");
+
+  std::string methodName = "forward_" + std::to_string(inputWidth);
+  auto shapes = getAllInputShapes(methodName);
+  if (shapes.empty()) {
+    throw std::runtime_error("Recognizer model: Input shapes for " +
+                             methodName + " not found");
+  }
+  std::vector<int32_t> tensorDims = shapes[0];
   TensorPtr inputTensor =
       image_processing::getTensorFromMatrixGray(tensorDims, grayImage);
-  auto forwardResult = BaseModel::forward(inputTensor);
+  auto forwardResult = BaseModel::execute(methodName, {inputTensor});
   if (!forwardResult.ok()) {
     throw std::runtime_error(
         "Failed to forward in Recognizer, error: " +
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h
index 50eafe968..337e21483 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Recognizer.h
@@ -25,12 +25,11 @@ class Recognizer final : public BaseModel {
 public:
   explicit Recognizer(const std::string &modelSource,
                       std::shared_ptr callInvoker);
-  std::pair<std::vector<int32_t>, float> generate(const cv::Mat &grayImage);
+  std::pair<std::vector<int32_t>, float> generate(const cv::Mat &grayImage,
+                                                  int32_t inputWidth);

 private:
   std::pair<std::vector<int32_t>, float>
   postprocess(const Tensor &tensor) const;
-
-  cv::Size modelImageSize;
 };

 } // namespace rnexecutorch::models::ocr
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp
index 25fa160c2..7614e97a1 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.cpp
@@ -707,4 +707,21 @@ groupTextBoxes(std::vector &boxes, float centerThreshold,
   return orderedSortedBoxes;
 }

+void validateInputWidth(int32_t inputWidth, std::span<const int32_t> constants,
+                        std::string modelName) {
+  auto it = std::ranges::find(constants, inputWidth);
+
+  if (it == constants.end()) {
+    std::string allowed;
+    for (size_t i = 0; i < constants.size(); ++i) {
+      allowed +=
+          std::to_string(constants[i]) + (i < constants.size() - 1 ? ", " : "");
+    }
+
+    throw std::runtime_error("Unexpected input width for " + modelName +
+                             "! Expected [" + allowed + "] but got " +
+                             std::to_string(inputWidth) + ".");
+  }
+}
+
 } // namespace rnexecutorch::models::ocr::utils
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h
index ca0c2676d..0b742a4ce 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/DetectorUtils.h
@@ -78,4 +78,19 @@ groupTextBoxes(std::vector &boxes, float centerThreshold,
                float distanceThreshold, float heightThreshold,
                int32_t minSideThreshold, int32_t maxSideThreshold,
                int32_t maxWidth);
+
+/**
+ * Validates if the provided image width is supported by the model.
+ *
+ * This method checks the input width against the allowed widths passed in
+ * the `constants` span. If the width is not found, it constructs a
+ * descriptive error message listing all valid options.
+ *
+ * @param inputWidth The width of the input image to be validated.
+ * @param constants Span of available input sizes.
+ * @param modelName Name of the model, used for generating the error message.
+ * @throws std::runtime_error If inputWidth is not present in the allowed
+ * input widths.
+ */
+void validateInputWidth(int32_t inputWidth, std::span<const int32_t> constants,
+                        std::string modelName);
 } // namespace rnexecutorch::models::ocr::utils
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h
index 7e2cfe5bf..e2dea2f7f 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.h
@@ -43,6 +43,24 @@ void computeRatioAndResize(cv::Mat &img, cv::Size size, int32_t modelHeight);
 */
 cv::Mat cropImage(types::DetectorBBox box, cv::Mat &image, int32_t modelHeight);
 void adjustContrastGrey(cv::Mat &img, double target);
+
+/**
+ * @brief Determines the optimal width for an image based on its aspect ratio
+ * and orientation, to fit the requirements of the recognition model.
+ *
+ * This function calculates a `desiredWidth` that, when combined with a fixed
+ * `modelHeight` (from `normalizeForRecognizer`), maintains the image's aspect
+ * ratio and prepares it for input into the recognizer model. It considers
+ * whether the text in the image is `isVertical`, which might influence the
+ * chosen width for better recognition performance.
+ *
+ * @param img The input image matrix.
+ * @param isVertical A boolean indicating if the text in the image is oriented
+ * vertically.
+ * @return The calculated desired width for the image.
+ */
+int32_t getDesiredWidth(const cv::Mat &img, bool isVertical);
+
 /**
  * @brief Prepares an image for recognition models by standardizing size,
  * contrast, and pixel values.
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp index 3d89f10a1..a0faf43ee 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp @@ -5,51 +5,49 @@ #include #include +#include namespace rnexecutorch::models::ocr { VerticalDetector::VerticalDetector( - const std::string &modelSource, bool detectSingleCharacters, + const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { - this->detectSingleCharacters = detectSingleCharacters; - auto inputShapes = getAllInputShapes(); - if (inputShapes.empty()) { - throw std::runtime_error( - "Detector model seems to not take any input tensors."); - } - std::vector modelInputShape = inputShapes[0]; - if (modelInputShape.size() < 2) { - throw std::runtime_error("Unexpected detector model input size, expected " - "at least 2 dimensions but got: " + - std::to_string(modelInputShape.size()) + "."); - } - modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1], - modelInputShape[modelInputShape.size() - 2]); -} - -cv::Size VerticalDetector::getModelImageSize() const noexcept { - return modelImageSize; -} + : Detector(modelSource, callInvoker) {} std::vector -VerticalDetector::generate(const cv::Mat &inputImage) { - auto inputShapes = getAllInputShapes(); +VerticalDetector::generate(const cv::Mat &inputImage, int32_t inputWidth) { + + bool detectSingleCharacters = + !(inputWidth >= constants::kMediumDetectorWidth); + + utils::validateInputWidth(inputWidth, constants::kDetectorInputWidths, + "VerticalDetector"); + + std::string methodName = "forward_" + std::to_string(inputWidth); + auto inputShapes = getAllInputShapes(methodName); + + cv::Size modelInputSize = calculateModelImageSize(inputWidth); + cv::Mat resizedInputImage = - image_processing::resizePadded(inputImage, getModelImageSize()); + image_processing::resizePadded(inputImage, modelInputSize); TensorPtr inputTensor = image_processing::getTensorFromMatrix( inputShapes[0], resizedInputImage, constants::kNormalizationMean, constants::kNormalizationVariance); - auto forwardResult = BaseModel::forward(inputTensor); + auto forwardResult = BaseModel::execute(methodName, {inputTensor}); + if (!forwardResult.ok()) { throw std::runtime_error( - "Failed to forward, error: " + + "Failed to " + methodName + " error: " + std::to_string(static_cast(forwardResult.error()))); } - return postprocess(forwardResult->at(0).toTensor()); + return postprocess(forwardResult->at(0).toTensor(), + calculateModelImageSize(inputWidth), + detectSingleCharacters); } std::vector -VerticalDetector::postprocess(const Tensor &tensor) const { +VerticalDetector::postprocess(const Tensor &tensor, + const cv::Size &modelInputSize, + bool detectSingleCharacters) const { /* The output of the model consists of two matrices (heat maps): 1. ScoreText(Score map) - The probability of a region containing character. 
@@ -67,20 +65,20 @@ VerticalDetector::postprocess(const Tensor &tensor) const { */ auto [scoreTextMat, scoreAffinityMat] = utils::interleavedArrayToMats( tensorData, - cv::Size(modelImageSize.width / 2, modelImageSize.height / 2)); - float txtThreshold = this->detectSingleCharacters + cv::Size(modelInputSize.width / 2, modelInputSize.height / 2)); + float txtThreshold = detectSingleCharacters ? constants::kTextThreshold : constants::kTextThresholdVertical; std::vector bBoxesList = utils::getDetBoxesFromTextMapVertical( scoreTextMat, scoreAffinityMat, txtThreshold, - constants::kLinkThreshold, this->detectSingleCharacters); + constants::kLinkThreshold, detectSingleCharacters); const float restoreRatio = utils::calculateRestoreRatio( scoreTextMat.rows, constants::kRecognizerImageSize); utils::restoreBboxRatio(bBoxesList, restoreRatio); // if this is Narrow Detector, do not group boxes. - if (!this->detectSingleCharacters) { + if (!detectSingleCharacters) { bBoxesList = utils::groupTextBoxes( bBoxesList, constants::kCenterThreshold, constants::kDistanceThreshold, constants::kHeightThreshold, constants::kMinSideThreshold, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h index 802ab7329..29e9c86bf 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h @@ -1,9 +1,10 @@ #pragma once +#include #include #include - #include +#include #include namespace rnexecutorch::models::ocr { @@ -15,16 +16,17 @@ namespace rnexecutorch::models::ocr { In Vertical OCR pipeline we make use of Detector two times: - 1. Large Detector -- The differences between Detector used in standard OCR and - Large Detector used in Vertical OCR is: a) To obtain detected boxes from heeat - maps it utilizes `getDetBoxesFromTextMapVertical()` function rather than + 1. Large Detector through forward_1280 method -- The differences between + Detector used in standard OCR and Large Detector used in Vertical OCR is: a) To + obtain detected boxes from heeat maps it utilizes + `getDetBoxesFromTextMapVertical()` function rather than 'getDetBoxesFromTextMap()`. Other than that, refer to the standard OCR Detector. - 2. Narrow Detector -- it is designed to detect a single characters bounding - boxes. `getDetBoxesFromTextMapVertical()` function acts differently for Narrow - Detector and different textThreshold Value is passed. Additionally, the - grouping of detected boxes is completely omited. + 2. Narrow Detector through forward_320 method -- it is designed to detect a + single characters bounding boxes. `getDetBoxesFromTextMapVertical()` function + acts differently for Narrow Detector and different textThreshold Value is + passed. Additionally, the grouping of detected boxes is completely omited. Vertical Detector pipeline differentiate the Large Detector and Narrow Detector based on `detectSingleCharacters` flag passed to the constructor. 
@@ -33,17 +35,17 @@ namespace rnexecutorch::models::ocr { using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class VerticalDetector final : public BaseModel { +class VerticalDetector final : public Detector { public: explicit VerticalDetector(const std::string &modelSource, - bool detectSingleCharacters, std::shared_ptr callInvoker); - std::vector generate(const cv::Mat &inputImage); - cv::Size getModelImageSize() const noexcept; + + std::vector generate(const cv::Mat &inputImage, + int32_t inputWidth) override; private: - bool detectSingleCharacters; - std::vector postprocess(const Tensor &tensor) const; - cv::Size modelImageSize; + std::vector + postprocess(const Tensor &tensor, const cv::Size &modelInputSize, + bool detectSingleCharacters) const; }; } // namespace rnexecutorch::models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp index 5bb374dc1..40c0ce26a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp @@ -2,19 +2,18 @@ #include #include #include +#include #include #include namespace rnexecutorch::models::ocr { -VerticalOCR::VerticalOCR(const std::string &detectorLargeSource, - const std::string &detectorNarrowSource, +VerticalOCR::VerticalOCR(const std::string &detectorSource, const std::string &recognizerSource, std::string symbols, bool independentChars, std::shared_ptr invoker) - : detectorLarge(detectorLargeSource, false, invoker), - detectorNarrow(detectorNarrowSource, true, invoker), - recognizer(recognizerSource, invoker), converter(symbols), - independentCharacters(independentChars), callInvoker(invoker) {} + : detector(detectorSource, invoker), recognizer(recognizerSource, invoker), + converter(symbols), independentCharacters(independentChars), + callInvoker(invoker) {} std::vector VerticalOCR::generate(std::string input) { cv::Mat image = image_processing::readImage(input); @@ -22,9 +21,11 @@ std::vector VerticalOCR::generate(std::string input) { throw std::runtime_error("Failed to load image from path: " + input); } // 1. 
Large Detector - std::vector<types::DetectorBBox> largeBoxes = detectorLarge.generate(image); + std::vector<types::DetectorBBox> largeBoxes = + detector.generate(image, constants::kLargeDetectorWidth); - cv::Size largeDetectorSize = detectorLarge.getModelImageSize(); + cv::Size largeDetectorSize = + detector.calculateModelImageSize(constants::kLargeDetectorWidth); cv::Mat resizedImage = image_processing::resizePadded(image, largeDetectorSize); types::PaddingInfo imagePaddings = @@ -42,9 +43,7 @@ std::vector<types::OCRDetection> VerticalOCR::generate(std::string input) { } std::size_t VerticalOCR::getMemoryLowerBound() const noexcept { - return detectorLarge.getMemoryLowerBound() + - detectorNarrow.getMemoryLowerBound() + - recognizer.getMemoryLowerBound(); + return detector.getMemoryLowerBound() + recognizer.getMemoryLowerBound(); } // Strategy 1: Recognize each character individually @@ -76,7 +75,8 @@ std::pair<std::string, float> VerticalOCR::_handleIndependentCharacters( croppedChar = utils::normalizeForRecognizer( croppedChar, constants::kRecognizerHeight, 0.0, true); - const auto &[predIndex, score] = recognizer.generate(croppedChar); + const auto &[predIndex, score] = + recognizer.generate(croppedChar, constants::kRecognizerHeight); if (!predIndex.empty()) { text += converter.decodeGreedy(predIndex, predIndex.size())[0]; } @@ -118,7 +118,7 @@ std::pair<std::string, float> VerticalOCR::_handleJointCharacters( mergedCharacters, constants::kRecognizerHeight, 0.0, false); const auto &[predIndex, confidenceScore] = - recognizer.generate(mergedCharacters); + recognizer.generate(mergedCharacters, constants::kLargeRecognizerWidth); if (!predIndex.empty()) { text = converter.decodeGreedy(predIndex, predIndex.size())[0]; } @@ -138,7 +138,7 @@ types::OCRDetection VerticalOCR::_processSingleTextBox( // 2. Narrow Detector - detects single characters std::vector<types::DetectorBBox> characterBoxes = - detectorNarrow.generate(croppedLargeBox); + detector.generate(croppedLargeBox, constants::kSmallDetectorWidth); std::string text; float confidenceScore = 0.0; @@ -148,7 +148,8 @@ types::OCRDetection VerticalOCR::_processSingleTextBox( static_cast<int32_t>(box.bbox[2].x - box.bbox[0].x); const int32_t boxHeight = static_cast<int32_t>(box.bbox[2].y - box.bbox[0].y); - cv::Size narrowRecognizerSize = detectorNarrow.getModelImageSize(); + cv::Size narrowRecognizerSize = + detector.calculateModelImageSize(constants::kSmallDetectorWidth); types::PaddingInfo paddingsBox = utils::calculateResizeRatioAndPaddings( cv::Size(boxWidth, boxHeight), narrowRecognizerSize); @@ -173,8 +174,7 @@ types::OCRDetection VerticalOCR::_processSingleTextBox( } void VerticalOCR::unload() noexcept { - detectorLarge.unload(); - detectorNarrow.unload(); + detector.unload(); recognizer.unload(); } } // namespace rnexecutorch::models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h index f9f70f2d9..e7654c2f2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h @@ -21,22 +21,20 @@ namespace models::ocr { /* Vertical OCR is OCR designed to handle vertical texts. Vertical OCR pipeline consists of: - 1. Large Detector -- detects regions where text is located. + 1. Detector using the forward_1280 method -- detects regions where text is located. Almost identical to the Detector in standard OCR. The result of this phase is a list of bounding boxes.
Each detected box is then processed individually through the following steps: - 2. Narrow Detector -- designed for detecting where single characters - are located. - There are two different strategies used for vertical recognition: - Strategy 1 "Independent Characters": - Treating each character region found by Narrow Detector - as compeletely independent. - 3. Each character is forwarded to Small Recognizer (64 x 64). + 2. Detector using the forward_320 method -- designed for detecting where single + characters are located. There are two different strategies used for vertical + recognition: Strategy 1 "Independent Characters": Treating each character + region found by the Narrow Detector as completely independent. + 3. Each character is forwarded to the Recognizer with an input size of 64 x 64. Strategy 2 "Joint Characters": The bounding boxes found by Narrow Detector are horizontally merged to create one wide image. - 3. One wide image is forwarded to Large Recognzer (512 x 64). - Vertical OCR differentiate between those two strategies based on + 3. The one wide image is forwarded to the Recognizer with an input size of 512 + x 64. Vertical OCR differentiates between those two strategies based on the `independentChars` flag passed to the constructor. */ @@ -45,8 +43,7 @@ using executorch::extension::TensorPtr; class VerticalOCR final { public: - explicit VerticalOCR(const std::string &detectorLargeSource, - const std::string &detectorNarrowSource, + explicit VerticalOCR(const std::string &detectorSource, const std::string &recognizerSource, std::string symbols, bool indpendentCharacters, std::shared_ptr<react::CallInvoker> callInvoker); @@ -60,6 +57,7 @@ class VerticalOCR final { const std::vector<types::DetectorBBox> &characterBoxes, const types::PaddingInfo &paddingsBox, const types::PaddingInfo &imagePaddings); + std::pair<std::string, float> _handleJointCharacters(const types::DetectorBBox &box, const cv::Mat &originalImage, @@ -70,8 +68,8 @@ class VerticalOCR final { _processSingleTextBox(types::DetectorBBox &box, const cv::Mat &originalImage, const cv::Mat &resizedLargeImage, const types::PaddingInfo &imagePaddings); - VerticalDetector detectorLarge; - VerticalDetector detectorNarrow; + + VerticalDetector detector; Recognizer recognizer; CTCLabelConverter converter; bool independentCharacters; @@ -80,6 +78,5 @@ } // namespace models::ocr REGISTER_CONSTRUCTOR(models::ocr::VerticalOCR, std::string, std::string, - std::string, std::string, bool, - std::shared_ptr<react::CallInvoker>); + std::string, bool, std::shared_ptr<react::CallInvoker>); } // namespace rnexecutorch diff --git a/packages/react-native-executorch/src/constants/ocr/models.ts b/packages/react-native-executorch/src/constants/ocr/models.ts index c6bad4293..f6d4efb13 100644 --- a/packages/react-native-executorch/src/constants/ocr/models.ts +++ b/packages/react-native-executorch/src/constants/ocr/models.ts @@ -1,884 +1,426 @@ import { alphabets, symbols } from './symbols'; -const URL_PREFIX = - 'https://huggingface.co/software-mansion/react-native-executorch'; -const VERSION_TAG = 'resolve/v0.5.0'; +import { VERSION_TAG, URL_PREFIX } from '../versions'; -const DETECTOR_CRAFT_1280_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_1280_quantized.pte`; -const DETECTOR_CRAFT_800_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_800_quantized.pte`; -const DETECTOR_CRAFT_320_MODEL = `${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_320_quantized.pte`; +const DETECTOR_CRAFT_MODEL =
`${URL_PREFIX}-detector-craft/${VERSION_TAG}/xnnpack_quantized/xnnpack_craft_quantized.pte`; -type RecognizerSize = 64 | 128 | 256 | 512; +const createHFRecognizerDownloadUrl = (alphabet: keyof typeof alphabets) => + `${URL_PREFIX}-recognizer-crnn.en/${VERSION_TAG}/xnnpack/${alphabet}/xnnpack_crnn_${alphabet}.pte`; -const createHFRecognizerDownloadUrl = ( - alphabet: keyof typeof alphabets, - size: RecognizerSize -) => - `${URL_PREFIX}-recognizer-crnn.en/${VERSION_TAG}/xnnpack/${alphabet}/xnnpack_crnn_${alphabet}_${size}.pte`; - -const RECOGNIZER_ENGLISH_CRNN_512 = createHFRecognizerDownloadUrl( - 'english', - 512 -); -const RECOGNIZER_ENGLISH_CRNN_256 = createHFRecognizerDownloadUrl( - 'english', - 256 -); -const RECOGNIZER_ENGLISH_CRNN_128 = createHFRecognizerDownloadUrl( - 'english', - 128 -); -const RECOGNIZER_ENGLISH_CRNN_64 = createHFRecognizerDownloadUrl('english', 64); - -const RECOGNIZER_LATIN_CRNN_512 = createHFRecognizerDownloadUrl('latin', 512); -const RECOGNIZER_LATIN_CRNN_256 = createHFRecognizerDownloadUrl('latin', 256); -const RECOGNIZER_LATIN_CRNN_128 = createHFRecognizerDownloadUrl('latin', 128); -const RECOGNIZER_LATIN_CRNN_64 = createHFRecognizerDownloadUrl('latin', 64); - -const RECOGNIZER_JAPANESE_CRNN_512 = createHFRecognizerDownloadUrl( - 'japanese', - 512 -); -const RECOGNIZER_JAPANESE_CRNN_256 = createHFRecognizerDownloadUrl( - 'japanese', - 256 -); -const RECOGNIZER_JAPANESE_CRNN_128 = createHFRecognizerDownloadUrl( - 'japanese', - 128 -); -const RECOGNIZER_JAPANESE_CRNN_64 = createHFRecognizerDownloadUrl( - 'japanese', - 64 -); - -const RECOGNIZER_KANNADA_CRNN_512 = createHFRecognizerDownloadUrl( - 'kannada', - 512 -); -const RECOGNIZER_KANNADA_CRNN_256 = createHFRecognizerDownloadUrl( - 'kannada', - 256 -); -const RECOGNIZER_KANNADA_CRNN_128 = createHFRecognizerDownloadUrl( - 'kannada', - 128 -); -const RECOGNIZER_KANNADA_CRNN_64 = createHFRecognizerDownloadUrl('kannada', 64); - -const RECOGNIZER_KOREAN_CRNN_512 = createHFRecognizerDownloadUrl('korean', 512); -const RECOGNIZER_KOREAN_CRNN_256 = createHFRecognizerDownloadUrl('korean', 256); -const RECOGNIZER_KOREAN_CRNN_128 = createHFRecognizerDownloadUrl('korean', 128); -const RECOGNIZER_KOREAN_CRNN_64 = createHFRecognizerDownloadUrl('korean', 64); - -const RECOGNIZER_TELUGU_CRNN_512 = createHFRecognizerDownloadUrl('telugu', 512); -const RECOGNIZER_TELUGU_CRNN_256 = createHFRecognizerDownloadUrl('telugu', 256); -const RECOGNIZER_TELUGU_CRNN_128 = createHFRecognizerDownloadUrl('telugu', 128); -const RECOGNIZER_TELUGU_CRNN_64 = createHFRecognizerDownloadUrl('telugu', 64); - -const RECOGNIZER_ZH_SIM_CRNN_512 = createHFRecognizerDownloadUrl('zh_sim', 512); -const RECOGNIZER_ZH_SIM_CRNN_256 = createHFRecognizerDownloadUrl('zh_sim', 256); -const RECOGNIZER_ZH_SIM_CRNN_128 = createHFRecognizerDownloadUrl('zh_sim', 128); -const RECOGNIZER_ZH_SIM_CRNN_64 = createHFRecognizerDownloadUrl('zh_sim', 64); - -const RECOGNIZER_CYRILLIC_CRNN_512 = createHFRecognizerDownloadUrl( - 'cyrillic', - 512 -); -const RECOGNIZER_CYRILLIC_CRNN_256 = createHFRecognizerDownloadUrl( - 'cyrillic', - 256 -); -const RECOGNIZER_CYRILLIC_CRNN_128 = createHFRecognizerDownloadUrl( - 'cyrillic', - 128 -); -const RECOGNIZER_CYRILLIC_CRNN_64 = createHFRecognizerDownloadUrl( - 'cyrillic', - 64 -); +const RECOGNIZER_ENGLISH_CRNN = createHFRecognizerDownloadUrl('english'); +const RECOGNIZER_LATIN_CRNN = createHFRecognizerDownloadUrl('latin'); +const RECOGNIZER_JAPANESE_CRNN = createHFRecognizerDownloadUrl('japanese'); +const RECOGNIZER_KANNADA_CRNN = 
createHFRecognizerDownloadUrl('kannada'); +const RECOGNIZER_KOREAN_CRNN = createHFRecognizerDownloadUrl('korean'); +const RECOGNIZER_TELUGU_CRNN = createHFRecognizerDownloadUrl('telugu'); +const RECOGNIZER_ZH_SIM_CRNN = createHFRecognizerDownloadUrl('zh_sim'); +const RECOGNIZER_CYRILLIC_CRNN = createHFRecognizerDownloadUrl('cyrillic'); const createOCRObject = ( - recognizerLarge: string, - recognizerMedium: string, - recognizerSmall: string, + recognizerSource: string, language: keyof typeof symbols ) => { return { - detectorSource: DETECTOR_CRAFT_800_MODEL, - recognizerLarge, - recognizerMedium, - recognizerSmall, + detectorSource: DETECTOR_CRAFT_MODEL, + recognizerSource, language, }; }; const createVerticalOCRObject = ( - recognizerLarge: string, - recognizerSmall: string, + recognizerSource: string, language: keyof typeof symbols ) => { return { - detectorLarge: DETECTOR_CRAFT_1280_MODEL, - detectorNarrow: DETECTOR_CRAFT_320_MODEL, - recognizerLarge, - recognizerSmall, + detectorSource: DETECTOR_CRAFT_MODEL, + recognizerSource, language, }; }; -export const OCR_ABAZA = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'abq' -); +export const OCR_ABAZA = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'abq'); export const VERTICAL_OCR_ABAZA = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'abq' ); -export const OCR_ADYGHE = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'ady' -); +export const OCR_ADYGHE = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'ady'); export const VERTICAL_OCR_ADYGHE = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'ady' ); -export const OCR_AFRIKAANS = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'af' -); +export const OCR_AFRIKAANS = createOCRObject(RECOGNIZER_LATIN_CRNN, 'af'); export const VERTICAL_OCR_AFRIKAANS = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'af' ); -export const OCR_AVAR = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'ava' -); +export const OCR_AVAR = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'ava'); export const VERTICAL_OCR_AVAR = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'ava' ); -export const OCR_AZERBAIJANI = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'az' -); +export const OCR_AZERBAIJANI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'az'); export const VERTICAL_OCR_AZERBAIJANI = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'az' ); -export const OCR_BELARUSIAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'be' -); +export const OCR_BELARUSIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'be'); export const VERTICAL_OCR_BELARUSIAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'be' ); -export const OCR_BULGARIAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'bg' -); +export const 
OCR_BULGARIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'bg'); export const VERTICAL_OCR_BULGARIAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'bg' ); -export const OCR_BOSNIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'bs' -); +export const OCR_BOSNIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'bs'); export const VERTICAL_OCR_BOSNIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'bs' ); export const OCR_SIMPLIFIED_CHINESE = createOCRObject( - RECOGNIZER_ZH_SIM_CRNN_512, - RECOGNIZER_ZH_SIM_CRNN_256, - RECOGNIZER_ZH_SIM_CRNN_128, + RECOGNIZER_ZH_SIM_CRNN, 'chSim' ); export const VERTICAL_OCR_SIMPLIFIED_CHINESE = createVerticalOCRObject( - RECOGNIZER_ZH_SIM_CRNN_512, - RECOGNIZER_ZH_SIM_CRNN_64, + RECOGNIZER_ZH_SIM_CRNN, 'chSim' ); -export const OCR_CHECHEN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'che' -); +export const OCR_CHECHEN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'che'); export const VERTICAL_OCR_CHECHEN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'che' ); -export const OCR_CZECH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'cs' -); +export const OCR_CZECH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'cs'); export const VERTICAL_OCR_CZECH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'cs' ); -export const OCR_WELSH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'cy' -); +export const OCR_WELSH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'cy'); export const VERTICAL_OCR_WELSH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'cy' ); -export const OCR_DANISH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'da' -); +export const OCR_DANISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'da'); export const VERTICAL_OCR_DANISH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'da' ); -export const OCR_DARGWA = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'dar' -); +export const OCR_DARGWA = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'dar'); export const VERTICAL_OCR_DARGWA = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'dar' ); -export const OCR_GERMAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'de' -); +export const OCR_GERMAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'de'); export const VERTICAL_OCR_GERMAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'de' ); -export const OCR_ENGLISH = createOCRObject( - RECOGNIZER_ENGLISH_CRNN_512, - RECOGNIZER_ENGLISH_CRNN_256, - RECOGNIZER_ENGLISH_CRNN_128, - 'en' -); +export const OCR_ENGLISH = createOCRObject(RECOGNIZER_ENGLISH_CRNN, 'en'); export const VERTICAL_OCR_ENGLISH = createVerticalOCRObject( - RECOGNIZER_ENGLISH_CRNN_512, - RECOGNIZER_ENGLISH_CRNN_64, + RECOGNIZER_ENGLISH_CRNN, 
'en' ); -export const OCR_SPANISH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'es' -); +export const OCR_SPANISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'es'); export const VERTICAL_OCR_SPANISH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'es' ); -export const OCR_ESTONIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'et' -); +export const OCR_ESTONIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'et'); export const VERTICAL_OCR_ESTONIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'et' ); -export const OCR_FRENCH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'fr' -); +export const OCR_FRENCH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'fr'); export const VERTICAL_OCR_FRENCH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'fr' ); -export const OCR_IRISH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'ga' -); +export const OCR_IRISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ga'); export const VERTICAL_OCR_IRISH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'ga' ); -export const OCR_CROATIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'hr' -); +export const OCR_CROATIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'hr'); export const VERTICAL_OCR_CROATIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'hr' ); -export const OCR_HUNGARIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'hu' -); +export const OCR_HUNGARIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'hu'); export const VERTICAL_OCR_HUNGARIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'hu' ); -export const OCR_INDONESIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'id' -); +export const OCR_INDONESIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'id'); export const VERTICAL_OCR_INDONESIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'id' ); -export const OCR_INGUSH = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'inh' -); +export const OCR_INGUSH = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'inh'); export const VERTICAL_OCR_INGUSH = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'inh' ); -export const OCR_ICELANDIC = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'ic' -); +export const OCR_ICELANDIC = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ic'); export const VERTICAL_OCR_ICELANDIC = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'ic' ); -export const OCR_ITALIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'it' -); +export const OCR_ITALIAN = 
createOCRObject(RECOGNIZER_LATIN_CRNN, 'it'); export const VERTICAL_OCR_ITALIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'it' ); -export const OCR_JAPANESE = createOCRObject( - RECOGNIZER_JAPANESE_CRNN_512, - RECOGNIZER_JAPANESE_CRNN_256, - RECOGNIZER_JAPANESE_CRNN_128, - 'ja' -); +export const OCR_JAPANESE = createOCRObject(RECOGNIZER_JAPANESE_CRNN, 'ja'); export const VERTICAL_OCR_JAPANESE = createVerticalOCRObject( - RECOGNIZER_JAPANESE_CRNN_512, - RECOGNIZER_JAPANESE_CRNN_64, + RECOGNIZER_JAPANESE_CRNN, 'ja' ); -export const OCR_KARBADIAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'kbd' -); +export const OCR_KARBADIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'kbd'); export const VERTICAL_OCR_KARBADIAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'kbd' ); -export const OCR_KANNADA = createOCRObject( - RECOGNIZER_KANNADA_CRNN_512, - RECOGNIZER_KANNADA_CRNN_256, - RECOGNIZER_KANNADA_CRNN_128, - 'kn' -); +export const OCR_KANNADA = createOCRObject(RECOGNIZER_KANNADA_CRNN, 'kn'); export const VERTICAL_OCR_KANNADA = createVerticalOCRObject( - RECOGNIZER_KANNADA_CRNN_512, - RECOGNIZER_KANNADA_CRNN_64, + RECOGNIZER_KANNADA_CRNN, 'kn' ); -export const OCR_KOREAN = createOCRObject( - RECOGNIZER_KOREAN_CRNN_512, - RECOGNIZER_KOREAN_CRNN_256, - RECOGNIZER_KOREAN_CRNN_128, - 'ko' -); +export const OCR_KOREAN = createOCRObject(RECOGNIZER_KOREAN_CRNN, 'ko'); export const VERTICAL_OCR_KOREAN = createVerticalOCRObject( - RECOGNIZER_KOREAN_CRNN_512, - RECOGNIZER_KOREAN_CRNN_64, + RECOGNIZER_KOREAN_CRNN, 'ko' ); -export const OCR_KURDISH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'ku' -); +export const OCR_KURDISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ku'); export const VERTICAL_OCR_KURDISH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'ku' ); -export const OCR_LATIN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'la' -); +export const OCR_LATIN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'la'); export const VERTICAL_OCR_LATIN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'la' ); -export const OCR_LAK = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'lbe' -); +export const OCR_LAK = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'lbe'); export const VERTICAL_OCR_LAK = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'lbe' ); -export const OCR_LEZGHIAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'lez' -); +export const OCR_LEZGHIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'lez'); export const VERTICAL_OCR_LEZGHIAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'lez' ); -export const OCR_LITHUANIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'lt' -); +export const OCR_LITHUANIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'lt'); export const VERTICAL_OCR_LITHUANIAN = createVerticalOCRObject( - 
RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'lt' ); -export const OCR_LATVIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'lv' -); +export const OCR_LATVIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'lv'); export const VERTICAL_OCR_LATVIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'lv' ); -export const OCR_MAORI = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'mi' -); +export const OCR_MAORI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'mi'); export const VERTICAL_OCR_MAORI = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'mi' ); -export const OCR_MONGOLIAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'mn' -); +export const OCR_MONGOLIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'mn'); export const VERTICAL_OCR_MONGOLIAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'mn' ); -export const OCR_MALAY = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'ms' -); +export const OCR_MALAY = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ms'); export const VERTICAL_OCR_MALAY = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'ms' ); -export const OCR_MALTESE = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'mt' -); +export const OCR_MALTESE = createOCRObject(RECOGNIZER_LATIN_CRNN, 'mt'); export const VERTICAL_OCR_MALTESE = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'mt' ); -export const OCR_DUTCH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'nl' -); +export const OCR_DUTCH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'nl'); export const VERTICAL_OCR_DUTCH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'nl' ); -export const OCR_NORWEGIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'no' -); +export const OCR_NORWEGIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'no'); export const VERTICAL_OCR_NORWEGIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'no' ); -export const OCR_OCCITAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'oc' -); +export const OCR_OCCITAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'oc'); export const VERTICAL_OCR_OCCITAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'oc' ); -export const OCR_PALI = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'pi' -); +export const OCR_PALI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'pi'); export const VERTICAL_OCR_PALI = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'pi' ); -export const OCR_POLISH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'pl' -); 
+export const OCR_POLISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'pl'); export const VERTICAL_OCR_POLISH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'pl' ); -export const OCR_PORTUGUESE = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'pt' -); +export const OCR_PORTUGUESE = createOCRObject(RECOGNIZER_LATIN_CRNN, 'pt'); export const VERTICAL_OCR_PORTUGUESE = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'pt' ); -export const OCR_ROMANIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'ro' -); +export const OCR_ROMANIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'ro'); export const VERTICAL_OCR_ROMANIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'ro' ); -export const OCR_RUSSIAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'ru' -); +export const OCR_RUSSIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'ru'); export const VERTICAL_OCR_RUSSIAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'ru' ); export const OCR_SERBIAN_CYRILLIC = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, + RECOGNIZER_CYRILLIC_CRNN, 'rsCyrillic' ); export const VERTICAL_OCR_SERBIAN_CYRILLIC = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'rsCyrillic' ); export const OCR_SERBIAN_LATIN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, + RECOGNIZER_LATIN_CRNN, 'rsLatin' ); export const VERTICAL_OCR_SERBIAN_LATIN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'rsLatin' ); -export const OCR_SLOVAK = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'sk' -); +export const OCR_SLOVAK = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sk'); export const VERTICAL_OCR_SLOVAK = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'sk' ); -export const OCR_SLOVENIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'sl' -); +export const OCR_SLOVENIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sl'); export const VERTICAL_OCR_SLOVENIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'sl' ); -export const OCR_ALBANIAN = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'sq' -); +export const OCR_ALBANIAN = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sq'); export const VERTICAL_OCR_ALBANIAN = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'sq' ); -export const OCR_SWEDISH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'sv' -); +export const OCR_SWEDISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sv'); export const VERTICAL_OCR_SWEDISH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'sv' ); -export 
const OCR_SWAHILI = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'sw' -); +export const OCR_SWAHILI = createOCRObject(RECOGNIZER_LATIN_CRNN, 'sw'); export const VERTICAL_OCR_SWAHILI = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'sw' ); -export const OCR_TABASSARAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'tab' -); +export const OCR_TABASSARAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'tab'); export const VERTICAL_OCR_TABASSARAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'tab' ); -export const OCR_TELUGU = createOCRObject( - RECOGNIZER_TELUGU_CRNN_512, - RECOGNIZER_TELUGU_CRNN_256, - RECOGNIZER_TELUGU_CRNN_128, - 'te' -); +export const OCR_TELUGU = createOCRObject(RECOGNIZER_TELUGU_CRNN, 'te'); export const VERTICAL_OCR_TELUGU = createVerticalOCRObject( - RECOGNIZER_TELUGU_CRNN_512, - RECOGNIZER_TELUGU_CRNN_64, + RECOGNIZER_TELUGU_CRNN, 'te' ); -export const OCR_TAJIK = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'tjk' -); +export const OCR_TAJIK = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'tjk'); export const VERTICAL_OCR_TAJIK = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'tjk' ); -export const OCR_TAGALOG = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'tl' -); +export const OCR_TAGALOG = createOCRObject(RECOGNIZER_LATIN_CRNN, 'tl'); export const VERTICAL_OCR_TAGALOG = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'tl' ); -export const OCR_TURKISH = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'tr' -); +export const OCR_TURKISH = createOCRObject(RECOGNIZER_LATIN_CRNN, 'tr'); export const VERTICAL_OCR_TURKISH = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'tr' ); -export const OCR_UKRAINIAN = createOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_256, - RECOGNIZER_CYRILLIC_CRNN_128, - 'uk' -); +export const OCR_UKRAINIAN = createOCRObject(RECOGNIZER_CYRILLIC_CRNN, 'uk'); export const VERTICAL_OCR_UKRAINIAN = createVerticalOCRObject( - RECOGNIZER_CYRILLIC_CRNN_512, - RECOGNIZER_CYRILLIC_CRNN_64, + RECOGNIZER_CYRILLIC_CRNN, 'uk' ); -export const OCR_UZBEK = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'uz' -); +export const OCR_UZBEK = createOCRObject(RECOGNIZER_LATIN_CRNN, 'uz'); export const VERTICAL_OCR_UZBEK = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'uz' ); -export const OCR_VIETNAMESE = createOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_256, - RECOGNIZER_LATIN_CRNN_128, - 'vi' -); +export const OCR_VIETNAMESE = createOCRObject(RECOGNIZER_LATIN_CRNN, 'vi'); export const VERTICAL_OCR_VIETNAMESE = createVerticalOCRObject( - RECOGNIZER_LATIN_CRNN_512, - RECOGNIZER_LATIN_CRNN_64, + RECOGNIZER_LATIN_CRNN, 'vi' ); diff --git a/packages/react-native-executorch/src/constants/versions.ts b/packages/react-native-executorch/src/constants/versions.ts new file mode 100644 index 
000000000..e71504b59 --- /dev/null +++ b/packages/react-native-executorch/src/constants/versions.ts @@ -0,0 +1,4 @@ +export const URL_PREFIX = + 'https://huggingface.co/software-mansion/react-native-executorch'; +export const VERSION_TAG = 'resolve/v0.7.0'; +export const NEXT_VERSION_TAG = 'resolve/v0.8.0'; diff --git a/packages/react-native-executorch/src/controllers/OCRController.ts b/packages/react-native-executorch/src/controllers/OCRController.ts index c37903614..f2e81188d 100644 --- a/packages/react-native-executorch/src/controllers/OCRController.ts +++ b/packages/react-native-executorch/src/controllers/OCRController.ts @@ -25,17 +25,12 @@ export class OCRController { public load = async ( detectorSource: ResourceSource, - recognizerSources: { - recognizerLarge: ResourceSource; - recognizerMedium: ResourceSource; - recognizerSmall: ResourceSource; - }, + recognizerSource: ResourceSource, language: OCRLanguage, onDownloadProgressCallback?: (downloadProgress: number) => void ) => { try { - if (!detectorSource || Object.keys(recognizerSources).length !== 3) - return; + if (!detectorSource || !recognizerSource) return; if (!symbols[language]) { throw new Error(getError(ETError.LanguageNotSupported)); @@ -47,18 +42,14 @@ export class OCRController { const paths = await ResourceFetcher.fetch( onDownloadProgressCallback, detectorSource, - recognizerSources.recognizerLarge, - recognizerSources.recognizerMedium, - recognizerSources.recognizerSmall + recognizerSource ); - if (paths === null || paths?.length < 4) { + if (paths === null || paths.length < 2) { throw new Error('Download interrupted!'); } this.nativeModule = global.loadOCR( paths[0]!, paths[1]!, - paths[2]!, - paths[3]!, symbols[language] ); this.isReady = true; diff --git a/packages/react-native-executorch/src/controllers/VerticalOCRController.ts b/packages/react-native-executorch/src/controllers/VerticalOCRController.ts index a303de161..3e0223b5c 100644 --- a/packages/react-native-executorch/src/controllers/VerticalOCRController.ts +++ b/packages/react-native-executorch/src/controllers/VerticalOCRController.ts @@ -24,24 +24,14 @@ export class VerticalOCRController { } public load = async ( - detectorSources: { - detectorLarge: ResourceSource; - detectorNarrow: ResourceSource; - }, - recognizerSources: { - recognizerLarge: ResourceSource; - recognizerSmall: ResourceSource; - }, + detectorSource: ResourceSource, + recognizerSource: ResourceSource, language: OCRLanguage, independentCharacters: boolean, onDownloadProgressCallback: (downloadProgress: number) => void ) => { try { - if ( - Object.keys(detectorSources).length !== 2 || - Object.keys(recognizerSources).length !== 2 - ) - return; + if (!detectorSource || !recognizerSource) return; if (!symbols[language]) { throw new Error(getError(ETError.LanguageNotSupported)); @@ -52,19 +42,15 @@ export class VerticalOCRController { const paths = await ResourceFetcher.fetch( onDownloadProgressCallback, - detectorSources.detectorLarge, - detectorSources.detectorNarrow, - independentCharacters - ? 
recognizerSources.recognizerSmall - : recognizerSources.recognizerLarge + detectorSource, + recognizerSource ); - if (paths === null || paths.length < 3) { + if (paths === null || paths.length < 2) { throw new Error('Download interrupted'); } this.ocrNativeModule = global.loadVerticalOCR( paths[0]!, paths[1]!, - paths[2]!, symbols[language], independentCharacters ); diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts index 90e3c0b43..b7aaec32e 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts @@ -17,9 +17,7 @@ export const useOCR = ({ }: { model: { detectorSource: ResourceSource; - recognizerLarge: ResourceSource; - recognizerMedium: ResourceSource; - recognizerSmall: ResourceSource; + recognizerSource: ResourceSource; language: OCRLanguage; }; preventLoad?: boolean; @@ -44,11 +42,7 @@ export const useOCR = ({ (async () => { await controllerInstance.load( model.detectorSource, - { - recognizerLarge: model.recognizerLarge, - recognizerMedium: model.recognizerMedium, - recognizerSmall: model.recognizerSmall, - }, + model.recognizerSource, model.language, setDownloadProgress ); @@ -60,9 +54,7 @@ export const useOCR = ({ }, [ controllerInstance, model.detectorSource, - model.recognizerLarge, - model.recognizerMedium, - model.recognizerSmall, + model.recognizerSource, model.language, preventLoad, ]); diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts index 1a6e1d270..c033d3721 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts @@ -17,10 +17,8 @@ export const useVerticalOCR = ({ preventLoad = false, }: { model: { - detectorLarge: ResourceSource; - detectorNarrow: ResourceSource; - recognizerLarge: ResourceSource; - recognizerSmall: ResourceSource; + detectorSource: ResourceSource; + recognizerSource: ResourceSource; language: OCRLanguage; }; independentCharacters?: boolean; @@ -45,14 +43,8 @@ export const useVerticalOCR = ({ (async () => { await controllerInstance.load( - { - detectorLarge: model.detectorLarge, - detectorNarrow: model.detectorNarrow, - }, - { - recognizerLarge: model.recognizerLarge, - recognizerSmall: model.recognizerSmall, - }, + model.detectorSource, + model.recognizerSource, model.language, independentCharacters, setDownloadProgress @@ -64,10 +56,8 @@ export const useVerticalOCR = ({ }; }, [ controllerInstance, - model.detectorLarge, - model.detectorNarrow, - model.recognizerLarge, - model.recognizerSmall, + model.detectorSource, + model.recognizerSource, model.language, independentCharacters, preventLoad, diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts index cddc6f595..af278812a 100644 --- a/packages/react-native-executorch/src/index.ts +++ b/packages/react-native-executorch/src/index.ts @@ -29,14 +29,11 @@ declare global { ) => any; var loadOCR: ( detectorSource: string, - recognizerLarge: string, - recognizerMedium: string, - recognizerSmall: string, + recognizer: string, symbols: string ) => any; var loadVerticalOCR: ( - detectorLarge: string, - detectorNarrow: string, + detectorSource: string, recognizer: string, symbols: string, independentCharacters?: boolean diff --git 
a/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts b/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts index 158b227ae..ac4e2e2ff 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/OCRModule.ts @@ -12,20 +12,14 @@ export class OCRModule { async load( model: { detectorSource: ResourceSource; - recognizerLarge: ResourceSource; - recognizerMedium: ResourceSource; - recognizerSmall: ResourceSource; + recognizerSource: ResourceSource; language: OCRLanguage; }, onDownloadProgressCallback: (progress: number) => void = () => {} ) { await this.controller.load( model.detectorSource, - { - recognizerLarge: model.recognizerLarge, - recognizerMedium: model.recognizerMedium, - recognizerSmall: model.recognizerSmall, - }, + model.recognizerSource, model.language, onDownloadProgressCallback ); diff --git a/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts index 303ace04a..3eebec716 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VerticalOCRModule.ts @@ -11,24 +11,16 @@ export class VerticalOCRModule { async load( model: { - detectorLarge: ResourceSource; - detectorNarrow: ResourceSource; - recognizerLarge: ResourceSource; - recognizerSmall: ResourceSource; + detectorSource: ResourceSource; + recognizerSource: ResourceSource; language: OCRLanguage; }, independentCharacters: boolean, onDownloadProgressCallback: (progress: number) => void = () => {} ) { await this.controller.load( - { - detectorLarge: model.detectorLarge, - detectorNarrow: model.detectorNarrow, - }, - { - recognizerLarge: model.recognizerLarge, - recognizerSmall: model.recognizerSmall, - }, + model.detectorSource, + model.recognizerSource, model.language, independentCharacters, onDownloadProgressCallback