Skip to content

Commit 66e00f4

Browse files
committed
feat(rapidocr): update ppocrv5 minor language config and unit tests
1 parent c50038b commit 66e00f4

File tree

2 files changed

+74
-6
lines changed

2 files changed

+74
-6
lines changed

python/rapidocr/default_models.yaml

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,14 @@ onnxruntime:
7777
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/onnx/PP-OCRv5/rec/ch_PP-OCRv5_rec_server_infer.onnx
7878
SHA256: e09385400eaaaef34ceff54aeb7c4f0f1fe014c27fa8b9905d4709b65746562a
7979
korean_PP-OCRv5_rec_mobile_infer.onnx:
80-
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer.onnx
80+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/onnx/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer.onnx
8181
SHA256: cd6e2ea50f6943ca7271eb8c56a877a5a90720b7047fe9c41a2e541a25773c9b
82+
latin_PP-OCRv5_rec_mobile_infer.onnx:
83+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/onnx/PP-OCRv5/rec/latin_PP-OCRv5_rec_mobile_infer.onnx
84+
SHA256: b20bd37c168a570f583afbc8cd7925603890efbcdc000a59e22c269d160b5f5a
85+
eslav_PP-OCRv5_rec_mobile_infer.onnx:
86+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/onnx/PP-OCRv5/rec/eslav_PP-OCRv5_rec_mobile_infer.onnx
87+
SHA256: 08705d6721849b1347d26187f15a5e362c431963a2a62bfff4feac578c489aab
8288

8389
openvino:
8490
PP-OCRv4:
@@ -174,9 +180,17 @@ openvino:
174180
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/ch_PP-OCRv5_rec_server_infer/ppocrv5_dict.txt
175181
SHA256: e09385400eaaaef34ceff54aeb7c4f0f1fe014c27fa8b9905d4709b65746562a
176182
korean_PP-OCRv5_rec_mobile_infer.onnx:
177-
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer.onnx
178-
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer/ppocrv5_korean_dict.txt
183+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/onnx/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer.onnx
184+
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer/ppocrv5_korean_dict.txt
179185
SHA256: cd6e2ea50f6943ca7271eb8c56a877a5a90720b7047fe9c41a2e541a25773c9b
186+
latin_PP-OCRv5_rec_mobile_infer.onnx:
187+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/onnx/PP-OCRv5/rec/latin_PP-OCRv5_rec_mobile_infer.onnx
188+
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/latin_PP-OCRv5_rec_mobile_infer/ppocrv5_latin_dict.txt
189+
SHA256: b20bd37c168a570f583afbc8cd7925603890efbcdc000a59e22c269d160b5f5a
190+
eslav_PP-OCRv5_rec_mobile_infer.onnx:
191+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/onnx/PP-OCRv5/rec/eslav_PP-OCRv5_rec_mobile_infer.onnx
192+
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/eslav_PP-OCRv5_rec_mobile_infer/ppocrv5_eslav_dict.txt
193+
SHA256: 08705d6721849b1347d26187f15a5e362c431963a2a62bfff4feac578c489aab
180194

181195
paddle:
182196
PP-OCRv4:
@@ -319,11 +333,24 @@ paddle:
319333
inference.json: 8e6e12e5d42531840310977fffb58165bf889fc5061408c5a8afdb6985f47fcb
320334
inference.yml: 2c719dba044c4e2228aef8ff92f5f575394d75d24c16de096a33b7cfd902f66d
321335
korean_PP-OCRv5_rec_mobile_infer:
322-
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer
323-
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer/ppocrv5_korean_dict.txt
336+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer
337+
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/korean_PP-OCRv5_rec_mobile_infer/ppocrv5_korean_dict.txt
324338
inference.pdiparams: cac3e5f12cf04aaa77f6a5bc704e4e736ef2908476551891d84b41b4e9090462
325339
inference.json: 562404e3c590c50c93778d5f0a94df21b47b5ab8f3ea6d47c7f8a7930c3bc844
326340
inference.yml: f757fa1c40e99edcf27e9cce879b93eb2a51fa46f5ef39095689b8c37dd75998
341+
latin_PP-OCRv5_rec_mobile_infer:
342+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/latin_PP-OCRv5_rec_mobile_infer
343+
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/latin_PP-OCRv5_rec_mobile_infer/ppocrv5_latin_dict.txt
344+
inference.pdiparams: ecebeaac46267ecaa9f21025ac3ba8972a93c4a906c505ceb8a525299e466e0a
345+
inference.json: ea8f315e69b96e33a1339dde868651eadb4aaa62ce92e70c546f4b25e43134e7
346+
inference.yml: 13cfe9c251d13aaabc619c135ff1724444d0a9e247bd8e0b2f4a76298c87eba0
347+
eslav_PP-OCRv5_rec_mobile_infer:
348+
model_dir: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/eslav_PP-OCRv5_rec_mobile_infer
349+
dict_url: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/paddle/PP-OCRv5/rec/eslav_PP-OCRv5_rec_mobile_infer/ppocrv5_eslav_dict.txt
350+
inference.pdiparams: f11057b05d8517868bca505271278973d706600d9dcc184cbcf5c4512091c32b
351+
inference.json: 3fb6e2e658f5139ff16e35260de8f0577f106a9505c902e1dfc1f4f1d03cc9cb
352+
inference.yml: 025039bac23eb4a308efcefa4d58eab3af440767815c6ba6938468bf6353ee5a
353+
327354

328355
torch:
329356
PP-OCRv4:
@@ -461,4 +488,4 @@ fonts:
461488
SHA256: b771ac413157f6b1f1a52fb8ff1b56057f4b492fcce385ddd32ca12eee0c73b0
462489
te:
463490
path: https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.3.0/resources/fonts/telugu.ttf
464-
SHA256: 7f82ab141b77d263f9ea9b31b47faf50c11310f42fce6d9dffeaaa334909bbf9
491+
SHA256: 7f82ab141b77d263f9ea9b31b47faf50c11310f42fce6d9dffeaaa334909bbf9

python/tests/test_main.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,47 @@ def test_korean_lang(ocr_version, gt):
222222
assert result.txts[0] == gt
223223

224224

225+
@mark.parametrize(
226+
"engine_type",
227+
[EngineType.ONNXRUNTIME, EngineType.OPENVINO, EngineType.PADDLE],
228+
)
229+
def test_latin_lang(engine_type):
230+
engine = RapidOCR(
231+
params={
232+
"Rec.lang_type": LangRec.LATIN,
233+
"Rec.model_type": ModelType.MOBILE,
234+
"Rec.ocr_version": OCRVersion.PPOCRV5,
235+
"Rec.engine_type": engine_type,
236+
}
237+
)
238+
img_path = tests_dir / "latin.jpg"
239+
result = engine(img_path, use_det=False, use_cls=False, use_rec=True)
240+
assert result.txts is not None
241+
assert (
242+
result.txts[0]
243+
== "Alphabetum in mundo hodie frequentissie adhibitum est alphabetum Latinum."
244+
)
245+
246+
247+
@mark.parametrize(
248+
"engine_type",
249+
[EngineType.ONNXRUNTIME, EngineType.OPENVINO, EngineType.PADDLE],
250+
)
251+
def test_eslav_lang(engine_type):
252+
engine = RapidOCR(
253+
params={
254+
"Rec.lang_type": LangRec.ESLAV,
255+
"Rec.model_type": ModelType.MOBILE,
256+
"Rec.ocr_version": OCRVersion.PPOCRV5,
257+
"Rec.engine_type": engine_type,
258+
}
259+
)
260+
img_path = tests_dir / "eslav.jpg"
261+
result = engine(img_path, use_det=False, use_cls=False, use_rec=True)
262+
assert result.txts is not None
263+
assert result.txts[0] == "Славянские языки — большая языковая семья."
264+
265+
225266
def test_en_lang():
226267
engine = RapidOCR(
227268
params={"Rec.lang_type": LangRec.EN, "Rec.model_type": ModelType.MOBILE}

0 commit comments

Comments
 (0)