Skip to content

Commit 1d9d645

Browse files
authored
support latin and korean rec model (#4274)
* support latin and korean rec model * refine font support * fixed bugs
1 parent b883593 commit 1d9d645

File tree

13 files changed

+462
-9
lines changed

13 files changed

+462
-9
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
Global:
2+
model: korean_PP-OCRv5_mobile_rec
3+
mode: check_dataset # check_dataset/train/evaluate/predict
4+
dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
5+
device: gpu:0,1,2,3
6+
output: "output"
7+
8+
CheckDataset:
9+
convert:
10+
enable: False
11+
src_dataset_type: null
12+
split:
13+
enable: False
14+
train_percent: null
15+
val_percent: null
16+
17+
Train:
18+
epochs_iters: 20
19+
batch_size: 8
20+
learning_rate: 0.001
21+
pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
22+
resume_path: null
23+
log_interval: 20
24+
eval_interval: 1
25+
save_interval: 1
26+
27+
Evaluate:
28+
weight_path: "output/best_accuracy/best_accuracy.pdparams"
29+
log_interval: 1
30+
31+
Export:
32+
weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
33+
34+
Predict:
35+
batch_size: 1
36+
model_dir: "output/best_accuracy/inference"
37+
input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_003_korean.png"
38+
kernel_option:
39+
run_mode: paddle
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
Global:
2+
model: latin_PP-OCRv5_mobile_rec
3+
mode: check_dataset # check_dataset/train/evaluate/predict
4+
dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
5+
device: gpu:0,1,2,3
6+
output: "output"
7+
8+
CheckDataset:
9+
convert:
10+
enable: False
11+
src_dataset_type: null
12+
split:
13+
enable: False
14+
train_percent: null
15+
val_percent: null
16+
17+
Train:
18+
epochs_iters: 20
19+
batch_size: 8
20+
learning_rate: 0.001
21+
pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
22+
resume_path: null
23+
log_interval: 20
24+
eval_interval: 1
25+
save_interval: 1
26+
27+
Evaluate:
28+
weight_path: "output/best_accuracy/best_accuracy.pdparams"
29+
log_interval: 1
30+
31+
Export:
32+
weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
33+
34+
Predict:
35+
batch_size: 1
36+
model_dir: "output/best_accuracy/inference"
37+
input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_009_latin.png"
38+
kernel_option:
39+
run_mode: paddle

paddlex/inference/models/text_recognition/predictor.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@
1313
# limitations under the License.
1414

1515
from ....modules.text_recognition.model_list import MODELS
16+
from ....utils.fonts import (
17+
ARABIC_FONT,
18+
CYRILLIC_FONT,
19+
DEVANAGARI_FONT,
20+
KANNADA_FONT,
21+
KOREAN_FONT,
22+
LATIN_FONT,
23+
SIMFANG_FONT,
24+
TAMIL_FONT,
25+
TELUGU_FONT,
26+
)
1627
from ....utils.func_register import FuncRegister
1728
from ...common.batch_sampler import ImageBatchSampler
1829
from ...common.reader import ReadImage
@@ -31,6 +42,7 @@ class TextRecPredictor(BasePredictor):
3142
def __init__(self, *args, input_shape=None, **kwargs):
3243
super().__init__(*args, **kwargs)
3344
self.input_shape = input_shape
45+
self.vis_font = self.get_vis_font()
3446
self.pre_tfs, self.infer, self.post_op = self._build()
3547

3648
def _build_batch_sampler(self):
@@ -68,6 +80,7 @@ def process(self, batch_data):
6880
"input_img": batch_raw_imgs,
6981
"rec_text": texts,
7082
"rec_score": scores,
83+
"vis_font": [self.vis_font] * len(batch_raw_imgs),
7184
}
7285

7386
@register("DecodeImage")
@@ -76,7 +89,7 @@ def build_readimg(self, channel_first, img_mode):
7689
return "Read", ReadImage(format=img_mode)
7790

7891
@register("RecResizeImg")
79-
def build_resize(self, image_shape):
92+
def build_resize(self, image_shape, **kwargs):
8093
return "ReisizeNorm", OCRReisizeNormImg(
8194
rec_image_shape=image_shape, input_shape=self.input_shape
8295
)
@@ -96,3 +109,40 @@ def foo(self, *args, **kwargs):
96109
@register("KeepKeys")
97110
def foo(self, *args, **kwargs):
98111
return None, None
112+
113+
def get_vis_font(self):
    """Pick the font used to draw this model's recognized text.

    The decision is based only on ``self.model_name``:

    * names starting with ``"PP-OCR"`` (no language prefix) use
      ``SIMFANG_FONT``;
    * the language-prefixed recognition models listed below use the font
      constant for their script;
    * any other model name has no dedicated font.

    Returns:
        The matching font constant imported from ``utils.fonts``, or
        ``None`` when the model name is not covered here. Callers are
        expected to handle ``None`` (the result classes substitute
        ``SIMFANG_FONT`` in that case).
    """
    model_name = self.model_name

    # Un-prefixed PP-OCR models are checked first; every other branch is an
    # exact-name match, so the order of the remaining checks is irrelevant.
    if model_name.startswith("PP-OCR"):
        return SIMFANG_FONT

    if model_name in (
        "latin_PP-OCRv3_mobile_rec",
        "latin_PP-OCRv5_mobile_rec",
    ):
        return LATIN_FONT

    # The v5 "eslav" model shares the Cyrillic font with the v3 cyrillic model.
    if model_name in (
        "cyrillic_PP-OCRv3_mobile_rec",
        "eslav_PP-OCRv5_mobile_rec",
    ):
        return CYRILLIC_FONT

    if model_name in (
        "korean_PP-OCRv3_mobile_rec",
        "korean_PP-OCRv5_mobile_rec",
    ):
        return KOREAN_FONT

    if model_name == "arabic_PP-OCRv3_mobile_rec":
        return ARABIC_FONT

    if model_name == "ka_PP-OCRv3_mobile_rec":
        return KANNADA_FONT

    if model_name == "te_PP-OCRv3_mobile_rec":
        return TELUGU_FONT

    if model_name == "ta_PP-OCRv3_mobile_rec":
        return TAMIL_FONT

    if model_name == "devanagari_PP-OCRv3_mobile_rec":
        return DEVANAGARI_FONT

    # No dedicated font registered for this model: make the "no font" result
    # explicit instead of falling off the end of the function.
    return None

paddlex/inference/models/text_recognition/result.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import PIL
1818
from PIL import Image, ImageDraw, ImageFont
1919

20-
from ....utils.fonts import PINGFANG_FONT
20+
from ....utils.fonts import SIMFANG_FONT
2121
from ...common.result import BaseCVResult, JsonMixin
2222

2323

@@ -26,22 +26,25 @@ class TextRecResult(BaseCVResult):
2626
def _to_str(self, *args, **kwargs):
2727
data = copy.deepcopy(self)
2828
data.pop("input_img")
29+
data.pop("vis_font")
2930
return JsonMixin._to_str(data, *args, **kwargs)
3031

3132
def _to_json(self, *args, **kwargs):
3233
data = copy.deepcopy(self)
3334
data.pop("input_img")
35+
data.pop("vis_font")
3436
return JsonMixin._to_json(data, *args, **kwargs)
3537

3638
def _to_img(self):
3739
"""Draw label on image"""
3840
image = Image.fromarray(self["input_img"][:, :, ::-1])
3941
rec_text = self["rec_text"]
4042
rec_score = self["rec_score"]
43+
vis_font = self["vis_font"] if self["vis_font"] is not None else SIMFANG_FONT
4144
image = image.convert("RGB")
4245
image_width, image_height = image.size
4346
text = f"{rec_text} ({rec_score})"
44-
font = self.adjust_font_size(image_width, text, PINGFANG_FONT.path)
47+
font = self.adjust_font_size(image_width, text, vis_font.path)
4548
row_height = font.getbbox(text)[3]
4649
new_image_height = image_height + int(row_height * 1.2)
4750
new_image = Image.new("RGB", (image_width, new_image_height), (255, 255, 255))

paddlex/inference/pipelines/ocr/pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ def predict(
368368
"rec_texts": [],
369369
"rec_scores": [],
370370
"rec_polys": [],
371+
"vis_fonts": [],
371372
}
372373
for input_path, page_index, doc_preprocessor_res, dt_polys in zip(
373374
batch_data.input_paths,
@@ -439,6 +440,7 @@ def predict(
439440
if rec_res["rec_score"] >= text_rec_score_thresh:
440441
res["rec_texts"].append(rec_res["rec_text"])
441442
res["rec_scores"].append(rec_res["rec_score"])
443+
res["vis_fonts"].append(rec_res["vis_font"])
442444
res["rec_polys"].append(dt_polys[sno])
443445

444446
for res in results:

paddlex/inference/pipelines/ocr/result.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ def _to_img(self) -> Dict[str, Image.Image]:
8282
random.seed(0)
8383
draw_left = ImageDraw.Draw(img_left)
8484
for idx, (box, txt) in enumerate(zip(boxes, txts)):
85+
vis_font = (
86+
self["vis_fonts"][idx]
87+
if self["vis_fonts"][idx] is not None
88+
else SIMFANG_FONT
89+
)
8590
try:
8691
color = (
8792
random.randint(0, 255),
@@ -100,7 +105,7 @@ def _to_img(self) -> Dict[str, Image.Image]:
100105
box_pts = [(int(x), int(y)) for x, y in box.tolist()]
101106
draw_left.polygon(box_pts, fill=color)
102107

103-
img_right_text = draw_box_txt_fine((w, h), box, txt, SIMFANG_FONT.path)
108+
img_right_text = draw_box_txt_fine((w, h), box, txt, vis_font.path)
104109
pts = np.array(box, np.int32).reshape((-1, 1, 2))
105110
cv2.polylines(img_right_text, [pts], True, color, 1)
106111
img_right = cv2.bitwise_and(img_right, img_right_text)

paddlex/inference/utils/official_models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,8 @@
362362
"eslav_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
363363
eslav_PP-OCRv5_mobile_rec_infer.tar",
364364
"PP-DocBee2-3B": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-DocBee2-3B_infer.tar",
365+
"latin_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/latin_PP-OCRv5_mobile_rec_infer.tar",
366+
"korean_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/korean_PP-OCRv5_mobile_rec_infer.tar",
365367
}
366368

367369

paddlex/modules/text_recognition/model_list.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,7 @@
3333
"ch_RepSVTR_rec",
3434
"PP-OCRv5_server_rec",
3535
"PP-OCRv5_mobile_rec",
36+
"latin_PP-OCRv5_mobile_rec",
3637
"eslav_PP-OCRv5_mobile_rec",
38+
"korean_PP-OCRv5_mobile_rec",
3739
]

paddlex/repo_apis/PaddleOCR_api/configs/eslav_PP-OCRv5_mobile_rec.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Global:
2+
model_name: eslav_PP-OCRv5_mobile_rec # To use static model for inference.
23
debug: false
34
use_gpu: true
45
epoch_num: 75

0 commit comments

Comments
 (0)