chore: pass a VisRes configured with lang_type and font_path into TextRecOutput (#513)

SWHL · web-flow · commit c46d0125f32f · 2025-07-24T21:13:24.000+08:00
diff --git a/python/rapidocr/ch_ppocr_rec/main.py b/python/rapidocr/ch_ppocr_rec/main.py
@@ -23,6 +23,7 @@
 
 from ..utils import Logger
 from ..utils.download_file import DownloadFile, DownloadFileInput
+from ..utils.vis_res import VisRes
 from .typings import TextRecInput, TextRecOutput
 from .utils import CTCLabelDecode
 
@@ -46,6 +47,8 @@ def __init__(self, cfg: Dict[str, Any]):
         self.rec_batch_num = cfg["rec_batch_num"]
         self.rec_image_shape = cfg["rec_img_shape"]
 
+        self.cfg = cfg
+
     def get_character_dict(self, cfg):
         character = None
         dict_path = cfg.get("rec_keys_path", None)
@@ -132,7 +135,14 @@ def __call__(self, args: TextRecInput) -> TextRecOutput:
 
         all_line_results, all_word_results = list(zip(*rec_res))
         txts, scores = list(zip(*all_line_results))
-        return TextRecOutput(img_list, txts, scores, all_word_results, elapse)
+        return TextRecOutput(
+            img_list,
+            txts,
+            scores,
+            all_word_results,
+            elapse,
+            viser=VisRes(lang_type=self.cfg.lang_type, font_path=self.cfg.font_path),
+        )
 
     def resize_norm_img(self, img: np.ndarray, max_wh_ratio: float) -> np.ndarray:
         img_channel, img_height, img_width = self.rec_image_shape
diff --git a/python/rapidocr/ch_ppocr_rec/typings.py b/python/rapidocr/ch_ppocr_rec/typings.py
@@ -43,7 +43,7 @@ class TextRecOutput:
         ("", 1.0, None),
     )
     elapse: Optional[float] = None
-    lang_type: Optional[str] = None
+    viser: Optional[VisRes] = None
 
     def __len__(self):
         if self.txts is None:
@@ -55,10 +55,7 @@ def vis(self, save_path: Optional[Union[str, Path]] = None) -> Optional[np.ndarr
             logger.warning("No image or txts to visualize.")
             return None
 
-        vis = VisRes()
-        vis_img = vis.draw_rec_res(
-            self.imgs, self.txts, self.scores, lang_type=self.lang_type
-        )
+        vis_img = self.viser.draw_rec_res(self.imgs, self.txts, self.scores)
 
         if save_path is not None:
             save_img(save_path, vis_img)
diff --git a/python/rapidocr/main.py b/python/rapidocr/main.py
@@ -67,6 +67,7 @@ def _initialize(self, cfg: DictConfig):
 
         self.use_rec = cfg.Global.use_rec
         cfg.Rec.engine_cfg = cfg.EngineConfig[cfg.Rec.engine_type.value]
+        cfg.Rec.font_path = cfg.Global.font_path
         self.text_rec = TextRecognizer(cfg.Rec)
 
         self.load_img = LoadImage()
diff --git a/python/rapidocr/utils/vis_res.py b/python/rapidocr/utils/vis_res.py
@@ -4,7 +4,7 @@
 import math
 import random
 from pathlib import Path
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Sequence, Tuple, Union
 
 import cv2
 import numpy as np
@@ -128,26 +128,19 @@ def get_font_path(
 
     def draw_rec_res(
         self,
-        imgs: List[InputType],
+        imgs: Sequence[InputType],
         txts: Union[List[str], Tuple[str]],
         scores: Tuple[float],
-        lang_type: Optional[str] = None,
     ) -> np.ndarray:
         result_imgs = []
         for img, txt, score in zip(imgs, txts, scores):
-            vis_img = self.draw_one_rec_res(img, txt, score, lang_type)
+            vis_img = self.draw_one_rec_res(img, txt, score)
             result_imgs.append(vis_img)
         return self.concat_imgs(result_imgs, direction="vertical")
 
     def draw_one_rec_res(
-        self,
-        img_content: InputType,
-        txt: str,
-        score: float,
-        lang_type: Optional[str] = None,
+        self, img_content: InputType, txt: str, score: float
     ) -> np.ndarray:
-        font_path = self.get_font_path(None, lang_type)
-
         image = Image.fromarray(self.load_img(img_content))
         h, w = image.height, image.width
         if image.mode == "L":
@@ -162,15 +155,15 @@ def draw_one_rec_res(
         box_width = self.get_box_width(box)
         if box_height > 2 * box_width:
             font_size = max(int(box_width * 0.9), 10)
-            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+            font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
             cur_y = box[0][1]
 
             for c in txt:
                 draw_right.text((box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                 cur_y += self.get_char_size(font, c)
         else:
             font_size = max(int(box_height * 0.8), 10)
-            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+            font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
             draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
 
         img_left = Image.blend(image, img_left, 0.5)