fix(rapidocr): fix issue #498 by constructing VisRes with text_score, lang_type and font_path from config

SWHL · SWHL · commit b7bd22001493 · 2025-07-22T08:30:14.000+08:00
diff --git a/python/rapidocr/main.py b/python/rapidocr/main.py
@@ -35,12 +35,12 @@ class RapidOCR:
     def __init__(
         self, config_path: Optional[str] = None, params: Optional[Dict[str, Any]] = None
     ):
-        cfg = self.load_config(config_path, params)
-        self.initialize(cfg)
+        cfg = self._load_config(config_path, params)
+        self._initialize(cfg)
 
         self.logger = Logger(logger_name=__name__).get_log()
 
-    def load_config(
+    def _load_config(
         self, config_path: Optional[str], params: Optional[Dict[str, Any]]
     ) -> DictConfig:
         if config_path is not None and Path(config_path).exists():
@@ -52,7 +52,7 @@ def load_config(
             cfg = ParseParams.update_batch(cfg, params)
         return cfg
 
-    def initialize(self, cfg: DictConfig):
+    def _initialize(self, cfg: DictConfig):
         self.text_score = cfg.Global.text_score
         self.min_height = cfg.Global.min_height
         self.width_height_ratio = cfg.Global.width_height_ratio
@@ -273,7 +273,11 @@ def get_final_res(
             scores=rec_res.scores,
             word_results=rec_res.word_results,
             elapse_list=[det_res.elapse, cls_res.elapse, rec_res.elapse],
-            lang_type=self.cfg.Rec.lang_type,
+            viser=VisRes(
+                text_score=self.cfg.Global.text_score,
+                lang_type=self.cfg.Rec.lang_type,
+                font_path=self.cfg.Global.font_path,
+            ),
         )
         ocr_res = self.filter_by_text_score(ocr_res)
         if len(ocr_res) <= 0:
@@ -409,11 +413,12 @@ def main(arg_list: Optional[List[str]] = None):
             save_path = cur_dir / f"{Path(args.img_path).stem}_vis_single.png"
             cv2.imwrite(str(save_path), vis_img)
             print(f"The vis single result has saved in {save_path}")
-        else:
-            save_path = cur_dir / f"{Path(args.img_path).stem}_vis.png"
-            vis_img = vis(args.img_path, result.boxes, result.txts, result.scores)
-            cv2.imwrite(str(save_path), vis_img)
-            print(f"The vis result has saved in {save_path}")
+            return
+
+        save_path = cur_dir / f"{Path(args.img_path).stem}_vis.png"
+        vis_img = vis(args.img_path, result.boxes, result.txts, result.scores)
+        cv2.imwrite(str(save_path), vis_img)
+        print(f"The vis result has saved in {save_path}")
 
 
 if __name__ == "__main__":
diff --git a/python/rapidocr/utils/output.py b/python/rapidocr/utils/output.py
@@ -25,7 +25,7 @@ class RapidOCROutput:
     )
     elapse_list: List[Union[float, None]] = field(default_factory=list)
     elapse: float = field(init=False)
-    lang_type: Optional[str] = None
+    viser: Optional[VisRes] = None
 
     def __post_init__(self):
         self.elapse = sum(v for v in self.elapse_list if isinstance(v, float))
@@ -41,21 +41,17 @@ def to_json(self):
     def to_markdown(self) -> str:
         return ToMarkdown.to(self.boxes, self.txts)
 
-    def vis(self, save_path: Optional[str] = None, font_path: Optional[str] = None):
+    def vis(self, save_path: Optional[str] = None):
         if self.img is None or self.boxes is None:
             logger.warning("No image or boxes to visualize.")
             return
 
-        vis = VisRes()
+        if self.viser is None:
+            logger.error("vis instance is None")
+            return
+
         if all(v is None for v in self.word_results):
-            vis_img = vis(
-                self.img,
-                self.boxes,
-                self.txts,
-                self.scores,
-                font_path=font_path,
-                lang_type=self.lang_type,
-            )
+            vis_img = self.viser(self.img, self.boxes, self.txts, self.scores)
 
             if save_path is not None:
                 save_img(save_path, vis_img)
@@ -65,14 +61,7 @@ def vis(self, save_path: Optional[str] = None, font_path: Optional[str] = None):
         # single word vis
         words_results = sum(self.word_results, ())
         words, words_scores, words_boxes = list(zip(*words_results))
-        vis_img = vis(
-            self.img,
-            words_boxes,
-            words,
-            words_scores,
-            font_path=font_path,
-            lang_type=self.lang_type,
-        )
+        vis_img = self.viser(self.img, words_boxes, words, words_scores)
 
         if save_path is not None:
             save_img(save_path, vis_img)
diff --git a/python/rapidocr/utils/vis_res.py b/python/rapidocr/utils/vis_res.py
@@ -25,28 +25,32 @@
 
 
 class VisRes:
-    def __init__(self, text_score: float = 0.5):
+    def __init__(
+        self,
+        text_score: float = 0.5,
+        lang_type: Optional[LangRec] = None,
+        font_path: Optional[str] = None,
+    ):
         self.logger = Logger(logger_name=__name__).get_log()
 
         self.text_score = text_score
         self.load_img = LoadImage()
 
         self.font_cfg = OmegaConf.load(FONT_YAML_PATH).fonts
 
+        self.font_path = self.get_font_path(font_path, lang_type)
+        self.logger.info(f"Using {self.font_path} to visualize results.")
+
     def __call__(
         self,
         img_content: InputType,
         dt_boxes: np.ndarray,
         txts: Optional[Union[List[str], Tuple[str]]] = None,
         scores: Optional[Tuple[float]] = None,
-        font_path: Optional[str] = None,
-        lang_type: Optional[LangRec] = None,
     ) -> np.ndarray:
         if txts is None:
             return self.draw_dt_boxes(img_content, dt_boxes, scores)
-
-        font_path = self.get_font_path(font_path, lang_type)
-        return self.draw_ocr_box_txt(img_content, dt_boxes, txts, font_path, scores)
+        return self.draw_ocr_box_txt(img_content, dt_boxes, txts, scores)
 
     def draw_dt_boxes(
         self,
@@ -180,7 +184,6 @@ def draw_ocr_box_txt(
         img_content: InputType,
         dt_boxes: np.ndarray,
         txts: Union[List[str], Tuple[str]],
-        font_path: str,
         scores: Optional[Tuple[float]] = None,
     ) -> np.ndarray:
         image = Image.fromarray(self.load_img(img_content))
@@ -208,7 +211,7 @@ def draw_ocr_box_txt(
             box_width = self.get_box_width(box)
             if box_height > 2 * box_width:
                 font_size = max(int(box_width * 0.9), 10)
-                font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+                font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
                 cur_y = box[0][1]
 
                 for c in txt:
@@ -218,7 +221,7 @@ def draw_ocr_box_txt(
                     cur_y += self.get_char_size(font, c)
             else:
                 font_size = max(int(box_height * 0.8), 10)
-                font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+                font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
                 draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
 
         img_left = Image.blend(image, img_left, 0.5)