feat: extract param for wiredV2

Joker1212 · Joker1212 · commit 30c4b0c46cda · 2024-11-12T13:11:58.000+08:00
diff --git a/README.md b/README.md
@@ -13,12 +13,12 @@
 </div>
 
 ### 最近更新
-- **2024.10.13**
-  - 补充最新paddlex-SLANet-plus 测评结果(已集成模型到[RapidTable](https://github.com/RapidAI/RapidTable)仓库)
 - **2024.10.22**
   - 补充复杂背景多表格检测提取方案[RapidTableDet](https://github.com/RapidAI/RapidTableDetection)
 - **2024.10.29**
   - 使用yolo11重新训练表格分类器，修正wired_table_rec v2逻辑坐标还原错误，并更新测评
+- **2024.11.12**
+  - 抽离模型识别和处理过程核心阈值，方便大家进行微调适配自己的场景   
     
 ### 简介
 💖该仓库是用来对文档中表格做结构化识别的推理库，包括来自阿里读光有线和无线表格识别模型，llaipython(微信)贡献的有线表格模型，网易Qanything内置表格分类模型等。
@@ -68,6 +68,7 @@
 wired_table_rec_v2(有线表格精度最高): 通用场景有线表格(论文，杂志，期刊, 收据，单据，账单)
 
 paddlex-SLANet-plus(综合精度最高): 文档场景表格(论文，杂志，期刊中的表格)
+[微调入参参考](#核心参数)
 
 ### 安装
 
@@ -100,12 +101,6 @@ else:
 html, elasp, polygons, logic_points, ocr_res = table_engine(img_path)
 print(f"elasp: {elasp}")
 
-#仅返回表格物理box和行列逻辑坐标，不进行ocr识别
-#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, need_ocr=False)  
-
-#默认没有匹配的表格框进行了ocr再识别，取消该行为
-#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, rec_again=False) 
-
 # 使用其他ocr模型
 #ocr_engine =RapidOCR(det_model_dir="xxx/det_server_infer.onnx",rec_model_dir="xxx/rec_server_infer.onnx")
 #ocr_res, _ = ocr_engine(img_path)
@@ -164,6 +159,27 @@ for i, res in enumerate(result):
 # cv2.imwrite(f"{out_dir}/{file_name}-visualize.jpg", img)
 ```
 
+### 核心参数
+```python
+wired_table_rec = WiredTableRecognition()
+html, elasp, polygons, logic_points, ocr_res = wired_table_rec(
+    img_path,
+    version="v2", #默认使用v2线框模型，切换阿里读光模型可改为v1
+    morph_close=True, # 是否进行形态学操作,辅助找到更多线框,默认为True
+    more_h_lines=True, # 是否基于线框检测结果进行更多水平线检查，辅助找到更小线框, 默认为True
+    more_v_lines=True, # 是否基于线框检测结果进行更多垂直线检查，辅助找到更小线框, 默认为True
+    extend_line=True, # 是否基于线框检测结果进行线段延长，辅助找到更多线框, 默认为True
+    need_ocr=True, # 是否进行OCR识别, 默认为True
+    rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
+)
+lineless_table_rec = LinelessTableRecognition()
+html, elasp, polygons, logic_points, ocr_res = lineless_table_rec(
+    img_path,
+    need_ocr=True, # 是否进行OCR识别, 默认为True
+    rec_again=True, # 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
+)
+```
+
 ## FAQ (Frequently Asked Questions)
 1. **问：识别框丢失了内部文字信息**
    - 答：默认使用的rapidocr小模型，如果需要更高精度的效果，可以从 [模型列表](https://rapidai.github.io/RapidOCRDocs/model_list/#_1)
diff --git a/demo_wired.py b/demo_wired.py
@@ -15,8 +15,17 @@
 
 table_rec = WiredTableRecognition()
 
-img_path = "tests/test_files/wired/table1.png"
-html, elasp, polygons, logic_points, ocr_res = table_rec(img_path)
+img_path = "tests/test_files/wired/wired_big_box.png"
+html, elasp, polygons, logic_points, ocr_res = table_rec(
+    img_path,
+    version="v2",  # 默认使用v2线框模型，切换阿里读光模型可改为v1
+    morph_close=True,  # 是否进行形态学操作,辅助找到更多线框,默认为True
+    more_h_lines=True,  # 是否基于线框检测结果进行更多水平线检查，辅助找到更小线框, 默认为True
+    more_v_lines=True,  # 是否基于线框检测结果进行更多垂直线检查，辅助找到更小线框, 默认为True
+    extend_line=True,  # 是否基于线框检测结果进行线段延长，辅助找到更多线框, 默认为True
+    need_ocr=True,  # 是否进行OCR识别, 默认为True
+    rec_again=True,  # 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True
+)
 
 print(f"cost: {elasp:.5f}")
 
@@ -29,6 +38,6 @@
 plot_rec_box_with_logic_info(
     img_path, f"{output_dir}/table_rec_box.jpg", logic_points, polygons
 )
-plot_rec_box(img_path, f"{output_dir}/ocr_box.jpg", ocr_res)
+plot_rec_box(f"{output_dir}/table_rec_box.jpg", f"{output_dir}/ocr_box.jpg", ocr_res)
 
 print(f"The results has been saved under {output_dir}")
diff --git a/tests/test_files/wired/wired_big_box.png b/tests/test_files/wired/wired_big_box.png
diff --git a/tests/test_wired_table_rec.py b/tests/test_wired_table_rec.py
@@ -65,6 +65,22 @@ def test_input_normal(img_path, gt_td_nums, gt2):
     assert td_nums >= gt_td_nums
 
 
+@pytest.mark.parametrize(
+    "img_path, gt_td_nums",
+    [
+        ("wired_big_box.png", 70),
+    ],
+)
+def test_input_big_box(img_path, gt_td_nums):
+    """Check that ``get_td_nums`` of the recognised table is at least ``gt_td_nums``."""
+    img_path = test_file_dir / img_path
+    ocr_result, _ = ocr_engine(img_path)
+    table_str, *_ = table_recog(str(img_path), ocr_result)
+    td_nums = get_td_nums(table_str)
+
+    assert td_nums >= gt_td_nums
+
+
 @pytest.mark.parametrize(
     "box1, box2, threshold, expected",
     [
diff --git a/wired_table_rec/main.py b/wired_table_rec/main.py
@@ -64,7 +64,7 @@ def __call__(
             rec_again = kwargs.get("rec_again", True)
             need_ocr = kwargs.get("need_ocr", True)
         img = self.load_img(img)
-        polygons = self.table_line_rec(img)
+        polygons = self.table_line_rec(img, **kwargs)
         if polygons is None:
             logging.warning("polygons is None.")
             return "", 0.0, None, None, None
diff --git a/wired_table_rec/table_line_rec.py b/wired_table_rec/table_line_rec.py
@@ -36,7 +36,7 @@ def __init__(self, model_path: Optional[str] = None):
 
         self.session = OrtInferSession(model_path)
 
-    def __call__(self, img: np.ndarray) -> Optional[np.ndarray]:
+    def __call__(self, img: np.ndarray, **kwargs) -> Optional[np.ndarray]:
         img_info = self.preprocess(img)
         pred = self.infer(img_info)
         polygons = self.postprocess(pred)
diff --git a/wired_table_rec/table_line_rec_plus.py b/wired_table_rec/table_line_rec_plus.py
@@ -31,13 +31,12 @@ def __init__(self, model_path: Optional[str] = None):
 
         self.session = OrtInferSession(model_path)
 
-    def __call__(self, img: np.ndarray) -> Optional[np.ndarray]:
+    def __call__(self, img: np.ndarray, **kwargs) -> Optional[np.ndarray]:
         img_info = self.preprocess(img)
         pred = self.infer(img_info)
-        polygons = self.postprocess(img, pred)
+        polygons = self.postprocess(img, pred, **kwargs)
         if polygons.size == 0:
             return None
-
         polygons = polygons.reshape(polygons.shape[0], 4, 2)
         polygons[:, 3, :], polygons[:, 1, :] = (
             polygons[:, 1, :].copy(),
@@ -68,7 +67,25 @@ def infer(self, input):
         result = result[0].astype(np.uint8)
         return result
 
-    def postprocess(self, img, pred, row=50, col=30, alph=15, angle=50):
+    def postprocess(self, img, pred, **kwargs):
+        row = kwargs.get("row", 50) if kwargs else 50
+        col = kwargs.get("col", 30) if kwargs else 30
+        h_lines_threshold = kwargs.get("h_lines_threshold", 100) if kwargs else 100
+        v_lines_threshold = kwargs.get("v_lines_threshold", 15) if kwargs else 15
+        angle = kwargs.get("angle", 50) if kwargs else 50
+        morph_close = (
+            kwargs.get("morph_close", True) if kwargs else True
+        )  # 是否进行闭合运算以找到更多小的框
+        more_h_lines = (
+            kwargs.get("more_h_lines", True) if kwargs else True
+        )  # 是否调整以找到更多的横线
+        more_v_lines = (
+            kwargs.get("more_v_lines", True) if kwargs else True
+        )  # 是否调整以找到更多的竖线
+        extend_line = (
+            kwargs.get("extend_line", True) if kwargs else True
+        )  # 是否进行线段延长使得端点连接
+
         ori_shape = img.shape
         pred = np.uint8(pred)
         hpred = copy.deepcopy(pred)  # 横线
@@ -89,16 +106,19 @@ def postprocess(self, img, pred, row=50, col=30, alph=15, angle=50):
         vpred = cv2.morphologyEx(
             vpred, cv2.MORPH_CLOSE, vkernel, iterations=1
         )  # 先膨胀后腐蚀的过程
-        hpred = cv2.morphologyEx(hpred, cv2.MORPH_CLOSE, hkernel, iterations=1)
+        if morph_close:
+            hpred = cv2.morphologyEx(hpred, cv2.MORPH_CLOSE, hkernel, iterations=1)
         colboxes = get_table_line(vpred, axis=1, lineW=col)  # 竖线
         rowboxes = get_table_line(hpred, axis=0, lineW=row)  # 横线
-        # rboxes_row_, rboxes_col_ = adjust_lines(rowboxes, colboxes, alph = alph, angle=angle)
-        rboxes_row_ = adjust_lines(rowboxes, alph=100, angle=angle)
-        rboxes_col_ = adjust_lines(colboxes, alph=alph, angle=angle)
+        rboxes_row_, rboxes_col_ = [], []
+        if more_h_lines:
+            rboxes_row_ = adjust_lines(rowboxes, alph=h_lines_threshold, angle=angle)
+        if more_v_lines:
+            rboxes_col_ = adjust_lines(colboxes, alph=v_lines_threshold, angle=angle)
         rowboxes += rboxes_row_
         colboxes += rboxes_col_
-        rowboxes, colboxes = final_adjust_lines(rowboxes, colboxes)
-
+        if extend_line:
+            rowboxes, colboxes = final_adjust_lines(rowboxes, colboxes)
         tmp = np.zeros(img.shape[:2], dtype="uint8")
         tmp = draw_lines(tmp, rowboxes + colboxes, color=255, lineW=2)
         labels = measure.label(tmp < 255, connectivity=2)  # 8连通区域标记
diff --git a/wired_table_rec/utils_table_recover.py b/wired_table_rec/utils_table_recover.py
@@ -267,8 +267,17 @@ def plot_rec_box_with_logic_info(img_path, output_path, logic_points, sorted_pol
 
         cv2.putText(
             img,
-            f"{idx}-{logic_points[idx]}",
-            (x1, y1),
+            f"row:{logic_points[idx][0]}-{logic_points[idx][1]}",
+            (x0 + 1, y0 + 15),
+            cv2.FONT_HERSHEY_PLAIN,
+            font_scale,
+            (0, 0, 255),
+            thickness,
+        )
+        cv2.putText(
+            img,
+            f"col:{logic_points[idx][2]}-{logic_points[idx][3]}",
+            (x0 + 1, y0 + 40),
             cv2.FONT_HERSHEY_PLAIN,
             font_scale,
             (0, 0, 255),
@@ -303,15 +312,15 @@ def plot_rec_box(img_path, output_path, sorted_polygons):
         font_scale = 1.0  # 原先是0.5
         thickness = 2  # 原先是1
 
-        cv2.putText(
-            img,
-            str(idx),
-            (x1, y1),
-            cv2.FONT_HERSHEY_PLAIN,
-            font_scale,
-            (0, 0, 255),
-            thickness,
-        )
+        # cv2.putText(
+        #     img,
+        #     str(idx),
+        #     (x1, y1),
+        #     cv2.FONT_HERSHEY_PLAIN,
+        #     font_scale,
+        #     (0, 0, 255),
+        #     thickness,
+        # )
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
     # 保存绘制后的图像
     cv2.imwrite(output_path, img)