fix: fixed issues #3, #7 and #8

SWHL · SWHL · commit 8cacba4f09c3 · 2024-12-24T08:29:30.000+08:00
diff --git a/README.md b/README.md
@@ -29,15 +29,17 @@
 | `yolov8n_layout_report`|   研报   |   `yolov8n_layout_report.onnx`    | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |
 | `yolov8n_layout_publaynet`|   英文   |   `yolov8n_layout_publaynet.onnx`    | `["Text", "Title", "List", "Table", "Figure"]` |
 | `yolov8n_layout_general6`|   通用   |   `yolov8n_layout_general6.onnx`    | `["Text", "Title", "Figure", "Table", "Caption", "Equation"]` |
-| 🔥`doclayout_yolo`|   通用   |   `doclayout_yolo_docstructbench_imgsz1024.onnx`    | `['title', 'text', 'abandon', 'figure', 'figure_caption', 'table', 'table_caption', 'table_footnote', 'isolate_formula', 'formula_caption']` |
+| 🔥`doclayout_docstructbench`|   通用   |   `doclayout_yolo_docstructbench_imgsz1024.onnx`    | `['title', 'plain text', 'abandon', 'figure', 'figure_caption', 'table', 'table_caption', 'table_footnote', 'isolate_formula', 'formula_caption']` |
+| 🔥`doclayout_d4la`|   通用   |   `doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.onnx`    | `['DocTitle', 'ParaTitle', 'ParaText', 'ListText', 'RegionTitle', 'Date', 'LetterHead', 'LetterDear', 'LetterSign', 'Question', 'OtherText', 'RegionKV', 'RegionList', 'Abstract', 'Author', 'TableName', 'Table', 'Figure', 'FigureName', 'Equation', 'Reference', 'Footer', 'PageHeader', 'PageFooter', 'Number', 'Catalog', 'PageNumber']` |
+| 🔥`doclayout_docsynth`|   通用   |   `doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.onnx`    | `['Caption', 'Footnote', 'Formula', 'List-item', 'Page-footer', 'Page-header', 'Picture', 'Section-header', 'Table', 'Text', 'Title']` |
 
 PP模型来源：[PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md)
 
 yolov8n系列来源：[360LayoutAnalysis](https://github.com/360AILAB-NLP/360LayoutAnalysis)
 
-（推荐使用）🔥doclayout_yolo模型来源：[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)，该模型是目前最为优秀的开源模型，支持学术论文、Textbook、Financial、Exam Paper、Fuzzy Scans、PPT和Poster 7种文档类型的版面检测。值得一提的是，该模型支持的类别中存在`abandon`一类，主要是文档页面的页眉页脚部分，便于后续快速舍弃。
+（推荐使用）🔥doclayout_yolo模型来源：[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)，该模型是目前最为优秀的开源模型，本项目从中挑选了3个基于不同训练集训练得到的模型。其中`doclayout_docstructbench`来自[link](https://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench/tree/main)，`doclayout_d4la`来自[link](https://huggingface.co/juliozhao/DocLayout-YOLO-D4LA-Docsynth300K_pretrained/blob/main/doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.pt)，`doclayout_docsynth`来自[link](https://huggingface.co/juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained/tree/main)。
 
-模型下载地址为：[link](https://github.com/RapidAI/RapidLayout/releases/tag/v0.0.0)
+DocLayout模型下载地址为：[link](https://github.com/RapidAI/RapidLayout/releases/tag/v0.0.0)
 
 ### 安装
 
diff --git a/demo.py b/demo.py
@@ -5,12 +5,13 @@
 
 from rapid_layout import RapidLayout, VisLayout
 
-layout_engine = RapidLayout(model_type="doclayout_yolo", conf_thres=0.2)
+layout_engine = RapidLayout(model_type="doclayout_docsynth")
 
-img_path = "1.jpg"
+img_path = "tests/test_files/PMC3576793_00004.jpg"
 img = cv2.imread(img_path)
 
-boxes, scores, class_names, elapse = layout_engine(img)
+boxes, scores, class_names, elapse = layout_engine(img_path)
+print(boxes.shape)
 ploted_img = VisLayout.draw_detections(img, boxes, scores, class_names)
 if ploted_img is not None:
     cv2.imwrite("layout_res.png", ploted_img)
diff --git a/rapid_layout/main.py b/rapid_layout/main.py
@@ -35,7 +35,9 @@
     "yolov8n_layout_report": f"{ROOT_URL}/yolov8n_layout_report.onnx",
     "yolov8n_layout_publaynet": f"{ROOT_URL}/yolov8n_layout_publaynet.onnx",
     "yolov8n_layout_general6": f"{ROOT_URL}/yolov8n_layout_general6.onnx",
-    "doclayout_yolo": f"{ROOT_URL}/doclayout_yolo_docstructbench_imgsz1024_meta.onnx",
+    "doclayout_docstructbench": f"{ROOT_URL}/doclayout_yolo_docstructbench_imgsz1024.onnx",
+    "doclayout_d4la": f"{ROOT_URL}/doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.onnx",
+    "doclayout_docsynth": f"{ROOT_URL}/doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.onnx",
 }
 DEFAULT_MODEL_PATH = str(ROOT_DIR / "models" / "layout_cdla.onnx")
 
diff --git a/rapid_layout/utils/post_prepross.py b/rapid_layout/utils/post_prepross.py
@@ -1,7 +1,6 @@
 # -*- encoding: utf-8 -*-
 # @Author: SWHL
 # @Contact: liekkaskono@163.com
-import re
 from typing import List, Tuple
 
 import numpy as np
@@ -325,6 +324,14 @@ def __call__(
         return boxes, confidences, labels
 
 
+def rescale_boxes(boxes, input_width, input_height, img_width, img_height):
+    # Normalize boxes by the model input (w, h, w, h), then scale to the image (w, h, w, h)
+    input_shape = np.array([input_width, input_height, input_width, input_height])
+    boxes = np.divide(boxes, input_shape, dtype=np.float32)
+    boxes *= np.array([img_width, img_height, img_width, img_height])
+    return boxes
+
+
 def scale_boxes(
     img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False
 ):
diff --git a/rapid_layout/utils/pre_procss.py b/rapid_layout/utils/pre_procss.py
@@ -13,7 +13,6 @@
 
 
 class PPPreProcess:
-
     def __init__(self, img_size: Tuple[int, int]):
         self.size = img_size
         self.mean = np.array([0.485, 0.456, 0.406])
@@ -43,7 +42,6 @@ def permute(self, img: np.ndarray) -> np.ndarray:
 
 
 class YOLOv8PreProcess:
-
     def __init__(self, img_size: Tuple[int, int]):
         self.img_size = img_size
 
@@ -56,15 +54,12 @@ def __call__(self, image: np.ndarray) -> np.ndarray:
 
 
 class DocLayoutPreProcess:
-
     def __init__(self, img_size: Tuple[int, int]):
         self.img_size = img_size
         self.letterbox = LetterBox(new_shape=img_size, auto=False, stride=32)
 
     def __call__(self, image: np.ndarray) -> np.ndarray:
-        print(image.shape)
         input_img = self.letterbox(image=image)
-        print(input_img.shape)
         input_img = input_img[None, ...]
         input_img = input_img[..., ::-1].transpose(0, 3, 1, 2)
         input_img = np.ascontiguousarray(input_img)
diff --git a/tests/test_layout.py b/tests/test_layout.py
@@ -26,10 +26,15 @@
     [
         ("yolov8n_layout_publaynet", 12),
         ("yolov8n_layout_general6", 13),
-        ("doclayout_yolo", 14),
+        (
+            "doclayout_docstructbench",
+            14,
+        ),
+        ("doclayout_d4la", 11),
+        ("doclayout_docsynth", 14),
     ],
 )
-def test_yolov8n_layout(model_type, gt):
+def test_layout(model_type, gt):
     img_path = test_file_dir / "PMC3576793_00004.jpg"
     engine = RapidLayout(model_type=model_type)
     boxes, scores, class_names, *elapse = engine(img_path)