Skip to content

Commit a4a3440

Browse files
committed
feat: Support yolov8n publaynet and general layout model
1 parent 1f26070 commit a4a3440

File tree

5 files changed

+64
-48
lines changed

5 files changed

+64
-48
lines changed

README.md

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@
2424
|`pp_layout_table`| 表格 | `layout_table.onnx` |`["table"]` |
2525
| `pp_layout_publaynet`| 英文 | `layout_publaynet.onnx` |`["text", "title", "list", "table", "figure"]` |
2626
| `pp_layout_cdla`| 中文 | `layout_cdla.onnx` | `['text', 'title', 'figure', 'figure_caption', 'table', 'table_caption', 'header', 'footer', 'reference', 'equation']` |
27-
| `yolov8n_layout_paper`| 论文 | `yolov8n_layout_paper.onnx` | `['text', 'title', 'figure', 'figure_caption', 'table', 'table_caption', 'header', 'footer', 'reference', 'equation']` |
28-
| `yolov8n_layout_report`| 研报 | `yolov8n_layout_report.onnx` | `['text', 'title', 'header', 'footer', 'figure', 'figure_caption', 'table', 'table_caption', 'toc']` |
27+
| `yolov8n_layout_paper`| 论文 | `yolov8n_layout_paper.onnx` | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |
28+
| `yolov8n_layout_report`| 研报 | `yolov8n_layout_report.onnx` | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |
29+
| `yolov8n_layout_publaynet`| 英文 | `yolov8n_layout_publaynet.onnx` | `["Text", "Title", "List", "Table", "Figure"]` |
30+
| `yolov8n_layout_general6`| 通用 | `yolov8n_layout_general6.onnx` | `["Text", "Title", "Figure", "Table", "Caption", "Equation"]` |
2931

3032
PP模型来源:[PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md)
3133

@@ -58,28 +60,28 @@ if ploted_img is not None:
5860
```
5961

6062
#### 终端运行
61-
- 用法:
62-
```bash
63-
$ rapid_layout -h
64-
usage: rapid_layout [-h] -img IMG_PATH [-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}]
65-
[--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}]
66-
[--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}] [--use_cuda] [--use_dml]
67-
[-v]
68-
69-
options:
70-
-h, --help show this help message and exit
71-
-img IMG_PATH, --img_path IMG_PATH
72-
Path to image for layout.
73-
-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}, --model_type {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}
74-
Support model type
75-
--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}
76-
Box threshold, the range is [0, 1]
77-
--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report}
78-
IoU threshold, the range is [0, 1]
79-
--use_cuda Whether to use cuda.
80-
--use_dml Whether to use DirectML, which only works in Windows10+.
81-
-v, --vis Whether to visualize the layout results.
82-
```
63+
```bash
64+
$ rapid_layout -h
65+
usage: rapid_layout [-h] -img IMG_PATH
66+
[-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}]
67+
[--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}]
68+
[--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}]
69+
[--use_cuda] [--use_dml] [-v]
70+
71+
options:
72+
-h, --help show this help message and exit
73+
-img IMG_PATH, --img_path IMG_PATH
74+
Path to image for layout.
75+
-m {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}, --model_type {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}
76+
Support model type
77+
--conf_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}
78+
Box threshold, the range is [0, 1]
79+
--iou_thres {pp_layout_cdla,pp_layout_publaynet,pp_layout_table,yolov8n_layout_paper,yolov8n_layout_report,yolov8n_layout_publaynet,yolov8n_layout_general6}
80+
IoU threshold, the range is [0, 1]
81+
--use_cuda Whether to use cuda.
82+
--use_dml Whether to use DirectML, which only works in Windows10+.
83+
-v, --vis Whether to visualize the layout results.
84+
```
8385
- 示例:
8486
```bash
8587
$ rapid_layout -v -img test_images/layout.png

rapid_layout/main.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
"pp_layout_table": f"{ROOT_URL}/layout_table.onnx",
3232
"yolov8n_layout_paper": f"{ROOT_URL}/yolov8n_layout_paper.onnx",
3333
"yolov8n_layout_report": f"{ROOT_URL}/yolov8n_layout_report.onnx",
34+
"yolov8n_layout_publaynet": f"{ROOT_URL}/yolov8n_layout_publaynet.onnx",
35+
"yolov8n_layout_general6": f"{ROOT_URL}/yolov8n_layout_general6.onnx",
3436
}
3537
DEFAULT_MODEL_PATH = str(ROOT_DIR / "models" / "layout_cdla.onnx")
3638

@@ -72,12 +74,10 @@ def __init__(
7274

7375
self.load_img = LoadImage()
7476

75-
self.pp_layout_type = [
76-
"pp_layout_cdla",
77-
"pp_layout_publaynet",
78-
"pp_layout_table",
77+
self.pp_layout_type = [k for k in KEY_TO_MODEL_URL if k.startswith("pp")]
78+
self.yolov8_layout_type = [
79+
k for k in KEY_TO_MODEL_URL if k.startswith("yolov8n")
7980
]
80-
self.yolov8_layout_type = ["yolov8n_layout_paper", "yolov8n_layout_report"]
8181

8282
def __call__(
8383
self, img_content: Union[str, np.ndarray, bytes, Path]
@@ -104,12 +104,15 @@ def pp_layout(self, img: np.ndarray, ori_img_shape: Tuple[int, int]):
104104
return boxes, scores, class_names, elapse
105105

106106
def yolov8_layout(self, img: np.ndarray, ori_img_shape: Tuple[int, int]):
107+
s_time = time.time()
108+
107109
input_tensor = self.yolov8_preprocess(img)
108110
outputs = self.session(input_tensor)
109111
boxes, scores, class_names = self.yolov8_postprocess(
110112
outputs, ori_img_shape, self.yolov8_input_shape
111113
)
112-
return boxes, scores, class_names
114+
elapse = time.time() - s_time
115+
return boxes, scores, class_names, elapse
113116

114117
@staticmethod
115118
def get_model_path(model_type: str, model_path: Union[str, Path, None]) -> str:

rapid_layout/utils/load_image.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ def __call__(self, img: InputType) -> np.ndarray:
2525

2626
origin_img_type = type(img)
2727
img = self.load_img(img)
28-
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
28+
if img.ndim == 3:
29+
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
2930
img = self.convert_img(img, origin_img_type)
3031
return img
3132

319 KB
Loading

tests/test_layout.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,14 @@
2121
img = cv2.imread(str(img_path))
2222

2323

24-
def test_iou_outside_thres():
25-
with pytest.raises(ValueError) as exc:
26-
engine = RapidLayout(iou_thres=1.2)
27-
assert exc.type is ValueError
28-
29-
30-
def test_conf_outside_thres():
31-
with pytest.raises(ValueError) as exc:
32-
engine = RapidLayout(conf_thres=1.2)
33-
assert exc.type is ValueError
34-
35-
36-
def test_empty():
37-
with pytest.raises(LoadImageError) as exc:
38-
engine = RapidLayout()
39-
engine(None)
40-
assert exc.type is LoadImageError
24+
@pytest.mark.parametrize(
25+
"model_type,gt", [("yolov8n_layout_publaynet", 12), ("yolov8n_layout_general6", 13)]
26+
)
27+
def test_yolov8n_layout(model_type, gt):
28+
img_path = test_file_dir / "PMC3576793_00004.jpg"
29+
engine = RapidLayout(model_type=model_type)
30+
boxes, scores, class_names, *elapse = engine(img_path)
31+
assert len(boxes) == gt
4132

4233

4334
@pytest.mark.parametrize(
@@ -56,3 +47,22 @@ def test_yolov8_layout(img_content):
5647
engine = RapidLayout(model_type="yolov8n_layout_paper")
5748
boxes, scores, class_names, *elapse = engine(img_content)
5849
assert len(boxes) == 11
50+
51+
52+
def test_iou_outside_thres():
53+
with pytest.raises(ValueError) as exc:
54+
engine = RapidLayout(iou_thres=1.2)
55+
assert exc.type is ValueError
56+
57+
58+
def test_conf_outside_thres():
59+
with pytest.raises(ValueError) as exc:
60+
engine = RapidLayout(conf_thres=1.2)
61+
assert exc.type is ValueError
62+
63+
64+
def test_empty():
65+
with pytest.raises(LoadImageError) as exc:
66+
engine = RapidLayout()
67+
engine(None)
68+
assert exc.type is LoadImageError

0 commit comments

Comments
 (0)