Skip to content

Commit 8cacba4

Browse files
committed
fix: fixed issue #3 #7 #8
1 parent faa09cf commit 8cacba4

File tree

6 files changed

+27
-15
lines changed

6 files changed

+27
-15
lines changed

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,17 @@
2929
| `yolov8n_layout_report`| 研报 | `yolov8n_layout_report.onnx` | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |
3030
| `yolov8n_layout_publaynet`| 英文 | `yolov8n_layout_publaynet.onnx` | `["Text", "Title", "List", "Table", "Figure"]` |
3131
| `yolov8n_layout_general6`| 通用 | `yolov8n_layout_general6.onnx` | `["Text", "Title", "Figure", "Table", "Caption", "Equation"]` |
32-
| 🔥`doclayout_yolo`| 通用 | `doclayout_yolo_docstructbench_imgsz1024.onnx` | `['title', 'text', 'abandon', 'figure', 'figure_caption', 'table', 'table_caption', 'table_footnote', 'isolate_formula', 'formula_caption']` |
32+
| 🔥`doclayout_docstructbench`| 通用 | `doclayout_yolo_docstructbench_imgsz1024.onnx` | `['title', 'plain text', 'abandon', 'figure', 'figure_caption', 'table', 'table_caption', 'table_footnote', 'isolate_formula', 'formula_caption']` |
33+
| 🔥`doclayout_d4la`| 通用 | `doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.onnx` | `['DocTitle', 'ParaTitle', 'ParaText', 'ListText', 'RegionTitle', 'Date', 'LetterHead', 'LetterDear', 'LetterSign', 'Question', 'OtherText', 'RegionKV', 'RegionList', 'Abstract', 'Author', 'TableName', 'Table', 'Figure', 'FigureName', 'Equation', 'Reference', 'Footer', 'PageHeader', 'PageFooter', 'Number', 'Catalog', 'PageNumber']` |
34+
| 🔥`doclayout_docsynth`| 通用 | `doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.onnx` | `['Caption', 'Footnote', 'Formula', 'List-item', 'Page-footer', 'Page-header', 'Picture', 'Section-header', 'Table', 'Text', 'Title']` |
3335

3436
PP模型来源:[PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md)
3537

3638
yolov8n系列来源:[360LayoutAnalysis](https://github.com/360AILAB-NLP/360LayoutAnalysis)
3739

38-
(推荐使用)🔥doclayout_yolo模型来源:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO),该模型是目前最为优秀的开源模型,支持学术论文、Textbook、Financial、Exam Paper、Fuzzy Scans、PPT和Poster 7种文档类型的版面检测。值得一提的是,该模型支持的类别中存在`abandon`一类,主要是文档页面的页眉页脚部分,便于后续快速舍弃
40+
(推荐使用)🔥doclayout_yolo模型来源:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO),该模型是目前最为优秀的开源模型,这里挑选了3个基于不同训练集训练得到的模型。其中,`doclayout_docstructbench`来自[link](https://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench/tree/main),`doclayout_d4la`来自[link](https://huggingface.co/juliozhao/DocLayout-YOLO-D4LA-Docsynth300K_pretrained/blob/main/doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.pt),`doclayout_docsynth`来自[link](https://huggingface.co/juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained/tree/main)。
3941

40-
模型下载地址为[link](https://github.com/RapidAI/RapidLayout/releases/tag/v0.0.0)
42+
DocLayout模型下载地址为[link](https://github.com/RapidAI/RapidLayout/releases/tag/v0.0.0)
4143

4244
### 安装
4345

demo.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55

66
from rapid_layout import RapidLayout, VisLayout
77

8-
layout_engine = RapidLayout(model_type="doclayout_yolo", conf_thres=0.2)
8+
layout_engine = RapidLayout(model_type="doclayout_docsynth")
99

10-
img_path = "1.jpg"
10+
img_path = "tests/test_files/PMC3576793_00004.jpg"
1111
img = cv2.imread(img_path)
1212

13-
boxes, scores, class_names, elapse = layout_engine(img)
13+
boxes, scores, class_names, elapse = layout_engine(img_path)
14+
print(boxes.shape)
1415
ploted_img = VisLayout.draw_detections(img, boxes, scores, class_names)
1516
if ploted_img is not None:
1617
cv2.imwrite("layout_res.png", ploted_img)

rapid_layout/main.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
"yolov8n_layout_report": f"{ROOT_URL}/yolov8n_layout_report.onnx",
3636
"yolov8n_layout_publaynet": f"{ROOT_URL}/yolov8n_layout_publaynet.onnx",
3737
"yolov8n_layout_general6": f"{ROOT_URL}/yolov8n_layout_general6.onnx",
38-
"doclayout_yolo": f"{ROOT_URL}/doclayout_yolo_docstructbench_imgsz1024_meta.onnx",
38+
"doclayout_docstructbench": f"{ROOT_URL}/doclayout_yolo_docstructbench_imgsz1024.onnx",
39+
"doclayout_d4la": f"{ROOT_URL}/doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.onnx",
40+
"doclayout_docsynth": f"{ROOT_URL}/doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.onnx",
3941
}
4042
DEFAULT_MODEL_PATH = str(ROOT_DIR / "models" / "layout_cdla.onnx")
4143

rapid_layout/utils/post_prepross.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# -*- encoding: utf-8 -*-
22
# @Author: SWHL
33
# @Contact: [email protected]
4-
import re
54
from typing import List, Tuple
65

76
import numpy as np
@@ -325,6 +324,14 @@ def __call__(
325324
return boxes, confidences, labels
326325

327326

327+
def rescale_boxes(boxes, input_width, input_height, img_width, img_height):
    """Rescale boxes from model-input coordinates to original image coordinates.

    Each box is treated as four values laid out as (x, y, x, y)-style
    coordinates: columns 0 and 2 are scaled along the width, columns 1 and 3
    along the height.

    Args:
        boxes: Array-like whose last dimension has 4 entries, expressed in
            the coordinate system of the network input.
        input_width: Width of the network input.
        input_height: Height of the network input.
        img_width: Width of the original image.
        img_height: Height of the original image.

    Returns:
        A new ``float32`` array with the rescaled boxes; the argument is not
        modified. Empty input yields an empty array of shape ``(0, 4)``.
    """
    boxes = np.asarray(boxes)
    if boxes.size == 0:
        # An empty 1-D input such as [] cannot broadcast against the
        # 4-element scale vectors, so short-circuit with a well-formed result.
        return np.zeros((0, 4), dtype=np.float32)

    # Normalise to [0, 1] relative to the network input size ...
    input_shape = np.array([input_width, input_height, input_width, input_height])
    scaled = np.divide(boxes, input_shape, dtype=np.float32)
    # ... then stretch to the original image size.
    scaled *= np.array([img_width, img_height, img_width, img_height])
    return scaled
333+
334+
328335
def scale_boxes(
329336
img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False
330337
):

rapid_layout/utils/pre_procss.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414

1515
class PPPreProcess:
16-
1716
def __init__(self, img_size: Tuple[int, int]):
1817
self.size = img_size
1918
self.mean = np.array([0.485, 0.456, 0.406])
@@ -43,7 +42,6 @@ def permute(self, img: np.ndarray) -> np.ndarray:
4342

4443

4544
class YOLOv8PreProcess:
46-
4745
def __init__(self, img_size: Tuple[int, int]):
4846
self.img_size = img_size
4947

@@ -56,15 +54,12 @@ def __call__(self, image: np.ndarray) -> np.ndarray:
5654

5755

5856
class DocLayoutPreProcess:
59-
6057
def __init__(self, img_size: Tuple[int, int]):
6158
self.img_size = img_size
6259
self.letterbox = LetterBox(new_shape=img_size, auto=False, stride=32)
6360

6461
def __call__(self, image: np.ndarray) -> np.ndarray:
65-
print(image.shape)
6662
input_img = self.letterbox(image=image)
67-
print(input_img.shape)
6863
input_img = input_img[None, ...]
6964
input_img = input_img[..., ::-1].transpose(0, 3, 1, 2)
7065
input_img = np.ascontiguousarray(input_img)

tests/test_layout.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,15 @@
2626
[
2727
("yolov8n_layout_publaynet", 12),
2828
("yolov8n_layout_general6", 13),
29-
("doclayout_yolo", 14),
29+
(
30+
"doclayout_docstructbench",
31+
14,
32+
),
33+
("doclayout_d4la", 11),
34+
("doclayout_docsynth", 14),
3035
],
3136
)
32-
def test_yolov8n_layout(model_type, gt):
37+
def test_layout(model_type, gt):
3338
img_path = test_file_dir / "PMC3576793_00004.jpg"
3439
engine = RapidLayout(model_type=model_type)
3540
boxes, scores, class_names, *elapse = engine(img_path)

0 commit comments

Comments
 (0)