Skip to content

Commit 8adc9b2

Browse files
committed
v0.7.0
1 parent 6b50efe commit 8adc9b2

File tree

9 files changed

+16
-14
lines changed

9 files changed

+16
-14
lines changed

demo.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ def do_parse(
4343
f_dump_model_output=True, # Whether to dump model output files
4444
f_dump_orig_pdf=True, # Whether to dump original PDF files
4545
f_dump_content_list=True, # Whether to dump content list files
46-
f_dump_md_html=True, # Whether to convert markdown to HTML
47-
f_dump_md_docx=True, # Whether to convert markdown to docx (via Pandoc)
46+
f_dump_md_html=False, # Whether to convert markdown to HTML
47+
f_dump_md_docx=False, # Whether to convert markdown to docx (via Pandoc)
4848
f_make_md_mode=MakeMode.MM_MD, # The mode for making markdown content, default is MM_MD
4949
start_page_id=0, # Start page ID for parsing, default is 0
5050
end_page_id=None, # End page ID for parsing, default is None (parse all pages until the end of the document)

docker/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ RUN apt-get update && \
2121
WORKDIR /app
2222

2323
# 安装 Python 依赖
24-
RUN python3 -m pip install 'rapid-doc[cpu]==0.6.1' -i https://pypi.org/simple --break-system-packages && \
25-
python3 -m pip install 'rapid-doc[api]==0.6.1' -i https://pypi.org/simple --break-system-packages && \
24+
RUN python3 -m pip install 'rapid-doc[cpu]==0.7.0' -i https://pypi.org/simple --break-system-packages && \
25+
python3 -m pip install 'rapid-doc[api]==0.7.0' -i https://pypi.org/simple --break-system-packages && \
2626
python3 -m pip cache purge
2727

2828
# 复制配置文件和脚本(优先复制,利用Docker缓存)

docker/DockerfileGPU

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ WORKDIR /app
2929

3030
# 安装 Python 依赖
3131
RUN python3 -m pip install --upgrade pip setuptools wheel && \
32-
python3 -m pip install 'rapid-doc[gpu]==0.6.1' 'rapid-doc[api]==0.6.1' -i https://pypi.org/simple && \
32+
python3 -m pip install 'rapid-doc[gpu]==0.7.0' 'rapid-doc[api]==0.7.0' -i https://pypi.org/simple && \
3333
python3 -m pip install 'onnxruntime-gpu==1.23.0' -i https://pypi.org/simple && \
3434
python3 -m pip cache purge
3535

docker/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
cd docker
1313

1414
# 1. CPU 模式
15-
docker build -f Dockerfile -t hzkitty/rapid-doc:0.6.1 .
15+
docker build -f Dockerfile -t hzkitty/rapid-doc:0.7.0 .
1616

1717
# 2. GPU 模式
18-
docker build -f DockerfileGPU -t hzkitty/rapid-doc:0.6.1-gpu .
18+
docker build -f DockerfileGPU -t hzkitty/rapid-doc:0.7.0-gpu .
1919
```
2020

2121

docker/docker-compose-gpu.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
services:
22
rapid-doc-server:
33
container_name: rapid-doc-server
4-
image: hzkitty/rapid-doc:0.6.1-gpu
4+
image: hzkitty/rapid-doc:0.7.0-gpu
55
deploy:
66
resources:
77
reservations:

docker/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
services:
22
rapid-doc-server:
33
container_name: rapid-doc-server
4-
image: hzkitty/rapid-doc:0.6.1
4+
image: hzkitty/rapid-doc:0.7.0
55
ports:
66
- "8888:8888"
77
environment:

rapid_doc/backend/pipeline/model_init.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,13 @@ def atom_model_init(model_name: str, **kwargs):
8686
kwargs.get('layout_config'),
8787
)
8888
elif model_name == AtomicModel.FORMULA:
89-
atom_model = kwargs.get('formula_config').get('custom_model')
89+
atom_model = (kwargs.get('formula_config') or {}).get('custom_model')
9090
if not isinstance(atom_model, CustomBaseModel):
9191
atom_model = formula_model_init(
9292
kwargs.get('formula_config'),
9393
)
9494
elif model_name == AtomicModel.OCR:
95-
atom_model = kwargs.get('ocr_config').get('custom_model')
95+
atom_model = (kwargs.get('ocr_config') or {}).get('custom_model')
9696
if not isinstance(atom_model, CustomBaseModel):
9797
atom_model = ocr_model_init(
9898
kwargs.get('det_db_box_thresh', 0.3),
@@ -102,7 +102,7 @@ def atom_model_init(model_name: str, **kwargs):
102102
kwargs.get('enable_merge_det_boxes', True)
103103
)
104104
elif model_name == AtomicModel.Table:
105-
atom_model = kwargs.get('table_config').get('custom_model')
105+
atom_model = (kwargs.get('table_config') or {}).get('custom_model')
106106
if not isinstance(atom_model, CustomBaseModel):
107107
atom_model = table_model_init(
108108
kwargs.get('lang'),

rapid_doc/model/table/rapid_table_self/table_structure/unet/utils/utils_table_line_rec.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,9 @@ def min_area_rect_box(
237237
"""
238238
boxes = []
239239
for region in regions:
240-
bbox_area = getattr(region, "area_bbox", region.bbox_area)
240+
bbox_area = getattr(region, "area_bbox", None)
241+
if bbox_area is None:
242+
bbox_area = region.bbox_area
241243
if bbox_area > H * W * 3 / 4: # 过滤大的单元格
242244
continue
243245
rect = cv2.minAreaRect(region.coords[:, ::-1])

rapid_doc/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
__version__ = "0.6.1"
1+
__version__ = "0.7.0"
22
__mineru_version__ = "2.6.4"

0 commit comments

Comments
 (0)