Skip to content

Commit 772eb2e

Browse files
z275748353zhanglongbin
andauthored
Fix the bug of dataflow with ID #34 (#55)
Co-authored-by: zhanglongbin <[email protected]>
1 parent f23d335 commit 772eb2e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+3862
-711
lines changed

.env-dev

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,7 @@ WORKFLOW_TTL_AFTER_SUCCESS=120
7777
WORKFLOW_TTL_AFTER_FAILURE=3600
7878
WORKFLOW_POD_GC_STRATEGY=OnWorkflowCompletion
7979

80+
81+
#Mineru URL
82+
MINERU_API_URL=http://111.4.242.20:30000
83+

.env-prd

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,8 @@ WORKFLOW_MEMORY_LIMIT=4Gi
7272

7373
# user
7474
WORKFLOW_RUN_AS_USER=0
75-
WORKFLOW_RUN_AS_GROUP=0
75+
WORKFLOW_RUN_AS_GROUP=0
76+
77+
78+
#Mineru URL
79+
MINERU_API_URL=http://111.4.242.20:30000

.env-stg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,7 @@ WORKFLOW_POD_GC_STRATEGY=OnWorkflowCompletion
7979
# user
8080
WORKFLOW_RUN_AS_USER=0
8181
WORKFLOW_RUN_AS_GROUP=0
82+
83+
84+
#Mineru URL
85+
MINERU_API_URL=http://111.4.242.20:30000
3.03 KB
Loading
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""
4+
MinerU 独立工作进程脚本
5+
用于在独立进程中运行 MinerU,避免阻塞 Celery Worker
6+
"""
7+
import sys
8+
import json
9+
from pathlib import Path
10+
11+
12+
def main():
    """Run one MinerU VLM conversion configured from command-line arguments.

    Expected ``sys.argv`` layout (after the script name)::

        <pdf_file_path> <temp_output_dir> <server_url> <backend> <result_json_path>

    The outcome is written to ``result_json_path`` as JSON:

    * success: ``{"success": true, "middle_json": ...}``
    * failure: ``{"success": false, "error": "<exception message>"}``

    The process exits with status 0 on success and 1 on a usage error or any
    failure during conversion. On failure the message is also printed to
    stderr.
    """
    if len(sys.argv) < 6:
        print("Usage: python mineru_worker.py <pdf_file_path> <temp_output_dir> <server_url> <backend> <result_json_path>", file=sys.stderr)
        sys.exit(1)

    pdf_file_path, temp_output_dir, server_url, backend, result_json_path = sys.argv[1:6]

    try:
        # Imported inside the try block so that an import failure is reported
        # through the result file like any other conversion error.
        from mineru.cli.common import read_fn, prepare_env
        from mineru.data.data_reader_writer import FileBasedDataWriter
        from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze

        pdf_path = Path(pdf_file_path)
        pdf_bytes = read_fn(pdf_path)

        # Only the image directory is needed here; the second returned
        # directory is not used by this script.
        local_image_dir, _ = prepare_env(temp_output_dir, pdf_path.stem, "vlm")
        image_writer = FileBasedDataWriter(local_image_dir)

        middle_json, _ = vlm_doc_analyze(
            pdf_bytes,
            image_writer=image_writer,
            backend=backend,
            server_url=server_url,
        )

        result = {
            "success": True,
            "middle_json": middle_json,
        }

        with open(result_json_path, 'w', encoding='utf-8') as f:
            # NOTE: default=str stringifies any value json cannot encode
            # natively instead of raising.
            json.dump(result, f, ensure_ascii=False, default=str)

    except Exception as e:
        result = {
            "success": False,
            "error": str(e),
        }

        # Best effort: recording the failure must never mask the original
        # error, but a failed write is reported instead of silently dropped.
        try:
            with open(result_json_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False)
        except Exception as write_err:
            print(
                f"Error: could not write result file {result_json_path}: {write_err}",
                file=sys.stderr,
            )

        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    sys.exit(0)
66+
67+
68+
# Execute the conversion only when this file is run as a script.
if __name__ == "__main__":
    main()
70+

0 commit comments

Comments
 (0)