RapidAI
diff --git a/README.md b/README.md
Lines changed: 13 additions & 13 deletions
Lines changed: 13 additions & 13 deletions
diff --git a/demo.py b/demo.py
Lines changed: 6 additions & 5 deletions
Lines changed: 6 additions & 5 deletions
diff --git a/demo/RapidDoc_end2end.py b/demo/RapidDoc_end2end.py
Lines changed: 8 additions & 5 deletions
Lines changed: 8 additions & 5 deletions
diff --git a/demo/demo.py b/demo/demo.py
Lines changed: 3 additions & 3 deletions
Lines changed: 3 additions & 3 deletions
diff --git a/docker/README_API.md b/docker/README_API.md
Lines changed: 1 addition & 1 deletion
Lines changed: 1 addition & 1 deletion
diff --git a/docs/analyze_param.md b/docs/analyze_param.md
Lines changed: 8 additions & 7 deletions
Lines changed: 8 additions & 7 deletions
diff --git a/rapid_doc/backend/pipeline/pipeline_magic_model.py b/rapid_doc/backend/pipeline/pipeline_magic_model.py
Lines changed: 2 additions & 0 deletions
Lines changed: 2 additions & 0 deletions
@@ -25,8 +25,8 @@
 
 - **版面识别**
   - 模型使用 `PP-DocLayout` 系列 ONNX 模型（v2、plus-L、L、M、S）
-    - **PP-DocLayoutV2**：PaddleOCR-VL使用的版面模型，自带阅读顺序
-    - **PP-DocLayout_plus-L**：效果好运行稳定，默认使用 
+    - **PP-DocLayoutV2**：自带阅读顺序，效果最好，默认使用
+    - **PP-DocLayout_plus-L**：效果好运行稳定
     - **PP-DocLayout-L**：速度快，效果也不错
     - **PP-DocLayout-S**：速度极快，存在部分漏检
 
@@ -56,7 +56,7 @@
 
 ### 1. OmniDocBench
 
-以下是RapidDoc在 OmniDocBench 上的评估结果。Pipeline 模型使用 PP-DocLayout_plus-L、PP-OCRv5-mobile、PP-FormulaNet_plus-M、UNET_SLANET_PLUS。
+以下是RapidDoc在 OmniDocBench 上的评估结果。Pipeline 模型使用 PP-DocLayoutV2、PP-OCRv5-mobile、PP-FormulaNet_plus-M、UNET_SLANET_PLUS。
 <table style="width:100%; border-collapse: collapse;">
     <caption>Comprehensive evaluation of document parsing on OmniDocBench (v1.5)</caption>
     <thead>
@@ -295,6 +295,16 @@
         </tr>
         <tr>
             <td rowspan="4"><strong>Pipeline</strong><br><strong>Tools</strong></td>
+            <td><strong>RapidDoc</strong></td>
+            <td>-</td>
+            <td>87.81</td>
+            <td>0.065</td>
+            <td>89.348</td>
+            <td>80.59</td>
+            <td>87.90</td>
+            <td>0.053</td>
+        </tr>
+        <tr>
             <td>PP-StructureV3</td>
             <td>-</td>
             <td>86.73</td>
@@ -304,16 +314,6 @@
             <td>89.48</td>
             <td>0.073</td>
         </tr>
-        <tr>
-            <td><strong>RapidDoc</strong></td>
-            <td>-</td>
-            <td>85.25</td>
-            <td>0.085</td>
-            <td>85.19</td>
-            <td>79.07</td>
-            <td>86.35</td>
-            <td>0.114</td>
-        </tr>
         <tr>
             <td>Mineru2-pipeline</td>
             <td>-</td>
 
@@ -41,17 +41,18 @@ def do_parse(
     f_dump_model_output=True,  # Whether to dump model output files
     f_dump_orig_pdf=True,  # Whether to dump original PDF files
     f_dump_content_list=True,  # Whether to dump content list files
-    f_dump_md_html=True,  # Whether to convert markdown to HTML
-    f_dump_md_docx=True,  # Whether to convert markdown to docx (via Pandoc)
+    f_dump_md_html=False,  # Whether to convert markdown to HTML
+    f_dump_md_docx=False,  # Whether to convert markdown to docx (via Pandoc)
     f_make_md_mode=MakeMode.MM_MD,  # The mode for making markdown content, default is MM_MD
     start_page_id=0,  # Start page ID for parsing, default is 0
     end_page_id=None,  # End page ID for parsing, default is None (parse all pages until the end of the document)
 ):
     layout_config = {
-        # "model_type": LayoutModelType.PP_DOCLAYOUT_PLUS_L,
+        # "model_type": LayoutModelType.PP_DOCLAYOUTV2,
         # "conf_thresh": 0.4,
         # "batch_num": 1,
-        # "model_dir_or_path": r"C:\ocr\models\ppmodel\layout\PP-DocLayout_plus-L\pp_doclayout_plus_l.onnx",
+        # "model_dir_or_path": r"C:\ocr\models\ppmodel\layout\PP-DocLayoutV2\pp_doclayoutv2.onnx",
+        # "markdown_ignore_labels": ["number", "footnote", "header", "header_image", "footer", "footer_image", "aside_text"],
     }
 
     ocr_config = {
@@ -85,7 +86,7 @@ def do_parse(
 
     table_config = {
         # "force_ocr": False, # 表格文字，是否强制使用ocr，默认 False 根据 parse_method 来判断是否需要ocr还是从pdf中直接提取文本
-        # 注：文字版pdf可以使用pypdfium2提取到表格内图片，扫描版或图片需要使用PP_DOCLAYOUT_PLUS_L版面识别模型，才能识别到表格内的图片
+        # 注：文字版pdf可以使用pypdfium2提取到表格内图片，扫描版或图片需要使用PP_DOCLAYOUT_PLUS_L/PP_DOCLAYOUTV2版面识别模型，才能识别到表格内的图片
         # "skip_text_in_image": True, # 是否跳过表格里图片中的文字（如表格单元格中嵌入的图片、图标、扫描底图等）
         # "use_img2table": False, # 是否优先使用img2table库提取表格，需要手动安装（pip install img2table），基于opencv识别准确度不如使用模型，但是速度很快，默认关闭
 
 
@@ -16,6 +16,7 @@
 from rapid_doc.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
 from rapid_doc.backend.pipeline.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
 from rapid_doc.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
+from rapid_doc.model.layout.rapid_layout_self import ModelType as LayoutModelType
 
 def do_parse(
     output_dir,
@@ -30,6 +31,7 @@ def do_parse(
     end_page_id=None,
 ):
     layout_config = {
+        "model_type": LayoutModelType.PP_DOCLAYOUTV2,
     }
 
     ocr_config = {
@@ -112,13 +114,14 @@ def parse_doc(
 
 
 if __name__ == '__main__':
-    files_dir = r"/root/hzkitty/OmniDocBenchFiles/images"
-    output_dir = r"/root/hzkitty/OmniDocBenchFiles/layout_plus_l-ocr_mobile-image"
+    files_dir = r"/root/hzkitty/OmniDocBenchFiles/pdfs"
+    output_dir = r"/root/hzkitty/OmniDocBenchFiles/layout_v2-ocr_mobile-pdf"
 
-    # files_dir = r"D:\Download\OmniDocBench\images"
-    # output_dir = r"D:\Download\OmniDocBench\layout_plus_l-ocr_mobile-image"
+    # files_dir = r"D:\Download\OmniDocBench\pdfs"
+    # output_dir = r"D:\Download\OmniDocBench\layout_v2-ocr_mobile-pdf"
 
-    suffixes = [".pdf", ".png", ".jpg", ".jpeg"]
+    # suffixes = [".pdf", ".png", ".jpg", ".jpeg"]
+    suffixes = [".pdf"]
     batch_size = 100
 
     doc_path_list = []
 
@@ -45,10 +45,10 @@ def do_parse(
 
 
     layout_config = {
-        "model_type": LayoutModelType.PP_DOCLAYOUT_PLUS_L,
+        "model_type": LayoutModelType.PP_DOCLAYOUTV2,
         # "conf_thresh": 0.4,
         # "batch_num": 1,
-        # "model_dir_or_path": "C:\ocr\models\ppmodel\layout\PP-DocLayout-L\pp_doclayout_l.onnx"
+        # "model_dir_or_path": r"C:\ocr\models\ppmodel\layout\PP-DocLayoutV2\pp_doclayoutv2.onnx"
     }
 
     ocr_config = {
@@ -82,7 +82,7 @@ def do_parse(
 
     table_config = {
         # "force_ocr": False, # 表格文字，是否强制使用ocr，默认 False 根据 parse_method 来判断是否需要ocr还是从pdf中直接提取文本
-        # 注：文字版pdf可以使用pypdfium2提取到表格内图片，扫描版或图片需要使用PP_DOCLAYOUT_PLUS_L版面识别模型，才能识别到表格内的图片
+        # 注：文字版pdf可以使用pypdfium2提取到表格内图片，扫描版或图片需要使用PP_DOCLAYOUT_PLUS_L/PP_DOCLAYOUTV2版面识别模型，才能识别到表格内的图片
         # "skip_text_in_image": True, # 是否跳过表格里图片中的文字（如表格单元格中嵌入的图片、图标、扫描底图等）
         # "use_img2table": False, # 是否优先使用img2table库提取表格，需要手动安装（pip install img2table），基于opencv识别准确度不如使用模型，但是速度很快，默认关闭
 
 
@@ -103,7 +103,7 @@ curl --location --request POST 'http://localhost:8888/file_parse' \
 --form 'return_content_list=true' \
 --form 'return_images=true' \
 --form 'clear_output_file=false' \
---form 'layout_config="{\"model_type\": \"LayoutModelType.PP_DOCLAYOUT_PLUS_L\"}"'
+--form 'layout_config="{\"model_type\": \"LayoutModelType.PP_DOCLAYOUTV2\"}"'
 
 # 多文件批量处理
 curl -X POST "http://localhost:8888/file_parse" \
 
@@ -46,12 +46,13 @@ os.environ['MINERU_DEVICE_MODE'] = "cuda:1"
 
 #### 2、layout_config 版面解析参数说明如下：
 
-|  参数名   |  说明   |         默认值         | 备注 |
-| :-------: |:-----:|:-------------------:|:--:|
-| model_type |  模型   | PP_DOCLAYOUT_PLUS_L |  |
-| conf_thresh  |  阈值   |     0.5（_S为0.2）     |  |
-| batch_num | 批处理大小 |          1          |  |
-| model_dir_or_path | 模型路径  |        None         |  |
+|  参数名   |   说明    |         默认值         | 备注 |
+| :-------: |:-------:|:-------------------:|:--:|
+| model_type |   模型    | PP_DOCLAYOUTV2 |  |
+| conf_thresh  |   阈值    |     0.5（_S为0.2）     |  |
+| batch_num |  批处理大小  |          1          |  |
+| model_dir_or_path |  模型路径   |        None         |  |
+| markdown_ignore_labels | 忽略的版面类型 |        ["number","footnote","header","header_image","footer","footer_image","aside_text"]        |  |
 示例：
 
 ```python
@@ -151,7 +152,7 @@ from rapid_doc.model.table.rapid_table_self import ModelType as TableModelType,
 
 table_config = {
     # "force_ocr": False, # 表格文字，是否强制使用ocr，默认 False 根据 parse_method 来判断是否需要ocr还是从pdf中直接提取文本
-    # 注：文字版pdf可以使用pypdfium2提取到表格内图片，扫描版或图片需要使用PP_DOCLAYOUT_PLUS_L版面识别模型，才能识别到表格内的图片
+    # 注：文字版pdf可以使用pypdfium2提取到表格内图片，扫描版或图片需要使用PP_DOCLAYOUT_PLUS_L/PP_DOCLAYOUTV2版面识别模型，才能识别到表格内的图片
     # "skip_text_in_image": True, # 是否跳过表格里图片中的文字（如表格单元格中嵌入的图片、图标、扫描底图等）
     # "use_img2table": False, # 是否优先使用img2table库提取表格，需要手动安装（pip install img2table），基于opencv识别准确度不如使用模型，但是速度很快，默认关闭
 
 
@@ -392,6 +392,8 @@ def __get_blocks_by_type(
             if category_id == category_type:
                 block = {
                     'bbox': bbox,
+                    'original_label': item.get('original_label'),
+                    'original_order': item.get('original_order'),
                     'score': item.get('score'),
                 }
                 for col in extra_col:
Original file line number	Diff line number	Diff line change
`@@ -392,6 +392,8 @@ def __get_blocks_by_type(`
`392`	`392`	`if category_id == category_type:`
`393`	`393`	`block = {`
`394`	`394`	`'bbox': bbox,`
	`395`	`+ 'original_label': item.get('original_label'),`
	`396`	`+ 'original_order': item.get('original_order'),`
`395`	`397`	`'score': item.get('score'),`
`396`	`398`	`}`
`397`	`399`	`for col in extra_col:`