Commit 4e5f03b

Merge pull request #4063 from opendatalab/release-2.6.5

2 parents fa1149c + dfd99ba

40 files changed: +1515 −387 lines

README.md

Lines changed: 18 additions & 10 deletions
@@ -44,6 +44,9 @@
 </div>

 # Changelog
+- 2025/11/26 2.6.5 Release
+  - Added support for the new backend `vlm-lmdeploy-engine`. Its usage is similar to `vlm-vllm-(async)engine`, but it uses `lmdeploy` as the inference engine and, unlike `vllm`, also supports native inference acceleration on Windows.
+
 - 2025/11/04 2.6.4 Release
   - Added a timeout configuration for PDF image rendering (default 300 seconds), configurable via the environment variable `MINERU_PDF_RENDER_TIMEOUT`, to prevent abnormal PDF files from blocking the rendering process for long periods.
   - Added CPU thread count configuration options for ONNX models (default: the system CPU core count), configurable via the environment variables `MINERU_INTRA_OP_NUM_THREADS` and `MINERU_INTER_OP_NUM_THREADS`, to reduce CPU resource contention in high-concurrency scenarios.
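To make the two 2.6.4 knobs above concrete, here is a minimal shell sketch; the values, file paths, and the `mineru -p/-o` invocation are illustrative placeholders, not part of this commit:

# Hedged sketch: cap PDF rendering and pin ONNX CPU threads for a high-concurrency batch run.
export MINERU_PDF_RENDER_TIMEOUT=120    # default is 300 seconds
export MINERU_INTRA_OP_NUM_THREADS=4    # default is the system CPU core count
export MINERU_INTER_OP_NUM_THREADS=1
mineru -p ./demo.pdf -o ./output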
@@ -632,12 +635,13 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
 <tr>
 <th rowspan="2">Parsing Backend</th>
 <th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
-<th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
+<th colspan="5">vlm (Accuracy<sup>1</sup> 90+)</th>
 </tr>
 <tr>
 <th>transformers</th>
 <th>mlx-engine</th>
 <th>vllm-engine / <br>vllm-async-engine</th>
+<th>lmdeploy-engine</th>
 <th>http-client</th>
 </tr>
 </thead>
@@ -648,40 +652,42 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
 <td>Good compatibility, <br>but slower</td>
 <td>Faster than transformers</td>
 <td>Fast, compatible with the vLLM ecosystem</td>
-<td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
+<td>Fast, compatible with the LMDeploy ecosystem</td>
+<td>Suitable for OpenAI-compatible servers<sup>6</sup></td>
 </tr>
 <tr>
 <th>Operating System</th>
 <td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
 <td style="text-align:center;">macOS<sup>3</sup></td>
 <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup></td>
+<td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>5</sup></td>
 <td>Any</td>
 </tr>
 <tr>
 <th>CPU inference support</th>
 <td colspan="2" style="text-align:center;">✅</td>
-<td colspan="2" style="text-align:center;">❌</td>
+<td colspan="3" style="text-align:center;">❌</td>
 <td>Not required</td>
 </tr>
 <tr>
 <th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
 <td>Apple Silicon</td>
-<td>Volta or later architectures, 8 GB VRAM or more</td>
+<td colspan="2" style="text-align:center;">Volta or later architectures, 8 GB VRAM or more</td>
 <td>Not required</td>
 </tr>
 <tr>
 <th>Memory Requirements</th>
-<td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
+<td colspan="5" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
 <td>8 GB</td>
 </tr>
 <tr>
 <th>Disk Space Requirements</th>
-<td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
+<td colspan="5" style="text-align:center;">20 GB or more, SSD recommended</td>
 <td>2 GB</td>
 </tr>
 <tr>
 <th>Python Version</th>
-<td colspan="5" style="text-align:center;">3.10-3.13</td>
+<td colspan="6" style="text-align:center;">3.10-3.13<sup>7</sup></td>
 </tr>
 </tbody>
 </table>
@@ -690,7 +696,9 @@ A WebUI developed based on Gradio, with a simple interface and only core parsing
 <sup>2</sup> Linux supports only distributions released in 2019 or later.
 <sup>3</sup> MLX requires macOS 13.5 or later; version 14.0 or higher is recommended.
 <sup>4</sup> Windows vLLM support is provided via WSL2 (Windows Subsystem for Linux).
-<sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
+<sup>5</sup> On Windows, LMDeploy can only use the `turbomind` backend, which is slightly slower than the `pytorch` backend; if performance is critical, run it via WSL2.
+<sup>6</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`, `SGLang`, or `LMDeploy`.
+<sup>7</sup> Windows + LMDeploy supports only Python 3.10–3.12, as the critical dependency `ray` does not yet support Python 3.13 on Windows.
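Footnote 6 pairs with the http-client column: MinerU only needs an OpenAI-compatible endpoint. A minimal sketch, assuming the `mineru-vllm-server` entry point and the `-u` server-url flag described in the MinerU docs (verify both against your installed version):

# Hedged sketch: serve the VLM once, then parse against it from a lightweight client.
mineru-vllm-server --port 30000 &
mineru -p ./demo.pdf -o ./output -b vlm-http-client -u http://127.0.0.1:30000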


### Install MinerU

@@ -710,8 +718,8 @@ uv pip install -e .[core]
 ```

 > [!TIP]
-> `mineru[core]` includes all core features except `vLLM` acceleration, is compatible with Windows / Linux / macOS, and suits most users.
-> If you need to use `vLLM` acceleration for VLM model inference or install a lightweight client on edge devices, refer to the [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
+> `mineru[core]` includes all core features except `vLLM`/`LMDeploy` acceleration, is compatible with Windows / Linux / macOS, and suits most users.
+> If you need to use `vLLM`/`LMDeploy` acceleration for VLM model inference or install a lightweight client on edge devices, refer to the [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/).
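For a from-scratch install, a minimal sketch that reuses the pinned spec the Dockerfiles in this commit install (`mineru[core]>=2.6.5`); the Python version and paths are placeholders:

# Hedged sketch: fresh virtual environment with the core feature set from PyPI.
uv venv --python 3.12 && source .venv/bin/activate
uv pip install "mineru[core]>=2.6.5"
mineru -p ./demo.pdf -o ./output    # uses the default pipeline backend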
---

README_zh-CN.md

Lines changed: 22 additions & 10 deletions
@@ -44,6 +44,13 @@
 </div>

 # Changelog
+
+- 2025/11/26 2.6.5 Release
+  - Added support for the new backend `vlm-lmdeploy-engine`. Its usage is similar to `vlm-vllm-(async)engine`, but it uses `lmdeploy` as the inference engine and, unlike `vllm`, also supports native inference acceleration on Windows.
+  - Added adaptation support for the domestic compute platforms Ascend (`npu`), T-Head (`ppu`), and MetaX (`maca`). Users can run the `pipeline` and `vlm` models on these platforms and accelerate vlm inference with the `vllm`/`lmdeploy` engines; see [Other Accelerator Adaptations](https://opendatalab.github.io/MinerU/zh/usage/) for details.
+  - Adapting domestic platforms is not easy; we have done our best to ensure the completeness and stability of the adaptations, but some stability/compatibility and accuracy-alignment issues may remain. Please choose a suitable environment and scenario according to the red/green status indicators on the adaptation documentation page.
+  - If you run into any problem not covered by the documentation while using the domestic-platform adaptations, please report it in the [designated discussions thread](https://github.com/opendatalab/MinerU/discussions/4053) so that other users can find the solution.
+
 - 2025/11/04 2.6.4 Release
   - Added a timeout configuration for PDF image rendering (default 300 seconds), configurable via the environment variable `MINERU_PDF_RENDER_TIMEOUT`, to prevent abnormal PDF files from blocking the rendering process for long periods.
   - Added CPU thread count configuration options for ONNX models (default: the system CPU core count), configurable via the environment variables `MINERU_INTRA_OP_NUM_THREADS` and `MINERU_INTER_OP_NUM_THREADS`, to reduce CPU resource contention in high-concurrency scenarios.
@@ -619,12 +626,13 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
 <tr>
 <th rowspan="2">Parsing Backend</th>
 <th rowspan="2">pipeline <br> (Accuracy<sup>1</sup> 82+)</th>
-<th colspan="4">vlm (Accuracy<sup>1</sup> 90+)</th>
+<th colspan="5">vlm (Accuracy<sup>1</sup> 90+)</th>
 </tr>
 <tr>
 <th>transformers</th>
 <th>mlx-engine</th>
 <th>vllm-engine / <br>vllm-async-engine</th>
+<th>lmdeploy-engine</th>
 <th>http-client</th>
 </tr>
 </thead>
@@ -635,40 +643,42 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
 <td>Good compatibility, slower</td>
 <td>Faster than transformers</td>
 <td>Fast, compatible with the vllm ecosystem</td>
-<td>Suitable for OpenAI-compatible servers<sup>5</sup></td>
+<td>Fast, compatible with the lmdeploy ecosystem</td>
+<td>Suitable for OpenAI-compatible servers<sup>6</sup></td>
 </tr>
 <tr>
 <th>Operating System</th>
 <td colspan="2" style="text-align:center;">Linux<sup>2</sup> / Windows / macOS</td>
 <td style="text-align:center;">macOS<sup>3</sup></td>
 <td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>4</sup></td>
+<td style="text-align:center;">Linux<sup>2</sup> / Windows<sup>5</sup></td>
 <td>Any</td>
 </tr>
 <tr>
 <th>CPU inference support</th>
 <td colspan="2" style="text-align:center;">✅</td>
-<td colspan="2" style="text-align:center;">❌</td>
+<td colspan="3" style="text-align:center;">❌</td>
 <td>Not required</td>
 </tr>
 <tr>
 <th>GPU Requirements</th><td colspan="2" style="text-align:center;">Volta or later architectures, 6 GB VRAM or more, or Apple Silicon</td>
 <td>Apple Silicon</td>
-<td>Volta or later architectures, 8 GB VRAM or more</td>
+<td colspan="2" style="text-align:center;">Volta or later architectures, 8 GB VRAM or more</td>
 <td>Not required</td>
 </tr>
 <tr>
 <th>Memory Requirements</th>
-<td colspan="4" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
+<td colspan="5" style="text-align:center;">Minimum 16 GB, 32 GB recommended</td>
 <td>8 GB</td>
 </tr>
 <tr>
 <th>Disk Space Requirements</th>
-<td colspan="4" style="text-align:center;">20 GB or more, SSD recommended</td>
+<td colspan="5" style="text-align:center;">20 GB or more, SSD recommended</td>
 <td>2 GB</td>
 </tr>
 <tr>
 <th>Python Version</th>
-<td colspan="5" style="text-align:center;">3.10-3.13</td>
+<td colspan="6" style="text-align:center;">3.10-3.13<sup>7</sup></td>
 </tr>
 </tbody>
 </table>
@@ -677,7 +687,9 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
 <sup>2</sup> Linux supports only distributions released in 2019 or later.
 <sup>3</sup> MLX requires macOS 13.5 or later; version 14.0 or higher is recommended.
 <sup>4</sup> Windows vLLM support is provided via WSL2 (Windows Subsystem for Linux).
-<sup>5</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`/`SGLang`/`LMDeploy`.
+<sup>5</sup> On Windows, LMDeploy can only use the `turbomind` backend, which is slightly slower than the `pytorch` backend; if performance is critical, run it via WSL2.
+<sup>6</sup> Servers compatible with the OpenAI API, such as local or remote model services deployed via inference frameworks like `vLLM`/`SGLang`/`LMDeploy`.
+<sup>7</sup> Windows + LMDeploy supports only Python 3.10–3.12, as the critical dependency `ray` does not yet support Python 3.13 on Windows.

 > [!TIP]
 > Beyond the mainstream environments and platforms above, we also track support reported by community users for other platforms; see [Other Accelerator Adaptations](https://opendatalab.github.io/MinerU/zh/usage/) for details.
@@ -700,8 +712,8 @@ uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
 ```

 > [!TIP]
-> `mineru[core]` includes all core features except `vLLM` acceleration, is compatible with Windows / Linux / macOS, and suits the vast majority of users.
-> If you need to use `vLLM` acceleration for VLM model inference, or to install a lightweight client on edge devices, refer to the [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/zh/quick_start/extension_modules/).
+> `mineru[core]` includes all core features except `vLLM`/`LMDeploy` acceleration, is compatible with Windows / Linux / macOS, and suits the vast majority of users.
+> If you need to use `vLLM`/`LMDeploy` acceleration for VLM model inference, or to install a lightweight client on edge devices, refer to the [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/zh/quick_start/extension_modules/).

 ---

demo/demo.py

Lines changed: 2 additions & 1 deletion
@@ -236,5 +236,6 @@ def parse_doc(
 """To enable VLM mode, change the backend to 'vlm-xxx'"""
 # parse_doc(doc_path_list, output_dir, backend="vlm-transformers")  # more general.
 # parse_doc(doc_path_list, output_dir, backend="vlm-mlx-engine")  # faster than transformers in macOS 13.5+.
-# parse_doc(doc_path_list, output_dir, backend="vlm-vllm-engine")  # faster(engine).
+# parse_doc(doc_path_list, output_dir, backend="vlm-vllm-engine")  # faster(vllm-engine).
+# parse_doc(doc_path_list, output_dir, backend="vlm-lmdeploy-engine")  # faster(lmdeploy-engine).
 # parse_doc(doc_path_list, output_dir, backend="vlm-http-client", server_url="http://127.0.0.1:30000")  # faster(client).
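The same backend selection works from the command line; a minimal sketch, assuming the `-b/--backend` flag from the MinerU CLI docs and an environment with the LMDeploy extension installed:

# Hedged sketch: parse with the LMDeploy-backed engine added in 2.6.5.
mineru -p ./demo.pdf -o ./output -b vlm-lmdeploy-engine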

docker/china/Dockerfile

Lines changed: 0 additions & 6 deletions
@@ -2,15 +2,9 @@
 # Compute Capability version query (https://developer.nvidia.com/cuda-gpus)
 FROM docker.m.daocloud.io/vllm/vllm-openai:v0.10.1.1

-# Use the official vllm image
-# FROM vllm/vllm-openai:v0.10.1.1
-
 # Use DaoCloud mirrored vllm image for China region for gpu with Turing architecture and below (Compute Capability<8.0)
 # FROM docker.m.daocloud.io/vllm/vllm-openai:v0.10.2

-# Use the official vllm image
-# FROM vllm/vllm-openai:v0.10.2
-
 # Install libgl for opencv support & Noto fonts for Chinese characters
 RUN apt-get update && \
     apt-get install -y \

docker/china/maca.Dockerfile

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+# Configure the base image with a vLLM or LMDeploy inference environment; choose one as needed. Requires amd64(x86-64) CPU + MetaX GPU.
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + metax GPU.
+FROM cr.metax-tech.com/public-ai-release/maca/vllm:maca.ai3.1.0.7-torch2.6-py310-ubuntu22.04-amd64
+# Base image containing the LMDeploy inference environment, requiring amd64(x86-64) CPU + metax GPU.
+# FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/maca:maca.ai3.1.0.7-torch2.6-py310-ubuntu22.04-lmdeploy0.10.2-amd64
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Patch torchvision metadata to be compatible with torch 2.6
+RUN sed -i '3s/^Version: 0.15.1+metax3\.1\.0\.4$/Version: 0.21.0+metax3.1.0.4/' /opt/conda/lib/python3.10/site-packages/torchvision-0.15.1+metax3.1.0.4.dist-info/METADATA && \
+    mv /opt/conda/lib/python3.10/site-packages/torchvision-0.15.1+metax3.1.0.4.dist-info /opt/conda/lib/python3.10/site-packages/torchvision-0.21.0+metax3.1.0.4.dist-info
+
+# Install the latest mineru
+RUN /opt/conda/bin/python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    /opt/conda/bin/python3 -m pip install 'mineru[core]>=2.6.5' \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    /opt/conda/bin/python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "/opt/conda/bin/mineru-models-download -s modelscope -m all"
+
+# Set the entry point to activate the virtual environment and run the command line tool
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
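To try one of these images, a minimal sketch; the image tag is arbitrary and the device-passthrough flags are vendor-specific placeholders (consult the MetaX/Ascend/T-Head container runtime docs for the real options):

# Hedged sketch: build the MetaX image and run a one-shot parse inside it.
docker build -t mineru-maca -f docker/china/maca.Dockerfile .
docker run --rm -it <vendor-device-flags> -v "$(pwd)":/work mineru-maca \
    mineru -p /work/demo.pdf -o /work/output -b vlm-lmdeploy-engine
# The npu and ppu Dockerfiles below follow the same build/run pattern.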

docker/china/npu.Dockerfile

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+# Configure the base image with vLLM or LMDeploy; choose one as needed. Requires ARM(AArch64) CPU + Ascend NPU.
+# Base image containing the vLLM inference environment, requiring ARM(AArch64) CPU + Ascend NPU.
+FROM quay.io/ascend/vllm-ascend:v0.11.0rc1
+# Base image containing the LMDeploy inference environment, requiring ARM(AArch64) CPU + Ascend NPU.
+# FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ascend:mineru-a2
+
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 \
+        libglib2.0-0 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install the latest mineru
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install -U 'mineru[core]>=2.6.5' -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN TORCH_DEVICE_BACKEND_AUTOLOAD=0 /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Set the entry point to activate the virtual environment and run the command line tool
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]

docker/china/ppu.Dockerfile

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+# Configure the base image with a vLLM or LMDeploy inference environment; choose one as needed. Requires amd64(x86-64) CPU + T-Head PPU.
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + t-head PPU.
+FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/ppu:ppu-pytorch2.6.0-ubuntu24.04-cuda12.6-vllm0.8.5-py312
+# Base image containing the LMDeploy inference environment, requiring amd64(x86-64) CPU + t-head PPU.
+# FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ppu:mineru-ppu
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install the latest mineru
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install 'mineru[core]>=2.6.5' \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        huggingface_hub==0.36.0 \
+        dill==0.3.6 \
+        setuptools==74.1.1 \
+        tokenizers==0.21.1 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Set the entry point to activate the virtual environment and run the command line tool
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
