Add MINERU_MODELS_DIR environment variable and NPU support

hzkitty · hzkitty · commit 37d5a1050ea3 · 2025-11-20T21:25:59.000+08:00
diff --git a/demo.py b/demo.py
@@ -9,7 +9,8 @@
 # os.environ['MINERU_DEVICE_MODE'] = "cuda"
 # # 或指定 GPU 编号，例如使用第二块 GPU（cuda:1）
 # os.environ['MINERU_DEVICE_MODE'] = "cuda:1"
-
+# # 模型文件存储目录
+# os.environ['MINERU_MODELS_DIR'] = r'D:\CodeProjects\doc\RapidAI\models' #模型文件存储目录，如果不设置会默认下载到rapid_doc项目里面
 from loguru import logger
 
 from rapid_doc.cli.common import convert_pdf_bytes_to_bytes_by_pypdfium2, prepare_env, read_fn
diff --git a/demo/demo.py b/demo/demo.py
@@ -7,6 +7,8 @@
 # os.environ['MINERU_DEVICE_MODE'] = "cuda"
 # # 或指定 GPU 编号，例如使用第二块 GPU（cuda:1）
 # os.environ['MINERU_DEVICE_MODE'] = "cuda:1"
+# # 模型文件存储目录
+# os.environ['MINERU_MODELS_DIR'] = r'D:\CodeProjects\doc\RapidAI\models' #模型文件存储目录，如果不设置会默认下载到rapid_doc项目里面
 from loguru import logger
 
 from rapid_doc.cli.common import convert_pdf_bytes_to_bytes_by_pypdfium2, prepare_env, read_fn
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -43,6 +43,7 @@ ENV API_PORT=8888
 ENV STARTUP_WAIT_TIME=15
 ENV LOG_LEVEL=INFO
 ENV MINERU_DEVICE_MODE=cpu
+ENV MINERU_MODELS_DIR=/app/models
 # 下载默认模型文件实现离线部署
 RUN python3 download_models.py
 
diff --git a/docker/DockerfileGPU b/docker/DockerfileGPU
@@ -51,6 +51,7 @@ ENV API_PORT=8888
 ENV STARTUP_WAIT_TIME=15
 ENV LOG_LEVEL=INFO
 ENV MINERU_DEVICE_MODE=cuda:0
+ENV MINERU_MODELS_DIR=/app/models
 # 下载默认模型文件实现离线部署
 RUN python3 download_models.py
 
diff --git a/docker/README.md b/docker/README.md
@@ -72,3 +72,4 @@ curl -X POST "http://localhost:8888/parse" \
 |--------|--------|------|
 | `STARTUP_WAIT_TIME` | `15` | 启动等待时间（秒） |
 | `LOG_LEVEL` | `INFO` | 日志级别 |
+| `MINERU_MODELS_DIR` | `/app/models` | 模型文件存储目录 |
diff --git a/docker/app.py b/docker/app.py
@@ -88,7 +88,7 @@ def _convert_value_to_enum(config):
         return config
     from rapidocr import EngineType as OCREngineType, OCRVersion, ModelType as OCRModelType, LangDet, LangRec
     from rapid_doc.model.layout.rapid_layout_self import ModelType as LayoutModelType
-    from rapid_doc.model.formula.rapid_formula_self import ModelType as FormulaModelType
+    from rapid_doc.model.formula.rapid_formula_self import ModelType as FormulaModelType, EngineType as FormulaEngineType
     from rapid_doc.model.table.rapid_table_self import ModelType as TableModelType
 
     # 可识别的枚举类映射表（可扩展）
@@ -100,6 +100,7 @@ def _convert_value_to_enum(config):
         "LangRec": LangRec,
         "LayoutModelType": LayoutModelType,
         "FormulaModelType": FormulaModelType,
+        "FormulaEngineType": FormulaEngineType,
         "TableModelType": TableModelType,
     }
 
diff --git a/docker/download_models.py b/docker/download_models.py
@@ -1,159 +1,14 @@
+#!/usr/bin/env python3
+"""
+Model download script for Docker build
+Downloads pipeline models for offline deployment
+"""
+import os
 import sys
-from pathlib import Path
-from typing import Union
-from loguru import logger
-from omegaconf import DictConfig, OmegaConf
-from download_file import DownloadFileInput, DownloadFile
-
-def read_yaml(file_path: Union[str, Path]) -> DictConfig:
-    return OmegaConf.load(file_path)
-
-def default_download(models_pkg, configs_pkg):
-    # 获取 models 模块的目录
-    model_dir = Path(models_pkg.__path__[0])
-    # 获取 configs 模块所在目录
-    configs_dir = Path(configs_pkg.__file__).parent
-    # 拼接 default_models.yaml 文件路径
-    default_models_yaml = configs_dir / "default_models.yaml"
-    model_map = read_yaml(default_models_yaml)
-
-    for model_name, model_info in model_map.items():
-        if model_name in ['unitable']:
-            # multi_models
-            model_root_dir = model_info["model_dir_or_path"]
-            save_model_dir = model_dir / Path(model_root_dir).name
-            for file_name, sha256 in model_info["SHA256"].items():
-                save_path = save_model_dir / file_name
-
-                download_params = DownloadFileInput(
-                    file_url=f"{model_root_dir}/{file_name}",
-                    sha256=sha256,
-                    save_path=save_path,
-                )
-                DownloadFile.run(download_params)
-        elif model_name in ['onnxruntime', 'torch', 'openvino']:
-            for name, item_model_info in model_info.items():
-                model_dir_or_path = item_model_info["model_dir_or_path"]
-                sha256 = item_model_info["SHA256"]
-                save_model_path = (
-                        model_dir / Path(model_dir_or_path).name
-                )
-                download_params = DownloadFileInput(
-                    file_url=model_dir_or_path,
-                    sha256=sha256,
-                    save_path=save_model_path,
-                )
-                DownloadFile.run(download_params)
-
-                # 如果有字典文件，下载字典
-                dict_download_url = item_model_info.get("dict_url")
-                if dict_download_url:
-                    dict_path = (model_dir / Path(dict_download_url).name)
-                if dict_download_url and not Path(dict_path).exists():
-                    DownloadFile.run(
-                        DownloadFileInput(
-                            file_url=dict_download_url,
-                            sha256=None,
-                            save_path=dict_path,
-                        )
-                    )
-        else:
-            model_dir_or_path = model_info["model_dir_or_path"]
-            sha256 = model_info["SHA256"]
-
-            save_model_path = (
-                    model_dir / Path(model_dir_or_path).name
-            )
-            download_params = DownloadFileInput(
-                file_url=model_dir_or_path,
-                sha256=sha256,
-                save_path=save_model_path,
-            )
-            DownloadFile.run(download_params)
-
-def ocr_download(models_pkg, configs_pkg):
-    # 获取 models 模块的目录
-    model_dir = Path(models_pkg.__path__[0])
-    # 获取 configs 模块所在目录
-    configs_dir = Path(configs_pkg.__file__).parent
-    # 拼接 default_models.yaml 文件路径
-    default_models_yaml = configs_dir / "default_models.yaml"
-    model_map = read_yaml(default_models_yaml)
-
-    for engin_name, engin_info in model_map.items(): # model_info为onnxruntime层级
-        if engin_name in ['openvino', 'torch', 'fonts']:
-            if engin_name == 'fonts':
-                for lang, font_info in engin_info.items():
-                    font_path = font_info["path"]
-                    font_sha256 = font_info["SHA256"]
-
-                    font_save_model_path = (
-                            model_dir / Path(font_path).name
-                    )
-                    download_params = DownloadFileInput(
-                        file_url=font_path,
-                        sha256=font_sha256,
-                        save_path=font_save_model_path,
-                    )
-                    DownloadFile.run(download_params)
-            else:
-                for version, ocr_info in engin_info.items(): # ocr_info为PP-OCRv4层级
-                    for det, det_info in ocr_info.items(): # info为det层级
-                        for model_name, model_info in det_info.items():
-                            # 如果有字典文件，下载字典
-                            dict_download_url = model_info.get("dict_url")
-                            if dict_download_url:
-                                dict_path = (model_dir / Path(dict_download_url).name)
-                            if dict_download_url and not Path(dict_path).exists():
-                                DownloadFile.run(
-                                    DownloadFileInput(
-                                        file_url=dict_download_url,
-                                        sha256=None,
-                                        save_path=dict_path,
-                                    )
-                                )
-                            # 下载模型
-                            model_path = model_dir / Path(model_info["model_dir"]).name
-                            download_params = DownloadFileInput(
-                                file_url=model_info["model_dir"],
-                                sha256=model_info["SHA256"],
-                                save_path=model_path,
-                            )
-                            DownloadFile.run(download_params)
-
-def download_pipeline_models():
-    """下载Pipeline模型"""
-    try:
-        # # 下载版面识别模型
-        # logger.info('开始下载版面识别模型...')
-        # import rapid_doc.model.layout.rapid_layout_self.models as layout_models_pkg
-        # import rapid_doc.model.layout.rapid_layout_self.configs as layout_configs_pkg
-        # default_download(layout_models_pkg, layout_configs_pkg)
-        #
-        # # 下载公式识别模型
-        # logger.info('开始下载公式识别模型...')
-        # import rapid_doc.model.formula.rapid_formula_self.models as formula_models_pkg
-        # import rapid_doc.model.formula.rapid_formula_self.configs as formula_configs_pkg
-        # default_download(formula_models_pkg, formula_configs_pkg)
-
-        # 下载表格识别模型
-        logger.info('开始下载表格识别模型...')
-        import rapid_doc.model.table.rapid_table_self.models as table_models_pkg
-        import rapid_doc.model.table.rapid_table_self as table_configs_pkg
-        default_download(table_models_pkg, table_configs_pkg)
-
-        # # 下载OCR模型
-        # logger.info('开始下载OCR模型...')
-        # import rapidocr.models as ocr_models_pkg
-        # import rapidocr as ocr_configs_pkg
-        # ocr_download(ocr_models_pkg, ocr_configs_pkg)
-        # logger.info('所有模型下载完成: success download')
-        return True
-    except Exception as e:
-        logger.error(f'模型下载失败: {e}')
-    return True
-
+from rapid_doc.utils.models_download_utils import download_pipeline_models
 
 if __name__ == '__main__':
+    os.environ['MINERU_MODELS_DIR'] = r'D:\CodeProjects\doc\RapidAI\models' #模型文件存储目录
+    os.environ["MINERU_DEVICE_MODE"] = "cpu" # cpu、cuda、npu、all（all只是用来下载）
     success = download_pipeline_models()
     sys.exit(0 if success else 1)
diff --git a/docs/analyze_param.md b/docs/analyze_param.md
@@ -18,6 +18,15 @@ def doc_analyze(
 )
 ```
 在mineru参数基础上新增了layout_config、ocr_config、formula_config、table_config、checkbox_config参数
+
+#### 0、环境变量
+```python
+# 用于指定推理设备。支持cpu/cuda/cuda:0/npu等设备类型
+os.environ['MINERU_DEVICE_MODE'] = "cpu"
+
+# 模型文件存储目录。如果不设置会默认下载到rapid_doc项目里面
+os.environ['MINERU_MODELS_DIR'] = r'D:\CodeProjects\doc\RapidAI\models'
+```
 #### 1、使用gpu推理
 ```bash
 # 在安装完 rapid_doc 之后，卸载 cpu 版的 onnxruntime
diff --git a/magic.json b/magic.json
@@ -13,5 +13,5 @@
             "right": "$"
         }
     },
-    "config_version": "1.3.0"
+    "config_version": "1.3.1"
 }
diff --git a/rapid_doc/model/formula/rapid_formula_model.py b/rapid_doc/model/formula/rapid_formula_model.py
@@ -1,7 +1,14 @@
+import os
 import time
+from pathlib import Path
 
 from rapid_doc.model.formula.rapid_formula_self import ModelType, RapidFormula, RapidFormulaInput, EngineType
 from rapid_doc.utils.config_reader import get_device
+from rapid_doc.model.formula.rapid_formula_self.model_handler import ModelProcessor
+models_dir = os.getenv('MINERU_MODELS_DIR', None)
+if models_dir:
+    # 从指定的文件夹内寻找模型文件
+    ModelProcessor.DEFAULT_MODEL_DIR = Path(models_dir)
 
 class RapidFormulaModel(object):
     def __init__(self, formula_config=None):
@@ -14,6 +21,12 @@ def __init__(self, formula_config=None):
             cfg.engine_cfg = engine_cfg
             cfg.model_type = ModelType.PP_FORMULANET_PLUS_M
             cfg.engine_type = EngineType.TORCH
+        elif device.startswith('npu'):
+            device_id = int(device.split(':')[1]) if ':' in device else 0  # npu 编号
+            engine_cfg = {'use_npu': True, "npu_id": device_id}
+            cfg.engine_cfg = engine_cfg
+            cfg.model_type = ModelType.PP_FORMULANET_PLUS_M
+            cfg.engine_type = EngineType.TORCH
         # 如果传入了 formula_config，则用传入配置覆盖默认配置
         if formula_config is not None:
             # 遍历字典，把传入配置设置到 default_cfg 对象中
diff --git a/rapid_doc/model/formula/rapid_formula_self/configs/default_models.yaml b/rapid_doc/model/formula/rapid_formula_self/configs/default_models.yaml
@@ -1,15 +1,3 @@
-#pp_formulanet_plus_s:
-#  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidDoc/resolve/v1.0.0/formula/PP-FormulaNet_plus-S/pp_formulanet_plus_s.onnx
-#  SHA256: 30998d10c94ccff1ad8981df0c71048cb1f3eec7b1e515b809767f1f72aebe3b
-#
-#pp_formulanet_plus_m:
-#  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidDoc/resolve/v1.0.0/formula/PP-FormulaNet_plus-M/pp_formulanet_plus_m.onnx
-#  SHA256: 71b6d389cf7b857e45252a4b98cfced1a3ffca7bf24d9497d02d052a41d9493b
-#
-#pp_formulanet_plus_l:
-#  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidDoc/resolve/v1.0.0/formula/PP-FormulaNet_plus-L/pp_formulanet_plus_l.onnx
-#  SHA256: 5ef81a0b197ea2c8c1463b31c3eb2ad0ae1eb655fb1ff3b550858c7d85bc84e8
-
 onnxruntime:
   pp_formulanet_plus_s:
     model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidDoc/resolve/v1.0.0/formula/PP-FormulaNet_plus-S/pp_formulanet_plus_s.onnx
diff --git a/rapid_doc/model/formula/rapid_formula_self/inference_engine/base.py b/rapid_doc/model/formula/rapid_formula_self/inference_engine/base.py
@@ -14,12 +14,8 @@
 
 class InferSession(ABC):
     cur_dir = Path(__file__).resolve().parent.parent
-    # MODEL_URL_PATH = cur_dir / "configs" / "default_models.yaml"
     ENGINE_CFG_PATH = cur_dir / "configs" / "engine_cfg.yaml"
 
-    # model_info = OmegaConf.load(MODEL_URL_PATH)
-    # DEFAULT_MODEL_PATH = cur_dir / "models"
-
     engine_cfg = OmegaConf.load(ENGINE_CFG_PATH)
 
     @abstractmethod
diff --git a/rapid_doc/model/layout/rapid_layout.py b/rapid_doc/model/layout/rapid_layout.py
@@ -1,9 +1,16 @@
+import os
 import cv2
+from pathlib import Path
 
 from rapid_doc.model.layout.rapid_layout_self import ModelType, RapidLayout, RapidLayoutInput
 from rapid_doc.model.layout.rapid_layout_self.utils.typings import PP_DOCLAYOUT_PLUS_L_Threshold, PP_DOCLAYOUT_L_Threshold
 from rapid_doc.utils.config_reader import get_device
 from rapid_doc.utils.enum_class import CategoryId
+from rapid_doc.model.layout.rapid_layout_self.model_handler import ModelProcessor
+models_dir = os.getenv('MINERU_MODELS_DIR', None)
+if models_dir:
+    # 从指定的文件夹内寻找模型文件
+    ModelProcessor.DEFAULT_MODEL_DIR = Path(models_dir)
 
 class RapidLayoutModel(object):
     def __init__(self, layout_config=None):
@@ -14,7 +21,10 @@ def __init__(self, layout_config=None):
             device_id = int(device.split(':')[1]) if ':' in device else 0  # GPU 编号
             engine_cfg = {'use_cuda': True, "cuda_ep_cfg.device_id": device_id}
             cfg.engine_cfg = engine_cfg
-
+        elif device.startswith('npu'):
+            device_id = int(device.split(':')[1]) if ':' in device else 0  # npu 编号
+            engine_cfg = {'use_cann': True, "cann_ep_cfg.device_id": device_id}
+            cfg.engine_cfg = engine_cfg
         # 如果传入了 layout_config，则用传入配置覆盖默认配置
         if layout_config is not None:
             if not layout_config.get("conf_thresh"):
@@ -32,6 +42,7 @@ def __init__(self, layout_config=None):
                 if hasattr(cfg, key):
                     setattr(cfg, key, value)
                     setattr(cfg, key, value)
+
         self.model = RapidLayout(cfg=cfg)
         self.model_type = cfg.model_type
         self.doclayout_yolo_list = ['title', 'plain text', 'abandon', 'figure', 'figure_caption', 'table', 'table_caption', 'table_footnote', 'isolate_formula', 'formula_caption',
diff --git a/rapid_doc/model/layout/rapid_layout_self/inference_engine/base.py b/rapid_doc/model/layout/rapid_layout_self/inference_engine/base.py
@@ -14,12 +14,8 @@
 
 class InferSession(ABC):
     cur_dir = Path(__file__).resolve().parent.parent
-    MODEL_URL_PATH = cur_dir / "configs" / "default_models.yaml"
     ENGINE_CFG_PATH = cur_dir / "configs" / "engine_cfg.yaml"
 
-    model_info = OmegaConf.load(MODEL_URL_PATH)
-    DEFAULT_MODEL_PATH = cur_dir / "models"
-
     engine_cfg = OmegaConf.load(ENGINE_CFG_PATH)
 
     @abstractmethod
diff --git a/rapid_doc/model/layout/rapid_layout_self/model_handler/utils.py b/rapid_doc/model/layout/rapid_layout_self/model_handler/utils.py
@@ -1,5 +1,4 @@
 from pathlib import Path
-from typing import Dict
 
 from ..utils.download_file import DownloadFile, DownloadFileInput
 from ..utils.logger import Logger
@@ -38,25 +37,3 @@ def get_single_model_path(cls, model_type: ModelType) -> str:
         DownloadFile.run(download_params)
 
         return str(save_model_path)
-
-    @classmethod
-    def get_multi_models_dict(cls, model_type: ModelType) -> Dict[str, str]:
-        model_info = cls.model_map[model_type.value]
-
-        results = {}
-
-        model_root_dir = model_info["model_dir_or_path"]
-        save_model_dir = cls.DEFAULT_MODEL_DIR / Path(model_root_dir).name
-        for file_name, sha256 in model_info["SHA256"].items():
-            save_path = save_model_dir / file_name
-
-            download_params = DownloadFileInput(
-                file_url=f"{model_root_dir}/{file_name}",
-                sha256=sha256,
-                save_path=save_path,
-                logger=cls.logger,
-            )
-            DownloadFile.run(download_params)
-            results[Path(file_name).stem] = str(save_path)
-
-        return results
diff --git a/rapid_doc/model/ocr/rapid_ocr.py b/rapid_doc/model/ocr/rapid_ocr.py
diff --git a/rapid_doc/model/table/rapid_table.py b/rapid_doc/model/table/rapid_table.py
diff --git a/rapid_doc/model/table/rapid_table_self/main.py b/rapid_doc/model/table/rapid_table_self/main.py
diff --git a/rapid_doc/utils/download_file.py b/rapid_doc/utils/download_file.py
diff --git a/rapid_doc/utils/models_download_utils.py b/rapid_doc/utils/models_download_utils.py

Original file line number	Diff line number	Diff line change
`@@ -13,5 +13,5 @@`
`13`	`13`	`"right": "$"`
`14`	`14`	`}`
`15`	`15`	`},`
`16`		`- "config_version": "1.3.0"`
	`16`	`+ "config_version": "1.3.1"`
`17`	`17`	`}`