Skip to content

Commit 93396f2

Browse files
authored
Merge pull request #50 from opendatalab/dev
Dev
2 parents fc8464c + 0fe03d5 commit 93396f2

File tree

2 files changed

+34
-17
lines changed

2 files changed

+34
-17
lines changed

mineru_vl_utils/vlm_client/http_client.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import httpx
88
from httpx_retries import Retry, RetryTransport
99
from PIL import Image
10+
from loguru import logger
1011

1112
from .base_client import (
1213
DEFAULT_SYSTEM_PROMPT,
@@ -63,14 +64,22 @@ def __init__(
6364

6465
if not server_url:
6566
server_url = _get_env("MINERU_VL_SERVER")
66-
6767
if server_url.endswith("/"): # keep server_url if it ends with '/'
6868
server_url = server_url.rstrip("/")
6969
else: # use base_url if it does not end with '/' (backward compatibility)
7070
server_url = self._get_base_url(server_url)
71-
7271
self.server_url = server_url
73-
self.server_headers = server_headers
72+
73+
api_key = os.getenv("MINERU_VL_API_KEY", "").strip()
74+
if api_key:
75+
headers = dict(server_headers) if server_headers else {}
76+
if "Authorization" in headers:
77+
logger.warning("Overriding existing 'Authorization' header with MINERU_VL_API_KEY from environment variable.")
78+
headers["Authorization"] = f"Bearer {api_key}"
79+
self.server_headers = headers
80+
else:
81+
self.server_headers = server_headers
82+
7483
self.http_timeout = http_timeout
7584
self.max_retries = max_retries
7685
self.retry_backoff_factor = retry_backoff_factor
@@ -79,6 +88,7 @@ def __init__(
7988
self._aio_client_sem = asyncio.Semaphore(1)
8089
self._aio_client_cache: dict[asyncio.AbstractEventLoop, httpx.AsyncClient] = {}
8190

91+
model_name = model_name or os.getenv("MINERU_VL_MODEL_NAME")
8292
if model_name:
8393
self._check_model_name(self.server_url, model_name)
8494
self.model_name = model_name
@@ -164,7 +174,8 @@ def _get_model_name(self, base_url: str) -> str:
164174
raise RequestError(f"No models found in response from {base_url}. Response body: {response.text}")
165175
if len(models) != 1:
166176
raise RequestError(
167-
f"Expected exactly one model from {base_url}, but got {len(models)}. Please specify the model name."
177+
f"Expected exactly one model from {base_url}, but got {len(models)}. Please specify the model name"
178+
f" or set the `MINERU_VL_MODEL_NAME` environment variable."
168179
)
169180
model_name = models[0].get("id", "")
170181
if not model_name:
@@ -275,6 +286,11 @@ def get_response_content(self, response_data: dict) -> str:
275286
content = message["content"]
276287
if not (content is None or isinstance(content, str)):
277288
raise ServerError(f"Unexpected content type: {type(content)}.")
289+
# Allow the end token to be configured via environment variable, falling back to the default.
290+
# Set MINERU_VLM_END_TOKEN to override or disable stripping (e.g., set to an empty string).
291+
end_token = os.getenv("MINERU_VLM_END_TOKEN", "<|im_end|>")
292+
if end_token and isinstance(content, str) and content.endswith(end_token):
293+
content = content[:-len(end_token)]
278294
return content or ""
279295

280296
def predict(

pyproject.toml

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ description = "Utilities for MinerU Vision-Language models"
1414
readme = "README.md"
1515
requires-python = ">=3.10,<3.14"
1616
dependencies = [
17-
"httpx",
18-
"httpx-retries",
19-
"aiofiles",
20-
"pillow",
21-
"pydantic",
17+
"httpx",
18+
"httpx-retries",
19+
"aiofiles",
20+
"pillow",
21+
"pydantic",
22+
"loguru",
2223
]
2324
classifiers = [
2425
"Programming Language :: Python :: 3",
@@ -33,20 +34,20 @@ Issues = "https://github.com/opendatalab/mineru-vl-utils/issues"
3334

3435
[project.optional-dependencies]
3536
transformers = [
36-
"torch>=2.6.0,<3",
37-
"transformers>=4.51.1,<5.0.0",
38-
"accelerate>=1.5.1",
39-
"torchvision",
37+
"torch>=2.6.0,<3",
38+
"transformers>=4.51.1,<5.0.0",
39+
"accelerate>=1.5.1",
40+
"torchvision",
4041
]
4142
vllm = [
42-
"vllm>=0.10.0,<=0.11.0",
43+
"vllm>=0.10.1.1,<0.12",
4344
]
4445
mlx = [
45-
"mlx-vlm>=0.3.3,<0.4.0",
46+
"mlx-vlm>=0.3.3,<0.4.0",
4647
]
4748
lmdeploy = [
48-
"lmdeploy>=0.10.2",
49-
"qwen_vl_utils",
49+
"lmdeploy>=0.10.2,<0.12",
50+
"qwen_vl_utils",
5051
]
5152

5253
[tool.setuptools.dynamic]

0 commit comments

Comments (0)