Skip to content

Commit ce639ec

Browse files
authored
Merge branch 'opendatalab:dev' into dev
2 parents 3dbfd14 + 1f35a53 commit ce639ec

File tree

4 files changed: +58 additions, -11 deletions

mineru_vl_utils/mineru_client.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ def resize_by_need(self, image: Image.Image) -> Image.Image:
121121
image = new_image
122122
if min(image.size) < self.min_image_edge:
123123
scale = self.min_image_edge / min(image.size)
124-
new_w, new_h = round(image.width * scale), round(image.height * scale)
124+
new_w, new_h = math.ceil(image.width * scale), math.ceil(image.height * scale)
125125
image = image.resize((new_w, new_h), Image.Resampling.BICUBIC)
126126
return image
127127

@@ -315,6 +315,10 @@ def __init__(
315315
executor: Executor | None = None,
316316
batch_size: int = 0, # for transformers and vllm-engine
317317
http_timeout: int = 600, # for http-client backend only
318+
connect_timeout: int = 10, # for http-client backend only
319+
max_connections: int | None = None, # for http-client backend only
320+
max_keepalive_connections: int | None = 20, # for http-client backend only
321+
keepalive_expiry: float | None = 5, # for http-client backend only
318322
use_tqdm: bool = True,
319323
debug: bool = False,
320324
max_retries: int = 3, # for http-client backend only
@@ -421,6 +425,10 @@ def __init__(
421425
max_concurrency=max_concurrency,
422426
batch_size=batch_size,
423427
http_timeout=http_timeout,
428+
connect_timeout=connect_timeout,
429+
max_connections=max_connections,
430+
max_keepalive_connections=max_keepalive_connections,
431+
keepalive_expiry=keepalive_expiry,
424432
use_tqdm=use_tqdm,
425433
debug=debug,
426434
max_retries=max_retries,

mineru_vl_utils/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.1.19.1"
1+
__version__ = "0.1.21"

mineru_vl_utils/vlm_client/base_client.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -165,12 +165,15 @@ def new_vlm_client(
165165
max_concurrency: int = 100,
166166
batch_size: int = 0,
167167
http_timeout: int = 600,
168+
connect_timeout: int = 10,
169+
max_connections: int | None = None,
170+
max_keepalive_connections: int | None = 20,
171+
keepalive_expiry: float | None = 5,
168172
use_tqdm: bool = True,
169173
debug: bool = False,
170174
max_retries: int = 3,
171175
retry_backoff_factor: float = 0.5,
172176
) -> VlmClient:
173-
174177
if backend == "http-client":
175178
from .http_client import HttpVlmClient
176179

@@ -185,6 +188,10 @@ def new_vlm_client(
185188
allow_truncated_content=allow_truncated_content,
186189
max_concurrency=max_concurrency,
187190
http_timeout=http_timeout,
191+
connect_timeout=connect_timeout,
192+
max_connections=max_connections,
193+
max_keepalive_connections=max_keepalive_connections,
194+
keepalive_expiry=keepalive_expiry,
188195
debug=debug,
189196
max_retries=max_retries,
190197
retry_backoff_factor=retry_backoff_factor,

mineru_vl_utils/vlm_client/http_client.py

Lines changed: 40 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@
66

77
import httpx
88
from httpx_retries import Retry, RetryTransport
9-
from PIL import Image
109
from loguru import logger
10+
from PIL import Image
1111

1212
from .base_client import (
1313
DEFAULT_SYSTEM_PROMPT,
@@ -48,6 +48,10 @@ def __init__(
4848
allow_truncated_content: bool = False,
4949
max_concurrency: int = 100,
5050
http_timeout: int = 600,
51+
connect_timeout: int = 10,
52+
max_connections: int | None = None,
53+
max_keepalive_connections: int | None = 20,
54+
keepalive_expiry: float | None = 5,
5155
debug: bool = False,
5256
max_retries: int = 3,
5357
retry_backoff_factor: float = 0.5,
@@ -81,6 +85,10 @@ def __init__(
8185
self.server_headers = server_headers
8286

8387
self.http_timeout = http_timeout
88+
self.connect_timeout = connect_timeout
89+
self.max_connections = max_connections
90+
self.max_keepalive_connections = max_keepalive_connections
91+
self.keepalive_expiry = keepalive_expiry
8492
self.max_retries = max_retries
8593
self.retry_backoff_factor = retry_backoff_factor
8694

@@ -102,23 +110,47 @@ def chat_url(self) -> str:
102110
def _new_client(self) -> httpx.Client:
103111
return httpx.Client(
104112
headers=self.server_headers,
105-
timeout=httpx.Timeout(connect=10.0, read=self.http_timeout, write=self.http_timeout, pool=None),
113+
timeout=httpx.Timeout(
114+
connect=self.connect_timeout,
115+
read=self.http_timeout,
116+
write=self.http_timeout,
117+
pool=None,
118+
),
106119
transport=RetryTransport(
107-
retry=Retry(total=self.max_retries, backoff_factor=self.retry_backoff_factor),
120+
retry=Retry(
121+
total=self.max_retries,
122+
backoff_factor=self.retry_backoff_factor,
123+
),
108124
transport=httpx.HTTPTransport(
109-
limits=httpx.Limits(max_connections=None, max_keepalive_connections=20),
125+
limits=httpx.Limits(
126+
max_connections=self.max_connections,
127+
max_keepalive_connections=self.max_keepalive_connections,
128+
keepalive_expiry=self.keepalive_expiry,
129+
),
110130
),
111131
),
112132
)
113133

114134
async def _new_aio_client(self) -> httpx.AsyncClient:
115135
return httpx.AsyncClient(
116136
headers=self.server_headers,
117-
timeout=httpx.Timeout(connect=10.0, read=self.http_timeout, write=self.http_timeout, pool=None),
137+
timeout=httpx.Timeout(
138+
connect=self.connect_timeout,
139+
read=self.http_timeout,
140+
write=self.http_timeout,
141+
pool=None,
142+
),
118143
transport=RetryTransport(
119-
retry=Retry(total=self.max_retries, backoff_factor=self.retry_backoff_factor),
144+
retry=Retry(
145+
total=self.max_retries,
146+
backoff_factor=self.retry_backoff_factor,
147+
),
120148
transport=httpx.AsyncHTTPTransport(
121-
limits=httpx.Limits(max_connections=None, max_keepalive_connections=20),
149+
limits=httpx.Limits(
150+
max_connections=self.max_connections,
151+
max_keepalive_connections=self.max_keepalive_connections,
152+
keepalive_expiry=self.keepalive_expiry,
153+
),
122154
),
123155
),
124156
)
@@ -290,7 +322,7 @@ def get_response_content(self, response_data: dict) -> str:
290322
# Set MINERU_VLM_END_TOKEN to override or disable stripping (e.g., set to an empty string).
291323
end_token = os.getenv("MINERU_VLM_END_TOKEN", "<|im_end|>")
292324
if end_token and isinstance(content, str) and content.endswith(end_token):
293-
content = content[:-len(end_token)]
325+
content = content[: -len(end_token)]
294326
return content or ""
295327

296328
def predict(

0 commit comments

Comments
 (0)