Skip to content

Commit 20d65aa

Browse files
authored
[Frontend] Multithreaded async multimodal load_bytes (#22710)
Signed-off-by: Alexandre Milesi <[email protected]> Co-authored-by: Alexandre Milesi <[email protected]>
1 parent b159c0a commit 20d65aa

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

vllm/envs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
VLLM_IMAGE_FETCH_TIMEOUT: int = 5
6464
VLLM_VIDEO_FETCH_TIMEOUT: int = 30
6565
VLLM_AUDIO_FETCH_TIMEOUT: int = 10
66+
VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
6667
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
6768
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
6869
VLLM_MM_INPUT_CACHE_GIB: int = 4
@@ -555,6 +556,12 @@ def get_vllm_port() -> Optional[int]:
555556
"VLLM_AUDIO_FETCH_TIMEOUT":
556557
lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),
557558

559+
# Max number of workers for the thread pool handling
560+
# media bytes loading. Set to 1 to disable parallel processing.
561+
# Default is 8
562+
"VLLM_MEDIA_LOADING_THREAD_COUNT":
563+
lambda: int(os.getenv("VLLM_MEDIA_LOADING_THREAD_COUNT", "8")),
564+
558565
# Maximum filesize in MB for a single audio file when processing
559566
# speech-to-text requests. Files larger than this will be rejected.
560567
# Default is 25 MB

vllm/multimodal/utils.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
import asyncio
5+
import atexit
6+
from concurrent.futures import ThreadPoolExecutor
47
from itertools import groupby
58
from pathlib import Path
69
from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
@@ -33,6 +36,10 @@
3336
MultiModalKwargs = Any
3437
MultiModalPlaceholderDict = Any
3538

39+
global_thread_pool = ThreadPoolExecutor(
40+
max_workers=envs.VLLM_MEDIA_LOADING_THREAD_COUNT)
41+
atexit.register(global_thread_pool.shutdown)
42+
3643

3744
class MediaConnector:
3845

@@ -139,19 +146,26 @@ async def load_from_url_async(
139146
fetch_timeout: Optional[int] = None,
140147
) -> _M:
141148
url_spec = urlparse(url)
149+
loop = asyncio.get_running_loop()
142150

143151
if url_spec.scheme.startswith("http"):
144152
connection = self.connection
145153
data = await connection.async_get_bytes(url, timeout=fetch_timeout)
146-
147-
return media_io.load_bytes(data)
154+
future = loop.run_in_executor(global_thread_pool,
155+
media_io.load_bytes, data)
156+
return await future
148157

149158
if url_spec.scheme == "data":
150-
return self._load_data_url(url_spec, media_io)
159+
future = loop.run_in_executor(global_thread_pool,
160+
self._load_data_url, url_spec,
161+
media_io)
162+
return await future
151163

152164
if url_spec.scheme == "file":
153-
return self._load_file_url(url_spec, media_io)
154-
165+
future = loop.run_in_executor(global_thread_pool,
166+
self._load_file_url, url_spec,
167+
media_io)
168+
return await future
155169
msg = "The URL must be either a HTTP, data or file URL."
156170
raise ValueError(msg)
157171

@@ -489,4 +503,4 @@ def fetch_video(
489503
"video": video_io_kwargs
490504
}
491505
media_connector = MediaConnector(media_io_kwargs=media_io_kwargs)
492-
return media_connector.fetch_video(video_url)
506+
return media_connector.fetch_video(video_url)

0 commit comments

Comments
 (0)