File tree Expand file tree Collapse file tree 3 files changed +16
-5
lines changed Expand file tree Collapse file tree 3 files changed +16
-5
lines changed Original file line number Diff line number Diff line change @@ -351,6 +351,11 @@ you can use the [official OpenAI Python client](https://github.com/openai/openai
351
351
Code example: <gh-file:examples/online_serving/openai_transcription_client.py>
352
352
<!-- TODO: api enforced limits + uploading audios -->
353
353
354
+ #### API Enforced Limits
355
+
356
+ Set the maximum audio file size (in MB) that vLLM will accept, via the
357
+ `VLLM_MAX_AUDIO_CLIP_FILESIZE_MB` environment variable. The default is 25 MB.
358
+
354
359
#### Extra Parameters
355
360
356
361
The following [sampling parameters][sampling-params] are supported.
Original file line number Diff line number Diff line change 11
11
import numpy as np
12
12
from fastapi import Request
13
13
14
+ import vllm .envs as envs
14
15
from vllm .config import ModelConfig
15
16
from vllm .engine .protocol import EngineClient
16
17
from vllm .entrypoints .logger import RequestLogger
38
39
39
40
logger = init_logger (__name__ )
40
41
41
- # As per https://platform.openai.com/docs/guides/speech-to-text#overview.
42
- # TODO configurable
43
- MAX_AUDIO_CLIP_FILESIZE_MB = 25
44
-
45
42
46
43
class OpenAISpeechToText (OpenAIServing ):
47
44
"""Base class for speech-to-text operations like transcription and
@@ -70,6 +67,8 @@ def __init__(
70
67
self .asr_config = self .model_cls .get_speech_to_text_config (
71
68
model_config , task_type )
72
69
70
+ self .max_audio_filesize_mb = envs .VLLM_MAX_AUDIO_CLIP_FILESIZE_MB
71
+
73
72
if self .default_sampling_params :
74
73
logger .info (
75
74
"Overwriting default completion sampling param with: %s" ,
@@ -93,7 +92,7 @@ async def _preprocess_speech_to_text(
93
92
lang = request .language or "en"
94
93
self .model_cls .validate_language (lang )
95
94
96
- if len (audio_data ) / 1024 ** 2 > MAX_AUDIO_CLIP_FILESIZE_MB :
95
+ if len (audio_data ) / 1024 ** 2 > self . max_audio_filesize_mb :
97
96
raise ValueError ("Maximum file size exceeded." )
98
97
99
98
with io .BytesIO (audio_data ) as bytes_ :
Original file line number Diff line number Diff line change 61
61
VLLM_IMAGE_FETCH_TIMEOUT : int = 5
62
62
VLLM_VIDEO_FETCH_TIMEOUT : int = 30
63
63
VLLM_AUDIO_FETCH_TIMEOUT : int = 10
64
+ VLLM_MAX_AUDIO_CLIP_FILESIZE_MB : int = 25
64
65
VLLM_VIDEO_LOADER_BACKEND : str = "opencv"
65
66
VLLM_MM_INPUT_CACHE_GIB : int = 8
66
67
VLLM_TARGET_DEVICE : str = "cuda"
@@ -519,6 +520,12 @@ def get_vllm_port() -> Optional[int]:
519
520
"VLLM_AUDIO_FETCH_TIMEOUT" :
520
521
lambda : int (os .getenv ("VLLM_AUDIO_FETCH_TIMEOUT" , "10" )),
521
522
523
+ # Maximum filesize in MB for a single audio file when processing
524
+ # speech-to-text requests. Files larger than this will be rejected.
525
+ # Default is 25 MB
526
+ "VLLM_MAX_AUDIO_CLIP_FILESIZE_MB" :
527
+ lambda : int (os .getenv ("VLLM_MAX_AUDIO_CLIP_FILESIZE_MB" , "25" )),
528
+
522
529
# Backend for Video IO
523
530
# - "opencv": Default backend that uses OpenCV stream buffered backend.
524
531
#
You can’t perform that action at this time.
0 commit comments