Skip to content

Commit 518e453

Browse files
committed
send multipart/form-data for transcription model's health check
Signed-off-by: David Gao <[email protected]>
1 parent b156254 commit 518e453

File tree

1 file changed

+36
-10
lines changed

1 file changed

+36
-10
lines changed

src/vllm_router/utils.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import re
55
import resource
66

7-
import requests
87
from fastapi.requests import Request
8+
import requests
99
from starlette.datastructures import MutableHeaders
1010
import io
1111
import wave
@@ -184,14 +184,40 @@ def update_content_length(request: Request, request_body: str):
184184

185185
def is_model_healthy(url: str, model: str, model_type: str) -> bool:
    """Send a small test request to a backend and report whether it succeeded.

    Transcription backends are probed with a multipart/form-data upload of
    the pre-generated silent WAV clip; every other model type is probed with
    a JSON body built from the model type's test payload.

    Args:
        url: Base URL of the backend; the endpoint path from
            ``ModelType[model_type].value`` is appended to it.
        model: Model name sent in the request's ``model`` field.
        model_type: Name of a ``ModelType`` member. The lookup happens
            outside the ``try`` block, so an unknown name propagates to the
            caller instead of being reported as "unhealthy".

    Returns:
        True when the request completed without an HTTP error status (and,
        for non-transcription models, the body parsed as JSON); False
        otherwise.
    """
    model_details = ModelType[model_type]

    try:
        if model_type == "transcription":
            # For transcription, the backend expects multipart/form-data with
            # a file, so upload the pre-generated silent WAV bytes.
            files = {"file": ("empty.wav", _SILENT_WAV_BYTES, "audio/wav")}
            response = requests.post(
                f"{url}{model_details.value}",
                files=files,  # multipart/form-data
                data={"model": model},
                # Without a timeout a hung backend blocks the check forever.
                timeout=30,
            )
        else:
            # For other model types (chat, completion, etc.) send JSON.
            response = requests.post(
                f"{url}{model_details.value}",
                headers={"Content-Type": "application/json"},
                json={"model": model} | model_details.get_test_payload(model_type),
                timeout=30,
            )

        response.raise_for_status()

        if model_type != "transcription":
            # Verify the body is valid JSON for non-transcription models.
            response.json()

    except requests.exceptions.RequestException as e:
        logger.warning(f"{model_type} model {model} at {url} not healthy: {e}")
        return False

    except ValueError as e:
        # ValueError is the base of json.JSONDecodeError (and of simplejson's
        # decode error), which older requests releases raise from .json().
        logger.error(
            f"Failed to decode JSON from {model_type} model {model} at {url}: {e}"
        )
        return False

    # Both branches reach here only on success; return True explicitly so the
    # non-transcription path does not fall through and return None.
    return True

0 commit comments

Comments
 (0)