Skip to content

Commit accf334

Browse files
authored
[bugfix] Fix multimodal_output property to check completion outputs where audio data is attached (#1203)
Signed-off-by: linyueqian <linyueqian@outlook.com>
1 parent 53a44c5 commit accf334

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

examples/online_serving/qwen3_tts/openai_speech_client.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -111,9 +111,14 @@ def run_tts_generation(args) -> None:
111111
print(response.text)
112112
return
113113

114-
if response.content.decode("utf-8").startswith('{"error"'):
115-
print(f"Error: {response.content.decode('utf-8')}")
116-
return
114+
# Check for JSON error response (only if content is valid UTF-8 text)
115+
try:
116+
text = response.content.decode("utf-8")
117+
if text.startswith('{"error"'):
118+
print(f"Error: {text}")
119+
return
120+
except UnicodeDecodeError:
121+
pass # Binary audio data, not an error
117122

118123
# Save audio response
119124
output_path = args.output or "tts_output.wav"

vllm_omni/outputs.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -124,10 +124,16 @@ def from_diffusion(
124124
def multimodal_output(self) -> dict[str, Any]:
125125
"""Return multimodal output from the underlying request output or local field.
126126
127-
For pipeline outputs, this proxies to request_output.multimodal_output.
127+
For pipeline outputs, this checks completion outputs first, then request_output.
128128
For diffusion outputs, this returns the local _multimodal_output field.
129129
"""
130130
if self.request_output is not None:
131+
# Check completion outputs first (where multimodal_output is attached)
132+
if self.request_output.outputs:
133+
for output in self.request_output.outputs:
134+
mm = getattr(output, "multimodal_output", None)
135+
if mm:
136+
return mm
131137
return getattr(self.request_output, "multimodal_output", {})
132138
return self._multimodal_output
133139

0 commit comments

Comments
 (0)