Skip to content

Commit d274720

Browse files
committed
fix(vllm): avoid unnecessary incremental processing in non-streaming mode
1 parent 048e90d commit d274720

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

mellea/backends/vllm.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -275,7 +275,13 @@ def _generate_from_context_standard(
275275
**self._make_backend_specific_and_remove(
276276
model_options, vllm.SamplingParams
277277
),
278-
output_kind=vllm.sampling_params.RequestOutputKind.DELTA, # returns results incrementally
278+
output_kind=(
279+
# returns results incrementally
280+
vllm.sampling_params.RequestOutputKind.DELTA
281+
if model_options.get(ModelOption.STREAM, False)
282+
# returns only the final result
283+
else vllm.sampling_params.RequestOutputKind.FINAL_ONLY
284+
),
279285
)
280286

281287
if format is not None:

0 commit comments

Comments
 (0)