We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 048e90d, commit d274720 (Copy full SHA for d274720)
mellea/backends/vllm.py
@@ -275,7 +275,13 @@ def _generate_from_context_standard(
275
**self._make_backend_specific_and_remove(
276
model_options, vllm.SamplingParams
277
),
278
- output_kind=vllm.sampling_params.RequestOutputKind.DELTA, # returns results incrementally
+ output_kind=(
279
+ # returns results incrementally
280
+ vllm.sampling_params.RequestOutputKind.DELTA
281
+ if model_options.get(ModelOption.STREAM, False)
282
+ # returns only the final result
283
+ else vllm.sampling_params.RequestOutputKind.FINAL_ONLY
284
+ ),
285
)
286
287
if format is not None:
0 commit comments