Skip to content

Commit 169313b

Browse files
authored
[Misc] Make handling of SamplingParams clearer in n>1 case (vllm-project#26032)
Signed-off-by: Nick Hill <[email protected]>
1 parent 0b018d8 commit 169313b

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

vllm/v1/engine/async_llm.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,19 @@ async def add_request(
             await self._add_request(request, prompt_str, None, 0, queue)
             return queue
 
+        # Get the updated SamplingParams from the request, which
+        # were cloned/updated in processor.process_inputs above.
+        parent_params = request.sampling_params
+        assert parent_params is not None
+
         # Fan out child requests (for n>1).
-        parent_request = ParentRequest(request_id, request.sampling_params)
-        for idx in range(params.n):
-            request_id, params = parent_request.get_child_info(idx)
-            child_request = request if idx == params.n - 1 else copy(request)
+        parent_request = ParentRequest(request_id, parent_params)
+        for idx in range(parent_params.n):
+            request_id, child_params = parent_request.get_child_info(idx)
+            child_request = request if idx == parent_params.n - 1 else copy(
+                request)
             child_request.request_id = request_id
-            child_request.sampling_params = params
+            child_request.sampling_params = child_params
             await self._add_request(child_request, prompt_str, parent_request,
                                     idx, queue)
         return queue

0 commit comments

Comments
 (0)