Skip to content

Commit 4393684

Browse files
[BugFix] Fix PP/async scheduling with pooling models (#28899)
Signed-off-by: Nick Hill <[email protected]> Co-authored-by: Cyrus Leung <[email protected]>
1 parent 896e41a commit 4393684

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

vllm/v1/engine/core.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,7 @@ def __init__(
184184
vllm_config.ec_transfer_config is not None
185185
and vllm_config.ec_transfer_config.is_ec_producer
186186
)
187+
self.is_pooling_model = vllm_config.model_config.runner_type == "pooling"
187188

188189
self.request_block_hasher: Callable[[Request], list[BlockHash]] | None = None
189190
if vllm_config.cache_config.enable_prefix_caching or kv_connector is not None:
@@ -392,7 +393,7 @@ def step_with_batch_queue(
392393
if not self.ec_producer:
393394
model_executed = scheduler_output.total_num_scheduled_tokens > 0
394395

395-
if not model_executed:
396+
if self.is_pooling_model or not model_executed:
396397
# No sampling required (pooling model, or no requests scheduled).
397398
future = cast(Future[ModelRunnerOutput], exec_future)
398399
else:

vllm/v1/executor/ray_executor.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -99,9 +99,9 @@ def _init_executor(self) -> None:
9999
# KV connector setup
100100
self.has_connector = self.vllm_config.kv_transfer_config is not None
101101

102-
self.ec_producer = (
103-
self.vllm_config.ec_transfer_config is not None
104-
and self.vllm_config.ec_transfer_config.is_ec_producer
102+
self.uses_sampler = self.vllm_config.model_config.runner_type != "pooling" and (
103+
self.vllm_config.ec_transfer_config is None
104+
or not self.vllm_config.ec_transfer_config.is_ec_producer
105105
)
106106

107107
self.scheduler_output: SchedulerOutput | None = None
@@ -401,7 +401,7 @@ def execute_model( # type: ignore[override]
401401
"after execute_model() returns None."
402402
)
403403

404-
if self.ec_producer or not scheduler_output.total_num_scheduled_tokens:
404+
if not self.uses_sampler or not scheduler_output.total_num_scheduled_tokens:
405405
# No sampler step needed (pooling model, EC producer, or nothing scheduled); call model runner immediately.
406406
return self._execute_dag(scheduler_output, None, non_block)
407407

0 commit comments

Comments
 (0)