Skip to content

Commit 7ad7adb

Browse files
authored
v1: Pass KVConnectorOutput to scheduler-side (#22157)
Signed-off-by: Or Ozeri <[email protected]>
1 parent 6ade99e commit 7ad7adb

File tree

3 files changed

+22
-0
lines changed

3 files changed

+22
-0
lines changed

vllm/distributed/kv_transfer/kv_connector/v1/base.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
times for a given request and should be side-effect free.
1313
update_state_after_alloc() - update KVConnector state after
1414
temporary buffer alloc by the CacheManager.
15+
update_connector_output() - update KVConnector state after
16+
output is received from worker-side connectors.
1517
request_finished() - called when a request is finished, with
1618
the computed kv cache blocks for the request.
1719
Returns whether KV cache should be freed now or will be
@@ -38,6 +40,7 @@
3840

3941
from vllm.logger import init_logger
4042
from vllm.v1.core.sched.output import SchedulerOutput
43+
from vllm.v1.outputs import KVConnectorOutput
4144

4245
if TYPE_CHECKING:
4346
from vllm.attention.backends.abstract import AttentionMetadata
@@ -283,6 +286,16 @@ def build_connector_meta(
283286
"""
284287
pass
285288

289+
def update_connector_output(self, connector_output: KVConnectorOutput):
290+
"""
291+
Update KVConnector state from worker-side connectors output.
292+
293+
Args:
294+
connector_output (KVConnectorOutput): the worker-side
295+
connectors output.
296+
"""
297+
return
298+
286299
def request_finished(
287300
self,
288301
request: "Request",

vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from vllm.logger import init_logger
1515
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
1616
from vllm.v1.core.sched.output import SchedulerOutput
17+
from vllm.v1.outputs import KVConnectorOutput
1718

1819
if TYPE_CHECKING:
1920
from vllm.attention.backends.abstract import AttentionMetadata
@@ -177,6 +178,10 @@ def build_connector_meta(
177178
self._extra_async_saves = {}
178179
return metadata
179180

181+
def update_connector_output(self, connector_output: KVConnectorOutput):
182+
for c in self._connectors:
183+
c.update_connector_output(connector_output)
184+
180185
def request_finished(
181186
self,
182187
request: "Request",

vllm/v1/core/sched/scheduler.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,10 @@ def _update_from_kv_xfer_finished(self,
11501150
# if finished_recving: add to state so we can
11511151
scheduler the request during the next step.
11521152
"""
1153+
1154+
assert self.connector is not None
1155+
self.connector.update_connector_output(kv_connector_output)
1156+
11531157
# KV Connector:: update recv and send status from last step.
11541158
for req_id in (kv_connector_output.finished_recving or ()):
11551159
logger.debug("Finished recving KV transfer for request %s", req_id)

0 commit comments

Comments
 (0)