File tree Expand file tree Collapse file tree 3 files changed +22
-0
lines changed
distributed/kv_transfer/kv_connector/v1 Expand file tree Collapse file tree 3 files changed +22
-0
lines changed Original file line number Diff line number Diff line change 12
12
times for a given request and should be side-effect free.
13
13
update_state_after_alloc() - update KVConnector state after
14
14
temporary buffer alloc by the CacheManager.
15
+ update_connector_output() - update KVConnector state after
16
+ output is received from worker-side connectors.
15
17
request_finished() - called when a request is finished, with
16
18
the computed kv cache blocks for the request.
17
19
Returns whether KV cache should be freed now or will be
38
40
39
41
from vllm .logger import init_logger
40
42
from vllm .v1 .core .sched .output import SchedulerOutput
43
+ from vllm .v1 .outputs import KVConnectorOutput
41
44
42
45
if TYPE_CHECKING :
43
46
from vllm .attention .backends .abstract import AttentionMetadata
@@ -283,6 +286,16 @@ def build_connector_meta(
283
286
"""
284
287
pass
285
288
289
+ def update_connector_output (self , connector_output : KVConnectorOutput ):
290
+ """
291
+ Update KVConnector state from worker-side connectors output.
292
+
293
+ Args:
294
+ connector_output (KVConnectorOutput): the worker-side
295
+ connectors output.
296
+ """
297
+ return
298
+
286
299
def request_finished (
287
300
self ,
288
301
request : "Request" ,
Original file line number Diff line number Diff line change 14
14
from vllm .logger import init_logger
15
15
from vllm .v1 .core .kv_cache_manager import KVCacheBlocks
16
16
from vllm .v1 .core .sched .output import SchedulerOutput
17
+ from vllm .v1 .outputs import KVConnectorOutput
17
18
18
19
if TYPE_CHECKING :
19
20
from vllm .attention .backends .abstract import AttentionMetadata
@@ -177,6 +178,10 @@ def build_connector_meta(
177
178
self ._extra_async_saves = {}
178
179
return metadata
179
180
181
+ def update_connector_output (self , connector_output : KVConnectorOutput ):
182
+ for c in self ._connectors :
183
+ c .update_connector_output (connector_output )
184
+
180
185
def request_finished (
181
186
self ,
182
187
request : "Request" ,
Original file line number Diff line number Diff line change @@ -1150,6 +1150,10 @@ def _update_from_kv_xfer_finished(self,
1150
1150
# if finished_recving: add to state so we can
1151
1151
scheduler the request during the next step.
1152
1152
"""
1153
+
1154
+ assert self .connector is not None
1155
+ self .connector .update_connector_output (kv_connector_output )
1156
+
1153
1157
# KV Connector:: update recv and send status from last step.
1154
1158
for req_id in (kv_connector_output .finished_recving or ()):
1155
1159
logger .debug ("Finished recving KV transfer for request %s" , req_id )
You can’t perform that action at this time.
0 commit comments