File tree Expand file tree Collapse file tree 1 file changed +11
-1
lines changed Expand file tree Collapse file tree 1 file changed +11
-1
lines changed Original file line number Diff line number Diff line change @@ -437,14 +437,24 @@ def schedule(self) -> SchedulerOutput:
437
437
# The request cannot be scheduled.
438
438
break
439
439
440
+ # Edge case: when P/D disaggregation is used with
441
+ # speculative decoding, an extra block gets
442
+ # allocated, creating a mismatch between the number
443
+ # of local and remote blocks. Request no lookahead
444
+ # tokens while the request has no computed tokens.
445
+ effective_lookahead_tokens = (0 if request .num_computed_tokens
446
+ == 0 else
447
+ self .num_lookahead_tokens )
448
+
440
449
new_blocks = self .kv_cache_manager .allocate_slots (
441
450
request ,
442
451
num_new_tokens + num_external_computed_tokens ,
443
452
num_new_local_computed_tokens ,
444
453
new_computed_blocks ,
445
- num_lookahead_tokens = self . num_lookahead_tokens ,
454
+ num_lookahead_tokens = effective_lookahead_tokens ,
446
455
delay_cache_blocks = load_kv_async ,
447
456
)
457
+
448
458
if new_blocks is None :
449
459
# The request cannot be scheduled.
450
460
break
You can’t perform that action at this time.
0 commit comments