Skip to content

Commit b5fd950

Browse files
authored
[Bugfix] get_num_blocks_to_allocate with null_block (vllm-project#19031)
Signed-off-by: Chen Zhang <[email protected]>
1 parent 135cf55 commit b5fd950

File tree

4 files changed, with 32 additions and 4 deletions.

tests/v1/core/test_specialized_manager.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,3 +144,26 @@ def assert_block_id(block_table, ids):
144144
# of removed blocks should be [1003, 1002].
145145
manager.remove_skipped_blocks("test", 11)
146146
assert_block_id(block_table, [null_block_id] * 4 + original_block_ids[4:])
147+
148+
149+
def test_get_num_blocks_to_allocate():
    """Check the allocation count when computed blocks include null blocks.

    Two requests of 20 blocks' worth of tokens are queried against a
    sliding-window manager, each with 10 already-computed blocks:

    * request "1" passes 10 newly constructed ``KVCacheBlock`` objects and
      expects 20 blocks to allocate;
    * request "2" passes 5 references to the pool's ``null_block`` followed
      by 5 newly constructed ``KVCacheBlock`` objects and expects 15.

    NOTE(review): the expected values presumably rely on a newly constructed
    ``KVCacheBlock`` having ``ref_cnt == 0`` so that it is counted as
    evictable, while null blocks are excluded from that count -- confirm
    against ``KVCacheBlock`` and ``get_num_blocks_to_allocate``.
    """
    block_size = 2
    spec = SlidingWindowSpec(
        block_size=block_size,
        num_kv_heads=1,
        head_size=1,
        dtype=torch.float32,
        sliding_window=4,  # Placeholder value, not related to test result
        use_mla=False,
    )

    block_pool = BlockPool(num_gpu_blocks=100, enable_caching=True)
    manager = get_sliding_window_manager(spec, block_pool)

    # Both requests ask for the same number of tokens: 20 full blocks.
    num_tokens = 20 * block_size

    # Ten ordinary blocks with ids 1..10.
    ordinary_blocks = [KVCacheBlock(block_id) for block_id in range(1, 11)]
    # Five references to the shared null block, then ordinary blocks 1..5.
    null_prefixed_blocks = [block_pool.null_block] * 5
    null_prefixed_blocks += [KVCacheBlock(block_id) for block_id in range(1, 6)]

    needed_for_ordinary = manager.get_num_blocks_to_allocate(
        "1", num_tokens, ordinary_blocks)
    assert needed_for_ordinary == 20

    needed_with_nulls = manager.get_num_blocks_to_allocate(
        "2", num_tokens, null_prefixed_blocks)
    assert needed_with_nulls == 15

vllm/v1/core/block_pool.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ def __init__(
6363
# The ref_cnt of null_block is not maintained, needs special care to
6464
# avoid freeing it.
6565
self.null_block = self.free_block_queue.popleft()
66+
self.null_block.is_null = True
6667

6768
self.enable_kv_cache_events = enable_kv_cache_events
6869
self.kv_event_queue: list[KVCacheEvent] = []
@@ -252,7 +253,7 @@ def touch(self, blocks: list[KVCacheBlock]) -> None:
252253
for block in blocks:
253254
# ref_cnt=0 means this block is in the free list (i.e. eviction
254255
# candidate), so remove it.
255-
if block.ref_cnt == 0 and block != self.null_block:
256+
if block.ref_cnt == 0 and not block.is_null:
256257
self.free_block_queue.remove(block)
257258
block.incr_ref()
258259

@@ -267,7 +268,7 @@ def free_blocks(self, ordered_blocks: Iterable[KVCacheBlock]) -> None:
267268
for block in ordered_blocks:
268269
block.decr_ref()
269270
# null_block should not be added to the free list.
270-
if block.ref_cnt == 0 and block != self.null_block:
271+
if block.ref_cnt == 0 and not block.is_null:
271272
self.free_block_queue.append(block)
272273

273274
def reset_prefix_cache(self) -> bool:

vllm/v1/core/kv_cache_utils.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,9 @@ class KVCacheBlock:
125125
prev_free_block: Optional["KVCacheBlock"] = None
126126
next_free_block: Optional["KVCacheBlock"] = None
127127

128+
# Whether the block is a null block that should never be cached.
129+
is_null: bool = False
130+
128131
def incr_ref(self):
129132
self.ref_cnt += 1
130133

vllm/v1/core/single_type_kv_cache_manager.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -83,8 +83,9 @@ def get_num_blocks_to_allocate(
8383
# free queue and ref_cnt == 0), it will be changed from a free block
8484
# to a computed block when the request is allocated, so we also count
8585
# it as needed to be allocated.
86-
num_evictable_computed_blocks = sum(blk.ref_cnt == 0
87-
for blk in new_computed_blocks)
86+
num_evictable_computed_blocks = sum(
87+
blk.ref_cnt == 0 and not blk.is_null
88+
for blk in new_computed_blocks)
8889
return ((num_new_blocks + num_evictable_computed_blocks) *
8990
self.num_kv_cache_groups)
9091

0 commit comments

Comments
 (0)