Skip to content

Commit ed25054

Browse files
authored
[Core] Introduce popleft_n and append_n in FreeKVCacheBlockQueue to further optimize block_pool (#21222)
Signed-off-by: Jialin Ouyang <[email protected]>
1 parent 10904e6 commit ed25054

File tree

3 files changed

+183
-20
lines changed

3 files changed

+183
-20
lines changed

tests/v1/core/test_kv_cache_utils.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,111 @@ def test_free_kv_cache_block_queue_operations():
184184
assert str(e.value) == "No free blocks available"
185185

186186

187+
def test_free_kv_cache_block_queue_append_n():
    """append_n must splice each batch, in order, right before the fake tail."""
    # Start from a queue that holds no blocks at all.
    queue = FreeKVCacheBlockQueue([])
    blocks = [KVCacheBlock(block_id=i) for i in range(6)]

    def check_chain(expected):
        # Verify the free-block counter and both directions of every link
        # along fake_head -> expected... -> fake_tail.
        assert queue.num_free_blocks == len(expected)
        nodes = [
            queue.fake_free_list_head,
            *expected,
            queue.fake_free_list_tail,
        ]
        for left, right in zip(nodes, nodes[1:]):
            assert left.next_free_block is right
            assert right.prev_free_block is left

    # Appending nothing keeps the queue empty:
    # fake_head->fake_tail
    queue.append_n([])
    check_chain([])

    # Appending a single block:
    # fake_head->b0->fake_tail
    queue.append_n(blocks[0:1])
    check_chain([blocks[0]])

    # Appending two more blocks:
    # fake_head->b0->b4->b5->fake_tail
    queue.append_n(blocks[4:6])
    check_chain([blocks[0], blocks[4], blocks[5]])

    # Appending the remaining three blocks:
    # fake_head->b0->b4->b5->b1->b2->b3->fake_tail
    queue.append_n(blocks[1:4])
    check_chain([
        blocks[0], blocks[4], blocks[5], blocks[1], blocks[2], blocks[3]
    ])
237+
238+
239+
def test_free_kv_cache_block_queue_popleft_n():
    """popleft_n must detach blocks from the head and relink the remainder.

    Besides checking the returned blocks, every step also verifies the state
    left behind in the queue (free-block counter and the doubly linked
    chain), so a pop that returns the right blocks but leaves the list
    inconsistent is caught.
    """
    blocks = [KVCacheBlock(block_id=i) for i in range(6)]
    # Create a FreeKVCacheBlockQueue pre-populated with the blocks in a
    # shuffled order.
    order = [blocks[1], blocks[3], blocks[5], blocks[4], blocks[0], blocks[2]]
    queue = FreeKVCacheBlockQueue(order)

    def check_chain(expected):
        # Verify the free-block counter and both directions of every link
        # along fake_head -> expected... -> fake_tail.
        assert queue.num_free_blocks == len(expected)
        nodes = [
            queue.fake_free_list_head,
            *expected,
            queue.fake_free_list_tail,
        ]
        for left, right in zip(nodes, nodes[1:]):
            assert left.next_free_block is right
            assert right.prev_free_block is left

    def check_detached(popped):
        # Popped blocks must no longer point into the free list.
        for block in popped:
            assert block.prev_free_block is None
            assert block.next_free_block is None

    # Initial state:
    # fake_head->b1->b3->b5->b4->b0->b2->fake_tail
    check_chain(order)

    # Pop 0 block: nothing is returned and the queue is untouched.
    # fake_head->b1->b3->b5->b4->b0->b2->fake_tail
    assert len(queue.popleft_n(0)) == 0
    check_chain(order)

    # Pop 1 block
    # fake_head->b3->b5->b4->b0->b2->fake_tail
    result_blocks = queue.popleft_n(1)
    assert len(result_blocks) == 1
    assert result_blocks[0] is blocks[1]
    check_detached(result_blocks)
    check_chain(order[1:])

    # Pop 2 blocks
    # fake_head->b4->b0->b2->fake_tail
    result_blocks = queue.popleft_n(2)
    assert len(result_blocks) == 2
    assert result_blocks[0] is blocks[3]
    assert result_blocks[1] is blocks[5]
    check_detached(result_blocks)
    check_chain(order[3:])

    # Pop 3 blocks: the queue becomes empty again.
    # fake_head->fake_tail
    result_blocks = queue.popleft_n(3)
    assert len(result_blocks) == 3
    assert result_blocks[0] is blocks[4]
    assert result_blocks[1] is blocks[0]
    assert result_blocks[2] is blocks[2]
    check_detached(result_blocks)
    check_chain([])
290+
291+
187292
def test_free_kv_cache_block_queue_get_all_free_blocks():
188293
# Create a list of KVCacheBlock objects
189294
blocks = [KVCacheBlock(block_id=i) for i in range(5)]

vllm/v1/core/block_pool.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -214,21 +214,18 @@ def get_new_blocks(self, num_blocks: int) -> list[KVCacheBlock]:
214214
raise ValueError(
215215
f"Cannot get {num_blocks} free blocks from the pool")
216216

217-
ret: list[KVCacheBlock] = []
218-
idx = 0
219-
while idx < num_blocks:
220-
# First allocate blocks.
221-
curr_block = self.free_block_queue.popleft()
222-
assert curr_block.ref_cnt == 0
223-
224-
# If the block is cached, evict it.
225-
if self.enable_caching:
226-
self._maybe_evict_cached_block(curr_block)
227-
228-
curr_block.incr_ref()
229-
ret.append(curr_block)
230-
idx += 1
231-
217+
ret: list[KVCacheBlock] = self.free_block_queue.popleft_n(num_blocks)
218+
219+
# In order to only iterate the list once, we duplicated code a bit
220+
if self.enable_caching:
221+
for block in ret:
222+
self._maybe_evict_cached_block(block)
223+
assert block.ref_cnt == 0
224+
block.ref_cnt += 1
225+
else:
226+
for block in ret:
227+
assert block.ref_cnt == 0
228+
block.ref_cnt += 1
232229
return ret
233230

234231
def _maybe_evict_cached_block(self, block: KVCacheBlock) -> bool:
@@ -289,11 +286,14 @@ def free_blocks(self, ordered_blocks: Iterable[KVCacheBlock]) -> None:
289286
ordered_blocks: A list of blocks to free ordered by their eviction
290287
priority.
291288
"""
292-
for block in ordered_blocks:
293-
block.decr_ref()
294-
# null_block should not be added to the free list.
295-
if block.ref_cnt == 0 and not block.is_null:
296-
self.free_block_queue.append(block)
289+
# Materialize the iterable to allow multiple passes.
290+
blocks_list = list(ordered_blocks)
291+
for block in blocks_list:
292+
block.ref_cnt -= 1
293+
self.free_block_queue.append_n([
294+
block for block in blocks_list
295+
if block.ref_cnt == 0 and not block.is_null
296+
])
297297

298298
def reset_prefix_cache(self) -> bool:
299299
"""Reset prefix cache. This function may be used in RLHF

vllm/v1/core/kv_cache_utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ class KVCacheBlock:
154154
# Whether the block is a null block that should never be cached.
155155
is_null: bool = False
156156

157+
# TODO(Jialin): For performance, let callers handle ref_cnt bumps to
158+
# avoid function calls.
157159
def incr_ref(self):
    """Increase this block's reference count (``ref_cnt``) by one."""
    self.ref_cnt += 1
159161

@@ -273,6 +275,39 @@ def popleft(self) -> KVCacheBlock:
273275
self.num_free_blocks -= 1
274276
return first_block
275277

278+
def popleft_n(self, n: int) -> list[KVCacheBlock]:
    """Pop the first n free blocks and reduce num_free_blocks by n.

    The request is validated before anything is modified, so a rejected
    call leaves both the linked list and num_free_blocks untouched.

    Args:
        n: The number of blocks to pop. Must be non-negative and no larger
            than num_free_blocks.

    Returns:
        A list of n free blocks, in queue order, with their
        prev_free_block/next_free_block pointers reset to None.

    Raises:
        ValueError: If n is negative or exceeds num_free_blocks.
    """
    # A negative n would otherwise pass the size check below and then
    # *increase* num_free_blocks when subtracted.
    if n < 0:
        raise ValueError(f"Cannot pop a negative number of blocks: {n}")
    if n == 0:
        return []
    # Raise explicitly instead of relying on an assert: asserts are stripped
    # under -O, and walking past the fake tail would corrupt the list.
    if n > self.num_free_blocks:
        raise ValueError(
            f"Cannot pop {n} free blocks, only "
            f"{self.num_free_blocks} available")

    curr_block = self.fake_free_list_head.next_free_block
    # Pop n blocks from the head of the list
    ret = []
    for _ in range(n):
        assert curr_block is not None
        ret.append(curr_block)
        next_block = curr_block.next_free_block
        # Reset prev_free_block and next_free_block of all popped blocks
        curr_block.prev_free_block = None
        curr_block.next_free_block = None
        curr_block = next_block

    if curr_block is not None:
        # curr_block is the first block that was not popped (the fake tail
        # once every real block is gone); connect the fake head to it.
        self.fake_free_list_head.next_free_block = curr_block
        curr_block.prev_free_block = self.fake_free_list_head

    self.num_free_blocks -= n
    return ret
310+
276311
def remove(self, block: KVCacheBlock) -> None:
277312
"""Remove a block in the free list and reduce num_free_blocks by 1.
278313
@@ -315,6 +350,29 @@ def append(self, block: KVCacheBlock) -> None:
315350

316351
self.num_free_blocks += 1
317352

353+
def append_n(self, blocks: list[KVCacheBlock]) -> None:
    """Put a list of blocks back into the free list.

    The blocks are linked, in the given order, between the current last
    entry of the list and the fake tail, and num_free_blocks grows by
    len(blocks). An empty list is a no-op.

    Args:
        blocks: The blocks to append.
    """
    if not blocks:
        return

    last_block = self.fake_free_list_tail.prev_free_block
    assert last_block is not None, (
        "prev_free_block of fake_free_list_tail should always exist")
    # Add inter-connections between consecutive blocks
    for block in blocks:
        block.prev_free_block = last_block
        last_block.next_free_block = block
        last_block = block

    # Connect the last block of <blocks> to the fake tail
    last_block.next_free_block = self.fake_free_list_tail
    self.fake_free_list_tail.prev_free_block = last_block

    # Update the counter only after linking succeeded, so a failed invariant
    # check above cannot leave num_free_blocks inflated.
    self.num_free_blocks += len(blocks)
375+
318376
def get_all_free_blocks(self) -> list[KVCacheBlock]:
319377
"""Get all free blocks in the free list. Mainly used for testing.
320378

0 commit comments

Comments
 (0)