|
1 | 1 | # SPDX-License-Identifier: Apache-2.0
|
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
| 3 | +import itertools |
3 | 4 | from abc import ABC, abstractmethod
|
4 | 5 | from collections import defaultdict
|
5 | 6 | from typing import Callable
|
@@ -177,14 +178,17 @@ def free(self, request_id: str) -> None:
|
177 | 178 | def get_num_common_prefix_blocks(self, request_id: str,
|
178 | 179 | num_running_requests: int) -> int:
|
179 | 180 | """
|
180 |
| - Get the number of common prefix blocks for a request. |
| 181 | + Get the number of common prefix blocks for all requests in the RUNNING |
| 182 | + state. |
181 | 183 |
|
182 | 184 | Args:
|
183 | 185 | request_id: The request ID.
|
184 |
| - num_running_requests: The number of requests in the RUNNING state. |
| 186 | + num_running_requests: The total number of requests in the RUNNING |
| 187 | + state. |
185 | 188 |
|
186 | 189 | Returns:
|
187 |
| - The number of common prefix blocks. |
| 190 | + The number of common prefix blocks for all requests in the RUNNING |
| 191 | + state. |
188 | 192 | """
|
189 | 193 |
|
190 | 194 | raise NotImplementedError
|
@@ -264,7 +268,7 @@ def find_longest_cache_hit(
|
264 | 268 | computed_blocks: tuple[list[KVCacheBlock], ...] = tuple(
|
265 | 269 | [] for _ in range(len(kv_cache_group_ids)))
|
266 | 270 | max_num_blocks = max_length // kv_cache_spec.block_size
|
267 |
| - for i, block_hash in zip(range(max_num_blocks), block_hashes): |
| 271 | + for block_hash in itertools.islice(block_hashes, max_num_blocks): |
268 | 272 | # block_hashes is a chain of block hashes. If a block hash is not
|
269 | 273 | # in the cached_block_hash_to_id, the following block hashes are
|
270 | 274 | # not computed yet for sure.
|
|
0 commit comments