|
4 | 4 | from collections.abc import Iterable |
5 | 5 | from typing import Optional |
6 | 6 |
|
7 | | -from vllm.distributed.kv_events import (AllBlocksCleared, BlockRemoved, |
8 | | - BlockStored, KVCacheEvent) |
| 7 | +from vllm.distributed.kv_events import (MEDIUM_GPU, AllBlocksCleared, |
| 8 | + BlockRemoved, BlockStored, |
| 9 | + KVCacheEvent) |
9 | 10 | from vllm.logger import init_logger |
10 | 11 | from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId, |
11 | 12 | FreeKVCacheBlockQueue, KVCacheBlock) |
@@ -156,6 +157,7 @@ def cache_full_blocks( |
156 | 157 | block_size=block_size, |
157 | 158 | lora_id=request.lora_request.id |
158 | 159 | if request.lora_request else None, |
| 160 | + medium=MEDIUM_GPU, |
159 | 161 | )) |
160 | 162 |
|
161 | 163 | def get_new_blocks(self, num_blocks: int) -> list[KVCacheBlock]: |
@@ -218,7 +220,8 @@ def _maybe_evict_cached_block(self, block: KVCacheBlock) -> bool: |
218 | 220 | # we disable hybrid kv cache manager when kv cache event is |
219 | 221 | # enabled, so there is only one group. |
220 | 222 | self.kv_event_queue.append( |
221 | | - BlockRemoved(block_hashes=[block_hash.get_hash_value()])) |
| 223 | + BlockRemoved(block_hashes=[block_hash.get_hash_value()], |
| 224 | + medium=MEDIUM_GPU)) |
222 | 225 | return True |
223 | 226 |
|
224 | 227 | def touch(self, blocks: tuple[list[KVCacheBlock], ...]) -> None: |
|
0 commit comments