Skip to content

Commit 1db44bb

Browse files
linzebing, skyloevil
authored and committed
[Core] Use sha256 bytes instead of BlockHash to reduce GC overhead (vllm-project#23673)
Signed-off-by: linzebing <[email protected]>
1 parent 11747e3 commit 1db44bb

File tree

15 files changed

+298
-283
lines changed

15 files changed

+298
-283
lines changed

examples/online_serving/kv_events_subscriber.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
import zmq
77
from msgspec.msgpack import Decoder
88

9+
from vllm.v1.core.kv_cache_utils import BlockHash
10+
911

1012
#
1113
# Types copied from vllm.distributed.kv_events
@@ -22,16 +24,16 @@ class KVCacheEvent(
2224

2325

2426
class BlockStored(KVCacheEvent):
25-
block_hashes: list[int]
26-
parent_block_hash: Optional[int]
27+
block_hashes: list[BlockHash]
28+
parent_block_hash: Optional[BlockHash]
2729
token_ids: list[int]
2830
block_size: int
2931
lora_id: Optional[int]
3032
medium: Optional[str]
3133

3234

3335
class BlockRemoved(KVCacheEvent):
34-
block_hashes: list[int]
36+
block_hashes: list[BlockHash]
3537
medium: Optional[str]
3638

3739

tests/utils_/test_utils.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -835,22 +835,20 @@ def test_model_specification(parser_with_config, cli_config_file,
835835

836836
@pytest.mark.parametrize("input", [(), ("abc", ), (None, ),
837837
(None, bool, [1, 2, 3])])
838-
@pytest.mark.parametrize("output", [0, 1, 2])
839-
def test_sha256(input: tuple, output: int):
840-
hash = sha256(input)
841-
assert hash is not None
842-
assert isinstance(hash, int)
843-
assert hash != 0
838+
def test_sha256(input: tuple):
839+
digest = sha256(input)
840+
assert digest is not None
841+
assert isinstance(digest, bytes)
842+
assert digest != b""
844843

845-
bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL)
846-
assert hash == int.from_bytes(hashlib.sha256(bytes).digest(),
847-
byteorder="big")
844+
input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL)
845+
assert digest == hashlib.sha256(input_bytes).digest()
848846

849847
# hashing again, returns the same value
850-
assert hash == sha256(input)
848+
assert digest == sha256(input)
851849

852850
# hashing different input, returns different value
853-
assert hash != sha256(input + (1, ))
851+
assert digest != sha256(input + (1, ))
854852

855853

856854
@pytest.mark.parametrize(

tests/v1/core/test_kv_cache_utils.py

Lines changed: 27 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -6,20 +6,22 @@
66
import pytest
77
import torch
88

9+
import vllm.v1.core.kv_cache_utils as kv_cache_utils
910
from vllm.config import ModelConfig, SchedulerConfig, VllmConfig
1011
from vllm.multimodal.inputs import (MultiModalFeatureSpec,
1112
MultiModalKwargsItem, PlaceholderRange)
1213
from vllm.sampling_params import SamplingParams
13-
from vllm.utils import GiB_bytes, sha256, sha256_cbor_64bit
14+
from vllm.utils import GiB_bytes, sha256, sha256_cbor
1415
from vllm.v1.core.kv_cache_manager import KVCacheManager
1516
# disable yapf here as it formats differently than isort such that both fail
1617
# yapf: disable
1718
from vllm.v1.core.kv_cache_utils import (
18-
FreeKVCacheBlockQueue, KVCacheBlock, PrefixCachingMetrics,
19+
BlockHash, FreeKVCacheBlockQueue, KVCacheBlock, PrefixCachingMetrics,
1920
estimate_max_model_len, generate_block_hash_extra_keys,
2021
get_kv_cache_config, get_max_concurrency_for_kv_cache_config,
2122
get_request_block_hasher, hash_block_tokens, init_none_hash,
22-
is_kv_cache_type_uniform, unify_kv_cache_configs)
23+
is_kv_cache_type_uniform, make_block_hash_with_group_id,
24+
unify_kv_cache_configs)
2325
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
2426
KVCacheGroupSpec, KVCacheTensor,
2527
SlidingWindowSpec)
@@ -88,7 +90,7 @@ def new_sliding_window_spec(block_size=16,
8890
sliding_window=sliding_window)
8991

9092

91-
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor_64bit, hash])
93+
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor])
9294
def test_none_hash(monkeypatch, hash_fn):
9395
import vllm.v1.core.kv_cache_utils
9496

@@ -98,21 +100,20 @@ def test_none_hash(monkeypatch, hash_fn):
98100
reloaded_kv_cache_utils = importlib.reload(vllm.v1.core.kv_cache_utils)
99101
reloaded_kv_cache_utils.init_none_hash(hash_fn)
100102
assert reloaded_kv_cache_utils.NONE_HASH is not None
101-
assert isinstance(reloaded_kv_cache_utils.NONE_HASH, int)
102-
assert reloaded_kv_cache_utils.NONE_HASH != 0
103+
assert isinstance(reloaded_kv_cache_utils.NONE_HASH, bytes)
104+
assert reloaded_kv_cache_utils.NONE_HASH != b""
103105

104106
# case 2: PYTHONHASHSEED is set, use the seed and hash_fn
105107
with monkeypatch.context() as m:
106108
m.setenv('PYTHONHASHSEED', 'python hash seed')
107109
reloaded_kv_cache_utils = importlib.reload(vllm.v1.core.kv_cache_utils)
108110
reloaded_kv_cache_utils.init_none_hash(hash_fn)
109111
assert reloaded_kv_cache_utils.NONE_HASH is not None
110-
assert isinstance(reloaded_kv_cache_utils.NONE_HASH, int)
112+
assert isinstance(reloaded_kv_cache_utils.NONE_HASH, bytes)
111113
assert hash_fn('python hash seed') == reloaded_kv_cache_utils.NONE_HASH
112114

113115

114116
def test_kv_cache_block():
115-
import vllm.v1.core.kv_cache_utils
116117

117118
# Test KVCacheBlock initialization
118119
block = KVCacheBlock(block_id=0)
@@ -127,8 +128,7 @@ def test_kv_cache_block():
127128
assert block.ref_cnt == 0
128129

129130
# Test block hash setting and resetting
130-
block_hash = vllm.v1.core.kv_cache_utils.BlockHash(hash_value=123,
131-
token_ids=(1, 2, 3))
131+
block_hash = make_block_hash_with_group_id(BlockHash(b"abc"), 0)
132132
block.block_hash = block_hash
133133
assert block.block_hash == block_hash
134134

@@ -407,27 +407,23 @@ def test_generate_block_hash_extra_keys_cache_salt():
407407
assert next_mm_idx == 1
408408

409409

410-
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor_64bit, hash])
410+
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor])
411411
def test_hash_block_tokens(hash_fn):
412-
import vllm.v1.core.kv_cache_utils
413412
init_none_hash(hash_fn)
414-
parent_block_hash = 123
413+
parent_block_hash = BlockHash(b"123")
415414
curr_block_token_ids = (1, 2, 3)
416415
extra_keys = ("key1", "key2")
417416

418417
block_hash = hash_block_tokens(hash_fn, parent_block_hash,
419418
curr_block_token_ids, extra_keys)
420-
assert isinstance(block_hash, vllm.v1.core.kv_cache_utils.BlockHash)
421-
assert block_hash.hash_value == hash_fn(
422-
(parent_block_hash, curr_block_token_ids, extra_keys))
423-
assert block_hash.token_ids == curr_block_token_ids
424-
assert block_hash.extra_keys == extra_keys
419+
expected = hash_fn((parent_block_hash, curr_block_token_ids, extra_keys))
420+
assert block_hash == expected
425421

426422

427-
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor_64bit, hash])
423+
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor])
428424
def test_request_block_hasher(hash_fn):
429-
import vllm.v1.core.kv_cache_utils
430-
init_none_hash(hash_fn)
425+
kv_cache_utils.init_none_hash(hash_fn)
426+
431427
request = make_request(
432428
request_id="0",
433429
prompt_token_ids=[_ for _ in range(6)],
@@ -442,19 +438,13 @@ def test_request_block_hasher(hash_fn):
442438

443439
block_hashes = request.block_hashes
444440
assert len(block_hashes) == 2
445-
assert isinstance(block_hashes[0], vllm.v1.core.kv_cache_utils.BlockHash)
446-
assert isinstance(block_hashes[1], vllm.v1.core.kv_cache_utils.BlockHash)
447-
448-
# Check the first block
449-
assert block_hashes[0].token_ids == (0, 1, 2)
450-
assert block_hashes[0].extra_keys == ("hash1", )
441+
assert block_hashes[0] == hash_fn(
442+
(kv_cache_utils.NONE_HASH, (0, 1, 2), ("hash1", )))
443+
assert block_hashes[1] == hash_fn(
444+
(block_hashes[0], (3, 4, 5), ("hash2", )))
451445

452-
# Check the second block
453-
assert block_hashes[1].token_ids == (3, 4, 5)
454-
assert block_hashes[1].extra_keys == ("hash2", )
455446

456-
457-
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor_64bit, hash])
447+
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor])
458448
def test_hash_tokens_different_mm_input(hash_fn):
459449
init_none_hash(hash_fn)
460450

@@ -484,9 +474,9 @@ def test_hash_tokens_different_mm_input(hash_fn):
484474
assert block_hashes1[1] != block_hashes2[1]
485475

486476

487-
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor_64bit, hash])
477+
@pytest.mark.parametrize("hash_fn", [sha256, sha256_cbor])
488478
def test_hash_request_tokens_no_mm_inputs(hash_fn):
489-
init_none_hash(hash_fn)
479+
kv_cache_utils.init_none_hash(hash_fn)
490480

491481
request = make_request(
492482
request_id="0",
@@ -500,10 +490,9 @@ def test_hash_request_tokens_no_mm_inputs(hash_fn):
500490
block_hashes = request.block_hashes
501491

502492
assert len(block_hashes) == 2
503-
assert block_hashes[0].token_ids == (0, 1, 2)
504-
assert block_hashes[0].extra_keys is None
505-
assert block_hashes[1].token_ids == (3, 4, 5)
506-
assert block_hashes[1].extra_keys is None
493+
assert block_hashes[0] == hash_fn(
494+
(kv_cache_utils.NONE_HASH, (0, 1, 2), None))
495+
assert block_hashes[1] == hash_fn((block_hashes[0], (3, 4, 5), None))
507496

508497

509498
def test_metrics():

0 commit comments

Comments
 (0)