16
16
FreeKVCacheBlockQueue , KVCacheBlock , PrefixCachingMetrics ,
17
17
estimate_max_model_len , generate_block_hash_extra_keys ,
18
18
get_kv_cache_config , get_max_concurrency_for_kv_cache_config ,
19
- hash_block_tokens , hash_request_tokens , init_none_hash ,
19
+ get_request_block_hasher , hash_block_tokens , init_none_hash ,
20
20
is_kv_cache_type_uniform , unify_kv_cache_configs )
21
21
from vllm .v1 .kv_cache_interface import (FullAttentionSpec , KVCacheConfig ,
22
22
KVCacheGroupSpec , KVCacheTensor ,
29
29
30
30
def make_request (request_id ,
31
31
prompt_token_ids ,
32
+ block_size = 3 ,
33
+ hash_fn = hash ,
32
34
mm_positions = None ,
33
35
mm_hashes = None ,
34
36
cache_salt = None ):
@@ -37,18 +39,17 @@ def make_request(request_id,
37
39
else :
38
40
multi_modal_inputs = [MultiModalKwargs ({})] * len (mm_positions )
39
41
40
- return Request (
41
- request_id = request_id ,
42
- prompt_token_ids = prompt_token_ids ,
43
- multi_modal_inputs = multi_modal_inputs ,
44
- multi_modal_hashes = mm_hashes ,
45
- multi_modal_placeholders = mm_positions ,
46
- sampling_params = SamplingParams (max_tokens = 17 ),
47
- pooling_params = None ,
48
- eos_token_id = 100 ,
49
- lora_request = None ,
50
- cache_salt = cache_salt ,
51
- )
42
+ return Request (request_id = request_id ,
43
+ prompt_token_ids = prompt_token_ids ,
44
+ multi_modal_inputs = multi_modal_inputs ,
45
+ multi_modal_hashes = mm_hashes ,
46
+ multi_modal_placeholders = mm_positions ,
47
+ sampling_params = SamplingParams (max_tokens = 17 ),
48
+ pooling_params = None ,
49
+ eos_token_id = 100 ,
50
+ lora_request = None ,
51
+ cache_salt = cache_salt ,
52
+ block_hasher = get_request_block_hasher (block_size , hash_fn ))
52
53
53
54
54
55
def new_kv_cache_spec (block_size = 16 ,
@@ -416,22 +417,22 @@ def test_hash_block_tokens(hash_fn):
416
417
417
418
418
419
@pytest .mark .parametrize ("hash_fn" , [sha256 , sha256_cbor_64bit , hash ])
419
- def test_hash_request_tokens (hash_fn ):
420
+ def test_request_block_hasher (hash_fn ):
420
421
import vllm .v1 .core .kv_cache_utils
421
422
init_none_hash (hash_fn )
422
423
request = make_request (
423
424
request_id = 0 ,
424
425
prompt_token_ids = [_ for _ in range (6 )],
426
+ block_size = 3 ,
427
+ hash_fn = hash_fn ,
425
428
mm_positions = [
426
429
PlaceholderRange (offset = 0 , length = 3 ),
427
430
PlaceholderRange (offset = 3 , length = 3 ),
428
431
],
429
432
mm_hashes = ["hash1" , "hash2" ],
430
433
)
431
434
432
- block_size = 3
433
- block_hashes = hash_request_tokens (hash_fn , block_size , request )
434
-
435
+ block_hashes = request .block_hashes
435
436
assert len (block_hashes ) == 2
436
437
assert isinstance (block_hashes [0 ], vllm .v1 .core .kv_cache_utils .BlockHash )
437
438
assert isinstance (block_hashes [1 ], vllm .v1 .core .kv_cache_utils .BlockHash )
@@ -452,6 +453,8 @@ def test_hash_tokens_different_mm_input(hash_fn):
452
453
request1 = make_request (
453
454
request_id = 0 ,
454
455
prompt_token_ids = [_ for _ in range (6 )],
456
+ block_size = 3 ,
457
+ hash_fn = hash_fn ,
455
458
mm_positions = [
456
459
PlaceholderRange (offset = 0 , length = 3 ),
457
460
PlaceholderRange (offset = 3 , length = 3 ),
@@ -467,9 +470,8 @@ def test_hash_tokens_different_mm_input(hash_fn):
467
470
],
468
471
mm_hashes = ["hash3" , "hash2" ],
469
472
)
470
- block_size = 3
471
- block_hashes1 = hash_request_tokens (hash_fn , block_size , request1 )
472
- block_hashes2 = hash_request_tokens (hash_fn , block_size , request2 )
473
+ block_hashes1 = request1 .block_hashes
474
+ block_hashes2 = request2 .block_hashes
473
475
assert block_hashes1 [0 ] != block_hashes2 [0 ]
474
476
assert block_hashes1 [1 ] != block_hashes2 [1 ]
475
477
@@ -481,12 +483,13 @@ def test_hash_request_tokens_no_mm_inputs(hash_fn):
481
483
request = make_request (
482
484
request_id = 0 ,
483
485
prompt_token_ids = [_ for _ in range (6 )],
486
+ block_size = 3 ,
487
+ hash_fn = hash_fn ,
484
488
mm_positions = None ,
485
489
mm_hashes = None ,
486
490
)
487
491
488
- block_size = 3
489
- block_hashes = hash_request_tokens (hash_fn , block_size , request )
492
+ block_hashes = request .block_hashes
490
493
491
494
assert len (block_hashes ) == 2
492
495
assert block_hashes [0 ].token_ids == (0 , 1 , 2 )
@@ -846,6 +849,7 @@ def test_allocate_with_lookahead():
846
849
request = make_request (
847
850
request_id = 0 ,
848
851
prompt_token_ids = [],
852
+ block_size = block_size ,
849
853
mm_positions = None ,
850
854
mm_hashes = None ,
851
855
)
0 commit comments