@@ -117,7 +117,7 @@ def test_prefill(hash_algo):
117
117
blocks = manager .allocate_slots (req0 , 55 ,
118
118
len (computed_blocks .blocks [0 ]) * 16 ,
119
119
computed_blocks )
120
- assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ]]
120
+ assert blocks .get_block_ids () == ([ 1 , 2 , 3 , 4 ], )
121
121
122
122
# Check full block metadata
123
123
parent_block_hash = None
@@ -141,13 +141,13 @@ def test_prefill(hash_algo):
141
141
req1 = make_request ("1" , common_token_ids + unique_token_ids )
142
142
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req1 )
143
143
assert len (manager .req_to_block_hashes [req1 .request_id ]) == 3
144
- assert computed_blocks .get_block_ids () == [[ 1 , 2 , 3 ]]
144
+ assert computed_blocks .get_block_ids () == ([ 1 , 2 , 3 ], )
145
145
assert num_computed_tokens == 3 * 16
146
146
num_new_tokens = 53 - 3 * 16
147
147
blocks = manager .allocate_slots (req1 , num_new_tokens ,
148
148
len (computed_blocks .blocks [0 ]) * 16 ,
149
149
computed_blocks )
150
- assert blocks .get_block_ids () == [[ 5 ]]
150
+ assert blocks .get_block_ids () == ([ 5 ], )
151
151
for block in computed_blocks .blocks [0 ]:
152
152
assert block .ref_cnt == 2
153
153
@@ -175,13 +175,13 @@ def test_prefill(hash_algo):
175
175
req2 = make_request ("2" , common_token_ids + unique_token_ids )
176
176
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req2 )
177
177
assert len (manager .req_to_block_hashes [req2 .request_id ]) == 3
178
- assert computed_blocks .get_block_ids () == [[ 1 , 2 , 3 ]]
178
+ assert computed_blocks .get_block_ids () == ([ 1 , 2 , 3 ], )
179
179
assert num_computed_tokens == 3 * 16
180
180
num_new_tokens = 53 - 3 * 16
181
181
blocks = manager .allocate_slots (req2 , num_new_tokens ,
182
182
len (computed_blocks .blocks [0 ]) * 16 ,
183
183
computed_blocks )
184
- assert blocks .get_block_ids () == [[ 6 ]]
184
+ assert blocks .get_block_ids () == ([ 6 ], )
185
185
186
186
# Although we only have 6 free blocks, we have 8 blocks in
187
187
# the free block queue due to lazy removal.
@@ -205,7 +205,7 @@ def test_prefill(hash_algo):
205
205
len (computed_blocks .blocks [0 ]) * 16 ,
206
206
computed_blocks )
207
207
# This block ID order also checks the eviction order.
208
- assert blocks .get_block_ids () == [[ 7 , 8 , 9 , 10 , 4 , 5 , 6 , 3 , 2 , 1 ]]
208
+ assert blocks .get_block_ids () == ([ 7 , 8 , 9 , 10 , 4 , 5 , 6 , 3 , 2 , 1 ], )
209
209
assert manager .block_pool .free_block_queue .num_free_blocks == 0
210
210
assert manager .block_pool .free_block_queue .free_list_head is None
211
211
assert manager .block_pool .free_block_queue .free_list_tail is None
@@ -236,8 +236,8 @@ def test_prefill_hybrid_model():
236
236
blocks = manager .allocate_slots (req0 , 55 ,
237
237
len (computed_blocks .blocks [0 ]) * 16 ,
238
238
computed_blocks )
239
- assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ], [5 , 6 , 7 , 8 ] ,
240
- [9 , 10 , 11 , 12 ]]
239
+ assert blocks .get_block_ids () == ([ 1 , 2 , 3 , 4 ], [5 , 6 , 7 ,
240
+ 8 ], [9 , 10 , 11 , 12 ])
241
241
242
242
# Check full block metadata
243
243
parent_block_hash = None
@@ -263,14 +263,14 @@ def test_prefill_hybrid_model():
263
263
req1 = make_request ("1" , common_token_ids + unique_token_ids )
264
264
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req1 )
265
265
assert len (manager .req_to_block_hashes [req1 .request_id ]) == 3
266
- assert computed_blocks .get_block_ids () == [[ 1 , 2 , 3 ], [0 , 6 , 7 ] ,
267
- [0 , 10 , 11 ]]
266
+ assert computed_blocks .get_block_ids () == ([ 1 , 2 , 3 ], [0 , 6 ,
267
+ 7 ], [0 , 10 , 11 ])
268
268
assert num_computed_tokens == 3 * 16
269
269
num_new_tokens = 53 - 3 * 16
270
270
blocks = manager .allocate_slots (req1 , num_new_tokens ,
271
271
len (computed_blocks .blocks [0 ]) * 16 ,
272
272
computed_blocks )
273
- assert blocks .get_block_ids () == [[ 13 ], [14 ], [15 ]]
273
+ assert blocks .get_block_ids () == ([ 13 ], [14 ], [15 ])
274
274
for block_per_group in computed_blocks .blocks :
275
275
for block in block_per_group :
276
276
if block != manager .block_pool .null_block :
@@ -374,7 +374,7 @@ def test_prefill_plp():
374
374
blocks = manager .allocate_slots (req0 , 55 ,
375
375
len (computed_blocks .blocks [0 ]) * 16 ,
376
376
computed_blocks )
377
- assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ]]
377
+ assert blocks .get_block_ids () == ([ 1 , 2 , 3 , 4 ], )
378
378
req0_block_hashes = [b .block_hash for b in blocks .blocks [0 ]]
379
379
380
380
# Check full block metadata
@@ -400,13 +400,13 @@ def test_prefill_plp():
400
400
req1 = make_request ("1" , common_token_ids + unique_token_ids )
401
401
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req1 )
402
402
assert len (manager .req_to_block_hashes [req1 .request_id ]) == 3
403
- assert computed_blocks .get_block_ids () == [[ 1 , 2 , 3 ]]
403
+ assert computed_blocks .get_block_ids () == ([ 1 , 2 , 3 ], )
404
404
assert num_computed_tokens == 3 * 16
405
405
num_new_tokens = 53 - 3 * 16
406
406
blocks = manager .allocate_slots (req1 , num_new_tokens ,
407
407
len (computed_blocks .blocks [0 ]) * 16 ,
408
408
computed_blocks )
409
- assert blocks .get_block_ids () == [[ 5 ]]
409
+ assert blocks .get_block_ids () == ([ 5 ], )
410
410
for block in computed_blocks .blocks [0 ]:
411
411
assert block .ref_cnt == 2
412
412
@@ -444,7 +444,7 @@ def test_prefill_plp():
444
444
block_ids = blocks .get_block_ids ()
445
445
# Duplicate cached blocks have different ids but same hashes vs request #0
446
446
assert [b .block_hash for b in blocks .blocks [0 ]] == req0_block_hashes
447
- assert block_ids != [[ 1 , 2 , 3 , 4 ]]
447
+ assert block_ids != ([ 1 , 2 , 3 , 4 ], )
448
448
449
449
# Request #2 block hashes are valid since request #0 hashes are.
450
450
# Check block reference counts.
@@ -474,7 +474,7 @@ def test_decode():
474
474
blocks = manager .allocate_slots (req0 , 55 ,
475
475
len (computed_blocks .blocks [0 ]) * 16 ,
476
476
computed_blocks )
477
- assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ]]
477
+ assert blocks .get_block_ids () == ([ 1 , 2 , 3 , 4 ], )
478
478
479
479
# Append slots without allocating a new block.
480
480
req0 .num_computed_tokens = 55
@@ -546,12 +546,12 @@ def test_evict():
546
546
# Touch the first 2 blocks.
547
547
req2 = make_request ("2" , list (range (2 * 16 + 3 )))
548
548
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req2 )
549
- assert computed_blocks .get_block_ids () == [[ 1 , 2 ]]
549
+ assert computed_blocks .get_block_ids () == ([ 1 , 2 ], )
550
550
assert num_computed_tokens == 2 * 16
551
551
blocks = manager .allocate_slots (req2 , 3 ,
552
552
len (computed_blocks .blocks [0 ]) * 16 ,
553
553
computed_blocks )
554
- assert blocks .get_block_ids () == [[ 10 ]]
554
+ assert blocks .get_block_ids () == ([ 10 ], )
555
555
assert manager .block_pool .free_block_queue .num_free_blocks == 7
556
556
557
557
@@ -865,7 +865,7 @@ def test_mm_prefix_caching():
865
865
blocks = manager .allocate_slots (req0 , 59 ,
866
866
len (computed_blocks .blocks [0 ]) * 16 ,
867
867
computed_blocks )
868
- assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ]]
868
+ assert blocks .get_block_ids () == ([ 1 , 2 , 3 , 4 ], )
869
869
req0 .num_computed_tokens = 59
870
870
871
871
# Append slots without allocating a new block.
@@ -926,7 +926,7 @@ def test_cache_key_salting():
926
926
blocks = manager .allocate_slots (req0 , 59 ,
927
927
len (computed_blocks .blocks [0 ]) * 16 ,
928
928
computed_blocks )
929
- assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ]]
929
+ assert blocks .get_block_ids () == ([ 1 , 2 , 3 , 4 ], )
930
930
req0 .num_computed_tokens = 59
931
931
932
932
# Append slots without allocating a new block.
@@ -1042,7 +1042,7 @@ def test_reset_prefix_cache():
1042
1042
all_token_ids = full_block_token_ids + unique_token_ids
1043
1043
req0 = make_request ("0" , all_token_ids )
1044
1044
blocks = manager .allocate_slots (req0 , 55 )
1045
- assert blocks .get_block_ids () == [[ 1 , 2 , 3 , 4 ]]
1045
+ assert blocks .get_block_ids () == ([ 1 , 2 , 3 , 4 ], )
1046
1046
1047
1047
unique_token_ids = [4 ] * 7
1048
1048
all_token_ids = full_block_token_ids + unique_token_ids
@@ -1053,7 +1053,7 @@ def test_reset_prefix_cache():
1053
1053
blocks = manager .allocate_slots (req1 , 7 ,
1054
1054
len (computed_blocks .blocks [0 ]) * 16 ,
1055
1055
computed_blocks )
1056
- assert blocks .get_block_ids () == [[ 5 ]]
1056
+ assert blocks .get_block_ids () == ([ 5 ], )
1057
1057
1058
1058
# Failed to reset prefix cache because some blocks are not freed yet.
1059
1059
assert not manager .reset_prefix_cache ()
0 commit comments