We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e0986ea · commit 201c971 · Copy full SHA for 201c971
vllm/v1/core/kv_cache_utils.py
@@ -585,6 +585,10 @@ def request_block_hasher(request: Request) -> list[BlockHash]:
585
start_token_idx = len(request.block_hashes) * block_size
586
num_tokens = request.num_tokens
587
588
+ if start_token_idx + block_size > num_tokens:
589
# Early stop when no new full blocks have been created.
590
+ return []
591
+
592
curr_mm_idx = 0
593
if start_token_idx > 0:
594
# Set curr_mm_idx = -1 to indicate the last mm input.
0 commit comments