[Perf][Easy] Early stop in request_block_hasher (#26112)

Jialin · web-flow · commit 201c971e96c1 · 2025-10-05T16:46:03.000+08:00
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
@@ -585,6 +585,10 @@ def request_block_hasher(request: Request) -> list[BlockHash]:
         start_token_idx = len(request.block_hashes) * block_size
         num_tokens = request.num_tokens
 
+        if start_token_idx + block_size > num_tokens:
+            # Early stop when there are no new full blocks created.
+            return []
+
         curr_mm_idx = 0
         if start_token_idx > 0:
             # Set curr_mm_idx = -1 to indicate the last mm input.