Cast bn to int64 to avoid integer overflow

rasmith · micah-wil · commit 5929779ff933 · 2025-09-02T19:52:29.000Z
Signed-off-by: Randall Smith <Randall.Smith@amd.com>
diff --git a/vllm/attention/ops/prefix_prefill.py b/vllm/attention/ops/prefix_prefill.py
@@ -151,7 +151,7 @@ def _fwd_kernel(Q,
         start_n = tl.multiple_of(start_n, BLOCK_SIZE)
         # -- compute qk ----
         bn = tl.load(B_Loc + cur_batch * stride_b_loc_b +
-                     (start_n // BLOCK_SIZE) * stride_b_loc_s)
+                     (start_n // BLOCK_SIZE) * stride_b_loc_s).to(tl.int64)
         # [D,BLOCK_SIZE]
         off_k = (
             bn[None, :] * stride_k_cache_bs + cur_kv_head * stride_k_cache_h +