We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bfe0b20, commit 5929779 (Copy full SHA for 5929779)
vllm/attention/ops/prefix_prefill.py
@@ -151,7 +151,7 @@ def _fwd_kernel(Q,
151
start_n = tl.multiple_of(start_n, BLOCK_SIZE)
152
# -- compute qk ----
153
bn = tl.load(B_Loc + cur_batch * stride_b_loc_b +
154
- (start_n // BLOCK_SIZE) * stride_b_loc_s)
+ (start_n // BLOCK_SIZE) * stride_b_loc_s).to(tl.int64)
155
# [D,BLOCK_SIZE]
156
off_k = (
157
bn[None, :] * stride_k_cache_bs + cur_kv_head * stride_k_cache_h +
0 commit comments