Skip to content

Commit 2435ea7

Browse files
authored
[Bugfix] Make condition in triton kernel constexpr (#22370)
Signed-off-by: Gregory Shtrasberg <[email protected]>
1 parent 4a6b72c commit 2435ea7

File tree

2 files changed: 6 additions and 2 deletions.

2 files changed: 6 additions and 2 deletions.

vllm/attention/ops/chunked_prefill_paged_decode.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ def kernel_paged_attention_2d(
6060
stride_v_cache_3: tl.int64, # int
6161
filter_by_query_len: tl.constexpr, # bool
6262
query_start_len_ptr, # [num_seqs+1]
63+
USE_SINKS: tl.constexpr, # bool
6364
):
6465
seq_idx = tl.program_id(0)
6566
kv_head_idx = tl.program_id(1)
@@ -96,7 +97,7 @@ def kernel_paged_attention_2d(
9697

9798
block_table_offset = seq_idx * block_table_stride
9899

99-
if sink_ptr is None:
100+
if not USE_SINKS:
100101
M = tl.full([num_queries_per_kv_padded],
101102
float("-inf"),
102103
dtype=tl.float32)
@@ -386,4 +387,5 @@ def chunked_prefill_paged_decode(
386387
stride_v_cache_3=value_cache.stride(3),
387388
filter_by_query_len=True,
388389
query_start_len_ptr=query_start_loc,
390+
USE_SINKS=sinks is not None,
389391
)

vllm/attention/ops/prefix_prefill.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,7 @@ def _fwd_kernel(Q,
8181
num_unroll_cache: tl.constexpr,
8282
num_unroll_request: tl.constexpr,
8383
SKIP_DECODE: tl.constexpr,
84+
USE_SINKS: tl.constexpr,
8485
MAX_Q_LEN: tl.constexpr = 0,
8586
MAX_CTX_LEN: tl.constexpr = 0):
8687

@@ -127,7 +128,7 @@ def _fwd_kernel(Q,
127128
other=0.0) # [M,D]
128129

129130
# initialize pointer to m and l
130-
if sink_ptr is None:
131+
if not USE_SINKS:
131132
m_i = tl.full([BLOCK_M], float("-inf"), dtype=tl.float32)
132133
else:
133134
m_i = tl.load(
@@ -910,5 +911,6 @@ def context_attention_fwd(q,
910911
num_unroll_request=1,
911912
num_warps=4,
912913
num_stages=1,
914+
USE_SINKS=sinks is not None,
913915
**extra_kargs)
914916
return

0 commit comments

Comments
 (0)