Commit 8f9664d

comments

Signed-off-by: Sage Moore <[email protected]>
1 parent f1dbffb commit 8f9664d

1 file changed: +1 -16 lines changed

vllm/attention/backends/mla/common.py: 1 addition & 16 deletions
@@ -1282,7 +1282,6 @@ def _compute_prefill_context(
         assert prefill_metadata.context_chunk_max_seq_lens is not None
         assert prefill_metadata.context_lens_tensor is not None
 
-        has_context = prefill_metadata.context_lens_tensor.max() > 0
         output = None
         iters = len(prefill_metadata.context_chunk_seq_tot)
 
@@ -1323,21 +1322,7 @@ def _compute_prefill_context(
                 [0, q.shape[-1] - v.shape[-1]],
                 value=0)
 
-            if is_hip and envs.VLLM_USE_TRITON_FLASH_ATTN and not has_context:
-                attn_output, attn_softmax_lse = self.triton_fa_func(
-                    q,
-                    k,
-                    v_padded,
-                    None,
-                    prefill_metadata.query_start_loc,
-                    prefill_metadata.context_chunk_cu_seq_lens[i],
-                    prefill_metadata.max_query_len,
-                    prefill_metadata.context_chunk_max_seq_lens[i],
-                    False,  # causal
-                    self.scale,
-                    None,  # attn_mask is None unless applying ALiBi mask
-                )
-            elif is_vllm_fa:
+            if is_vllm_fa:
                 attn_output, attn_softmax_lse = self.flash_attn_varlen_func(
                     q=q,
                     k=k,

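Note on the change: the deleted branch was the ROCm (HIP) Triton flash-attention path, guarded by "not has_context". Since _compute_prefill_context computes attention over previously cached context chunks, that guard seemingly could never hold here, so the branch looks like dead code; the commit accordingly drops the now-unused has_context flag as well.

A minimal sketch of the call site as it reads after this commit. The keyword arguments past k=k are assumptions inferred from the positional arguments of the removed triton_fa_func call, since the diff truncates the surviving call:

            # Sketch only, not the verbatim source: kwargs past k=k are
            # assumptions inferred from the deleted triton_fa_func call.
            if is_vllm_fa:
                attn_output, attn_softmax_lse = self.flash_attn_varlen_func(
                    q=q,
                    k=k,
                    v=v_padded,
                    cu_seqlens_q=prefill_metadata.query_start_loc,
                    cu_seqlens_k=prefill_metadata.context_chunk_cu_seq_lens[i],
                    max_seqlen_q=prefill_metadata.max_query_len,
                    max_seqlen_k=prefill_metadata.context_chunk_max_seq_lens[i],
                    softmax_scale=self.scale,
                    # Context chunks are attended without a causal mask, matching
                    # the False "causal" argument of the removed Triton call.
                    causal=False,
                    # Assumption: the vLLM flash-attn fork can also return the
                    # softmax LSE alongside the output.
                    return_softmax_lse=True,
                )

Returning the softmax LSE here matters because context is processed chunk by chunk: the per-chunk partial outputs can only be merged into one attention result if each chunk's log-sum-exp is available.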