vllm/attention/backends/mla: 1 file changed, +1, -16 lines

@@ -1282,7 +1282,6 @@ def _compute_prefill_context(
         assert prefill_metadata.context_chunk_max_seq_lens is not None
         assert prefill_metadata.context_lens_tensor is not None
 
-        has_context = prefill_metadata.context_lens_tensor.max() > 0
         output = None
         iters = len(prefill_metadata.context_chunk_seq_tot)
@@ -1323,21 +1322,7 @@ def _compute_prefill_context(
                 [0, q.shape[-1] - v.shape[-1]],
                 value=0)
 
-            if is_hip and envs.VLLM_USE_TRITON_FLASH_ATTN and not has_context:
-                attn_output, attn_softmax_lse = self.triton_fa_func(
-                    q,
-                    k,
-                    v_padded,
-                    None,
-                    prefill_metadata.query_start_loc,
-                    prefill_metadata.context_chunk_cu_seq_lens[i],
-                    prefill_metadata.max_query_len,
-                    prefill_metadata.context_chunk_max_seq_lens[i],
-                    False,  # causal
-                    self.scale,
-                    None,  # attn_mask is None unless applying ALiBi mask
-                )
-            elif is_vllm_fa:
+            if is_vllm_fa:
                 attn_output, attn_softmax_lse = self.flash_attn_varlen_func(
                     q=q,
                     k=k,
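
Why the branch could be dropped: a plausible reading of the diff (an assumption; the PR text is not shown here) is that the guard `not has_context` made the Triton path unreachable inside `_compute_prefill_context`, which only runs when at least one request has cached context. Once its only consumer was deleted, `has_context` itself became dead, hence the first hunk. A minimal, self-contained illustration of that invariant (all values below are made up):

import torch

# Hypothetical per-request cached-context lengths. By assumption,
# _compute_prefill_context is only entered when some request has
# context, so max() > 0 holds here.
context_lens_tensor = torch.tensor([0, 3, 7])

has_context = context_lens_tensor.max() > 0   # True on this code path
is_hip = True                                 # pretend we are on ROCm
VLLM_USE_TRITON_FLASH_ATTN = True             # pretend the env flag is set

# The guard of the deleted branch: false whenever context exists,
# i.e. always false inside this loop.
takes_triton_path = is_hip and VLLM_USE_TRITON_FLASH_ATTN and not has_context
print(takes_triton_path)  # False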
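For context on why both the removed Triton call and the surviving `flash_attn_varlen_func` call return `attn_softmax_lse`: chunked-context prefill computes attention one KV chunk at a time and then merges the per-chunk results, which requires each chunk's log-sum-exp. A sketch of such a merge is below; the function name and shapes are illustrative and do not necessarily match vLLM's internal helper.

import torch

# Combine attention computed over two KV chunks into the result for the
# concatenated KV, using each chunk's log-sum-exp (LSE). Assumed shapes:
#   out_*: [num_tokens, num_heads, head_dim], lse_*: [num_tokens, num_heads]
def merge_chunk_outputs(out_a, lse_a, out_b, lse_b):
    max_lse = torch.maximum(lse_a, lse_b)
    w_a = torch.exp(lse_a - max_lse)   # softmax mass of chunk A, renormalized
    w_b = torch.exp(lse_b - max_lse)   # softmax mass of chunk B, renormalized
    denom = w_a + w_b
    merged = (out_a * (w_a / denom).unsqueeze(-1) +
              out_b * (w_b / denom).unsqueeze(-1))
    merged_lse = max_lse + torch.log(denom)  # LSE over both chunks combined
    return merged, merged_lse

# Toy usage with random values:
out_a, lse_a = torch.randn(4, 2, 8), torch.randn(4, 2)
out_b, lse_b = torch.randn(4, 2, 8), torch.randn(4, 2)
merged, merged_lse = merge_chunk_outputs(out_a, lse_a, out_b, lse_b)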