File tree Expand file tree Collapse file tree 2 files changed +3
-17
lines changed
Expand file tree Collapse file tree 2 files changed +3
-17
lines changed Original file line number Diff line number Diff line change @@ -1327,21 +1327,7 @@ def _compute_prefill_context(
1327 1327                     [0, q.shape[-1] - v.shape[-1]],
1328 1328                     value=0)
1329 1329
1330      -                if is_hip and envs.VLLM_USE_TRITON_FLASH_ATTN:
1331      -                    attn_output, attn_softmax_lse = self.triton_fa_func(
1332      -                        q,
1333      -                        k,
1334      -                        v_padded,
1335      -                        None,
1336      -                        prefill_metadata.query_start_loc,
1337      -                        prefill_metadata.context_chunk_cu_seq_lens[i],
1338      -                        prefill_metadata.max_query_len,
1339      -                        prefill_metadata.context_chunk_max_seq_lens[i],
1340      -                        False,  # causal
1341      -                        self.scale,
1342      -                        None,  # attn_mask is None unless applying ALiBi mask
1343      -                    )
1344      -                elif is_vllm_fa:
     1330 +                if is_vllm_fa:
1345 1331                     attn_output, attn_softmax_lse = self.flash_attn_varlen_func(
1346 1332                         q=q,
1347 1333                         k=k,
Original file line number Diff line number Diff line change @@ -3474,9 +3474,9 @@ def __post_init__(self):
3474 3474             self.compilation_config.level = CompilationLevel.NO_COMPILATION
3475 3475
3476 3476         if self.model_config and self.model_config.use_mla and \
3477      -                not current_platform.is_cuda():
     3477 +                not (current_platform.is_cuda() or current_platform.is_rocm()):
3478 3478             logger.info(
3479      -                "MLA is enabled on a non-cuda platform; forcing chunked "
     3479 +                "MLA is enabled on a non-GPU platform; forcing chunked "
3480 3480                 "prefill and prefix caching to be disabled.")
3481 3481             self.scheduler_config.enable_chunked_prefill = False
3482 3482             self.scheduler_config.chunked_prefill_enabled = False
You can’t perform that action at this time.
0 commit comments