Skip to content

Commit b193a40

Browse files
author
Aleksandr Malyshev
committed
Updated logic for attention-backend selection, defaulting to split (prefill/decode) attention
1 parent 0f826a6 commit b193a40

File tree

1 file changed

+14
-15
lines changed

1 file changed

+14
-15
lines changed

vllm/v1/attention/backends/triton_attn.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -253,21 +253,20 @@ def __init__(
253253
self.force_prefill_decode_attn = \
254254
envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION
255255

256-
if not self.force_prefill_decode_attn:
257-
# If not using prefill decode attention, we use the Triton
258-
# unified attention implementation.
259-
if use_aiter_unified_attention():
260-
logger.info_once(
261-
"Using aiter unified attention for TritonAttentionImpl")
262-
from aiter.ops.triton.unified_attention import (
263-
unified_attention)
264-
self.unified_attention = unified_attention
265-
else:
266-
logger.info_once(
267-
"Using vllm unified attention for TritonAttentionImpl")
268-
from vllm.attention.ops.triton_unified_attention import (
269-
unified_attention)
270-
self.unified_attention = unified_attention
256+
# If not using prefill decode attention, we use the Triton
257+
# unified attention implementation.
258+
if use_aiter_unified_attention():
259+
logger.info_once(
260+
"Using aiter unified attention for TritonAttentionImpl")
261+
from aiter.ops.triton.unified_attention import (
262+
unified_attention)
263+
self.unified_attention = unified_attention
264+
elif not self.force_prefill_decode_attn:
265+
logger.info_once(
266+
"Using vllm unified attention for TritonAttentionImpl")
267+
from vllm.attention.ops.triton_unified_attention import (
268+
unified_attention)
269+
self.unified_attention = unified_attention
271270

272271
self.sinks = sinks
273272
if sinks is not None:

0 commit comments

Comments (0)