vllm/v1/attention/backends: 1 file changed (+14, −15)

@@ -253,21 +253,20 @@ def __init__(
         self.force_prefill_decode_attn = \
             envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION

-        if not self.force_prefill_decode_attn:
-            # If not using prefill decode attention, we use the Triton
-            # unified attention implementation.
-            if use_aiter_unified_attention():
-                logger.info_once(
-                    "Using aiter unified attention for TritonAttentionImpl")
-                from aiter.ops.triton.unified_attention import (
-                    unified_attention)
-                self.unified_attention = unified_attention
-            else:
-                logger.info_once(
-                    "Using vllm unified attention for TritonAttentionImpl")
-                from vllm.attention.ops.triton_unified_attention import (
-                    unified_attention)
-                self.unified_attention = unified_attention
+        # If not using prefill decode attention, we use the Triton
+        # unified attention implementation.
+        if use_aiter_unified_attention():
+            logger.info_once(
+                "Using aiter unified attention for TritonAttentionImpl")
+            from aiter.ops.triton.unified_attention import (
+                unified_attention)
+            self.unified_attention = unified_attention
+        elif not self.force_prefill_decode_attn:
+            logger.info_once(
+                "Using vllm unified attention for TritonAttentionImpl")
+            from vllm.attention.ops.triton_unified_attention import (
+                unified_attention)
+            self.unified_attention = unified_attention

         self.sinks = sinks
         if sinks is not None:
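What this hunk changes: previously, both unified-attention imports sat behind the `not self.force_prefill_decode_attn` guard, so setting `VLLM_V1_USE_PREFILL_DECODE_ATTENTION` skipped the aiter path entirely. After the change, the aiter branch is checked first and only the vLLM Triton fallback remains gated by the env flag. The sketch below is a minimal, self-contained illustration of that selection logic, not the vLLM API; the function names (`bind_before`, `bind_after`) and boolean parameters are hypothetical stand-ins for `envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION` and `use_aiter_unified_attention()` from the diff.

```python
# Hypothetical sketch of which function, if any, gets bound to
# self.unified_attention at __init__ time. Not vLLM code; the names
# mirror the diff but the flags are plain booleans here.
from typing import Optional


def bind_before(force_prefill_decode: bool,
                aiter_available: bool) -> Optional[str]:
    # Old logic: the env flag gates both unified-attention imports.
    if not force_prefill_decode:
        if aiter_available:
            return "aiter.ops.triton.unified_attention"
        return "vllm.attention.ops.triton_unified_attention"
    return None  # nothing bound; prefill/decode kernels used instead


def bind_after(force_prefill_decode: bool,
               aiter_available: bool) -> Optional[str]:
    # New logic: the aiter import is checked first; only the vLLM
    # Triton fallback stays behind the env flag.
    if aiter_available:
        return "aiter.ops.triton.unified_attention"
    if not force_prefill_decode:
        return "vllm.attention.ops.triton_unified_attention"
    return None


if __name__ == "__main__":
    # Enumerate all flag combinations to show the behavior change.
    for force in (False, True):
        for aiter in (False, True):
            print(f"force={force} aiter={aiter}: "
                  f"{bind_before(force, aiter)} -> {bind_after(force, aiter)}")
```

Running the enumeration shows the only combination that changes is `force=True, aiter=True`: before, nothing was bound; after, the aiter unified-attention kernel is imported and bound. Whether that bound function is ultimately invoked depends on the dispatch path, which is outside this hunk.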