3 files changed: +1 −8 lines

@@ -5,7 +5,6 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req
 numba == 0.61.2; python_version > '3.9'
 
 # Dependencies for AMD GPUs
-numpy==1.26.4
 boto3
 botocore
 datasets
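With the numpy==1.26.4 pin dropped, the resolved numpy version is whatever the
remaining dependencies constrain. As a side note, the python_version markers
used above are evaluated per install environment; a minimal sketch using the
packaging library (an assumption: modern pip vendors it, but it is imported
here explicitly):

    from packaging.markers import Marker

    # Mirrors the numba pins above: exactly one of the two markers is active
    # for any given interpreter.
    print(Marker("python_version == '3.9'").evaluate())  # False on 3.10+
    print(Marker("python_version > '3.9'").evaluate())   # True on 3.10+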

@@ -535,7 +535,7 @@ def __init__(
                 f"Head size {head_size} is not supported by PagedAttention. "
                 f"Supported head sizes are: {supported_head_sizes}.")
 
-        self.use_naive_attn = envs.VLLM_USE_SDPA_ATTENTION  # Default False
+        self.use_naive_attn = False
         # NOTE: Allow for switching between Triton and CK. Defaulting to triton.
         self.use_triton_flash_attn = envs.VLLM_USE_TRITON_FLASH_ATTN
         if self.use_triton_flash_attn:
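The effect of this hunk: the naive SDPA path can no longer be forced on from
the environment; use_naive_attn now always starts False and can only be
enabled by the backend's own fallback logic. A minimal standalone sketch of
the before and after initialization (not the real __init__; the fallback
branch and ck_flash_attn_available are assumptions about logic outside this
hunk):

    import os

    def flag(name: str, default: str) -> bool:
        # Same parsing convention as vllm's envs.py, shown in the next file.
        return os.environ.get(name, default).lower() in ("true", "1")

    # Before: users could force the naive path via the environment.
    use_naive_attn = flag("VLLM_USE_SDPA_ATTENTION", "False")

    # After: the flag starts False unconditionally...
    use_naive_attn = False
    use_triton_flash_attn = flag("VLLM_USE_TRITON_FLASH_ATTN", "True")

    # ...and (assumption) is only flipped later when neither Triton nor CK
    # flash attention is usable on the current GPU.
    ck_flash_attn_available = False  # hypothetical placeholder
    if not use_triton_flash_attn and not ck_flash_attn_available:
        use_naive_attn = True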

@@ -14,7 +14,6 @@
     VLLM_RINGBUFFER_WARNING_INTERVAL: int = 60
     VLLM_NCCL_SO_PATH: Optional[str] = None
     LD_LIBRARY_PATH: Optional[str] = None
-    VLLM_USE_SDPA_ATTENTION: bool = False
     VLLM_USE_TRITON_FLASH_ATTN: bool = True
     VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT: bool = True
     VLLM_USE_ROCM_FP8_FLASH_ATTN: bool = False

@@ -287,11 +286,6 @@ def get_vllm_port() -> Optional[int]:
     "LD_LIBRARY_PATH":
     lambda: os.environ.get("LD_LIBRARY_PATH", None),
 
-    # flag to control if vllm should use naive scaled dot-product attention
-    "VLLM_USE_SDPA_ATTENTION":
-    lambda: (os.environ.get("VLLM_USE_SDPA_ATTENTION", "False").lower() in
-             ("true", "1")),
-
     # flag to control if vllm should use triton flash attention
     "VLLM_USE_TRITON_FLASH_ATTN":
     lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
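The remaining flags keep the parsing convention visible above: only the
strings "true" and "1" (case-insensitive) enable a flag. A standalone sketch
of that convention (env_flag is a hypothetical helper, not part of envs.py):

    import os

    def env_flag(name: str, default: str = "False") -> bool:
        # Boolean env-var parsing, as in the envs.py lambdas above.
        return os.environ.get(name, default).lower() in ("true", "1")

    os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
    assert env_flag("VLLM_USE_TRITON_FLASH_ATTN", "True") is False

    os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "TRUE"
    assert env_flag("VLLM_USE_TRITON_FLASH_ATTN", "True") is True

After this change, setting the removed VLLM_USE_SDPA_ATTENTION variable is
silently ignored, since no lambda reads it anymore.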