
Commit aee731f

cleanup

1 parent ab92741 commit aee731f

File tree

3 files changed: +1 -8 lines changed


requirements/rocm.txt

Lines changed: 0 additions & 1 deletion

@@ -5,7 +5,6 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req
 numba == 0.61.2; python_version > '3.9'

 # Dependencies for AMD GPUs
-numpy==1.26.4
 boto3
 botocore
 datasets

vllm/attention/backends/rocm_flash_attn.py

Lines changed: 1 addition & 1 deletion

@@ -535,7 +535,7 @@ def __init__(
                 f"Head size {head_size} is not supported by PagedAttention. "
                 f"Supported head sizes are: {supported_head_sizes}.")

-        self.use_naive_attn = envs.VLLM_USE_SDPA_ATTENTION  # Default False
+        self.use_naive_attn = False
         # NOTE: Allow for switching between Triton and CK. Defaulting to triton.
         self.use_triton_flash_attn = envs.VLLM_USE_TRITON_FLASH_ATTN
         if self.use_triton_flash_attn:
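For context, a minimal sketch of the selection logic implied by this hunk. Only the two attribute assignments and the env-var parsing come from the diffs in this commit; the class name and surrounding structure are assumptions, not the actual vLLM backend. After this change the naive SDPA path is hardcoded off, and only the Triton flash-attention flag remains user-controllable.

import os

class RocmFlashAttentionSelectionSketch:  # hypothetical name, not the vLLM class
    def __init__(self) -> None:
        # The naive SDPA path can no longer be enabled via the environment;
        # it is unconditionally disabled here.
        self.use_naive_attn = False
        # Triton vs. CK selection is still driven by an env var
        # (parsing copied from the vllm/envs.py hunk below).
        self.use_triton_flash_attn = (
            os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower()
            in ("true", "1"))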

vllm/envs.py

Lines changed: 0 additions & 6 deletions

@@ -14,7 +14,6 @@
 VLLM_RINGBUFFER_WARNING_INTERVAL: int = 60
 VLLM_NCCL_SO_PATH: Optional[str] = None
 LD_LIBRARY_PATH: Optional[str] = None
-VLLM_USE_SDPA_ATTENTION: bool = False
 VLLM_USE_TRITON_FLASH_ATTN: bool = True
 VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT: bool = True
 VLLM_USE_ROCM_FP8_FLASH_ATTN: bool = False

@@ -287,11 +286,6 @@ def get_vllm_port() -> Optional[int]:
     "LD_LIBRARY_PATH":
     lambda: os.environ.get("LD_LIBRARY_PATH", None),

-    # flag to control if vllm should use naive scaled dot-product attention
-    "VLLM_USE_SDPA_ATTENTION":
-    lambda: (os.environ.get("VLLM_USE_SDPA_ATTENTION", "False").lower() in
-             ("true", "1")),
-
     # flag to control if vllm should use triton flash attention
     "VLLM_USE_TRITON_FLASH_ATTN":
     lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
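For readers unfamiliar with this file: the deleted entry followed the lazy env-flag pattern visible in the hunk above, where each key maps to a lambda that parses the variable on access. A minimal self-contained sketch of that pattern, assuming the mapping name and the usage line at the end; the parsing lambda itself is copied from the surviving VLLM_USE_TRITON_FLASH_ATTN entry.

import os
from typing import Any, Callable, Dict

# Assumed mapping name for illustration; each flag is parsed lazily
# when its lambda is called.
environment_variables: Dict[str, Callable[[], Any]] = {
    # flag to control if vllm should use triton flash attention
    "VLLM_USE_TRITON_FLASH_ATTN":
    lambda: (os.environ.get("VLLM_USE_TRITON_FLASH_ATTN", "True").lower() in
             ("true", "1")),
}

# Example read: evaluates the lambda at call time, so later changes to
# os.environ are picked up.
use_triton_flash_attn = environment_variables["VLLM_USE_TRITON_FLASH_ATTN"]()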
