Skip to content

Commit 0e286e3

Browse files
Add FA MLA to CG tests
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
1 parent f3c1b3c commit 0e286e3

File tree

2 files changed: +21 −1 lines changed

tests/compile/piecewise/test_full_cudagraph.py

Lines changed: 11 additions & 1 deletion

```diff
@@ -61,6 +61,16 @@ class BackendConfig:
         "cudagraph_mode": "FULL_AND_PIECEWISE",
     },
                   specific_gpu_arch=(9, 0)),
+    # FlashAttention MLA on Hopper
+    "FlashAttentionMLA":
+    BackendConfig(name="FlashAttentionMLA",
+                  env_vars={
+                      "VLLM_ATTENTION_BACKEND": "FLASH_ATTN_MLA",
+                  },
+                  comp_config={
+                      "cudagraph_mode": "FULL_DECODE_ONLY",
+                  },
+                  specific_gpu_arch=(9, 0)),
     # Cutlass MLA on Blackwell
     "CutlassMLA":
     BackendConfig(
@@ -102,7 +112,7 @@ class BackendConfig:
 test_params_full_cudagraph = []

 # deepseek-ai/DeepSeek-V2-Lite with MLA
-MLA_backends = ["FlashMLA", "CutlassMLA"]
+MLA_backends = ["FlashMLA", "FlashAttentionMLA", "CutlassMLA"]
 for mla_backend in MLA_backends:
     test_params_full_cudagraph.append(
         pytest.param(
```

tests/v1/cudagraph/test_cudagraph_mode.py

Lines changed: 10 additions & 0 deletions

```diff
@@ -62,6 +62,16 @@ class BackendConfig:
         "cudagraph_mode": "FULL_AND_PIECEWISE",
     },
                   specific_gpu_arch=(9, 0)),
+    # FlashAttention MLA on Hopper
+    "FlashAttentionMLA":
+    BackendConfig(name="FlashAttentionMLA",
+                  env_vars={
+                      "VLLM_ATTENTION_BACKEND": "FLASH_ATTN_MLA",
+                  },
+                  comp_config={
+                      "cudagraph_mode": "FULL_DECODE_ONLY",
+                  },
+                  specific_gpu_arch=(9, 0)),
     # FA2
     "FA2":
     BackendConfig(name="FA2",
```

Comments (0)