2 files changed, +4 -1 lines changed
File tree: transformer_engine/pytorch/attention/dot_product_attention

First changed file:

@@ -220,8 +220,8 @@ def test():
     )
     (
         use_flash_attention,
-        use_fused_attention,
         flash_attention_backend,
+        use_fused_attention,
         fused_attention_backend,
         use_unfused_attention,
         available_backends,
@@ -369,6 +369,7 @@ def test_dot_product_attention(
             and config.attn_mask_type in ["causal", "padding_causal"]
         )
         and (config.window_size[0] == -1 or FlashAttentionUtils.v2_3_plus)
+        and not is_mla
     ):
         flash_attn_supported = True
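The `is_mla` flag itself is not defined in this diff. As a rough, hypothetical sketch of what the new `and not is_mla` guard is assumed to mean: multi-latent attention (MLA) uses a different head size for Q/K than for V, and the FlashAttention path exercised by this test is treated as unsupported in that case. The `config.head_dim_qk` / `config.head_dim_v` attribute names below are assumptions, not taken from this diff.

# Hypothetical sketch: MLA is assumed to mean the Q/K and V head sizes differ.
is_mla = config.head_dim_qk != config.head_dim_v  # attribute names assumed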

Second changed file:

@@ -278,6 +278,8 @@ def get_attention_backend(
     ----------
     use_flash_attention: bool
         Whether the `FlashAttention` backend has been selected.
+    flash_attention_backend: PkgVersion
+        If `use_flash_attention = True`, the version of the selected `FlashAttention` backend.
     use_fused_attention: bool
         Whether the `FusedAttention` backend has been selected.
     fused_attention_backend: tex.NVTE_Fused_Attn_Backend
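
For reference, a hedged usage sketch of `get_attention_backend`, unpacking its results in the order that the updated docstring and the reordered test above imply. The `attention_params` argument and the exact call signature are assumptions, not taken from this diff.

# Sketch only: unpack backend-selection results in the documented order.
(
    use_flash_attention,      # bool: whether FlashAttention was selected
    flash_attention_backend,  # PkgVersion of the selected FlashAttention, if any
    use_fused_attention,      # bool: whether FusedAttention was selected
    fused_attention_backend,  # tex.NVTE_Fused_Attn_Backend
    use_unfused_attention,    # bool: whether unfused PyTorch attention was selected
    available_backends,       # availability info for all backends
) = get_attention_backend(attention_params)  # attention_params: assumed argument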