Commit 16eddff

Default sm_scale to None (#344)
Summary: To make the behavior consistent with FA4: https://www.internalfb.com/code/fbsource/fbcode/ai_codesign/gen_ai/flash_attention_v2/benchmarks/benchmark_attn.py

In the FA4 benchmark, sm_scale defaults to None, which further translates to `1.0 / math.sqrt(head_dim)` (https://www.internalfb.com/code/fbsource/[f03d88961ee4ecd3f4ee76736d7de904351d295c]/fbcode/ai_codesign/gen_ai/flash_attention_v2/flash_attn/cute/interface.py?lines=112), whereas Tritonbench pins sm_scale to 1.3. This change sets Tritonbench's sm_scale default to None as well.

Reviewed By: njriasan, jduprat

Differential Revision: D80652124
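For reference, a minimal sketch of the resulting default behavior (the standalone helper and example values below are illustrative, not the actual Tritonbench or FA4 interface):

```python
import math

def resolve_sm_scale(sm_scale, head_dim):
    # FA4 convention: an unset softmax scale falls back to 1/sqrt(head_dim).
    if sm_scale is not None:
        return sm_scale
    return 1.0 / math.sqrt(head_dim)

print(resolve_sm_scale(None, 128))  # ~0.0884, instead of the previously pinned 1.3
print(resolve_sm_scale(1.3, 128))   # 1.3, explicitly pinned
```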
1 parent 34e755a commit 16eddff

File tree: 1 file changed (+5, -1 lines)

tritonbench/operators/blackwell_attentions/operator.py

Lines changed: 5 additions & 1 deletion

@@ -6,6 +6,7 @@


 import argparse
+import math
 import os
 from contextlib import nullcontext


@@ -109,6 +110,9 @@ def parse_op_args(args: List[str]):
     parser.add_argument(
         "--pt2-sdpa", action="store_true", help="Compile SDPA with PT2."
     )
+    parser.add_argument(
+        "--sm-scale", type=Optional[float], default=None, help="softmax scale"
+    )
     parser.add_argument(
         "--input-types",
         type=str,

@@ -138,7 +142,7 @@ def __init__(
         self.native_sdpa = args.native_sdpa
         self.pt2_sdpa = args.pt2_sdpa
         self.input_types = args.input_types
-        self.sm_scale = 1.3
+        self.sm_scale = args.sm_scale if args.sm_scale else 1.0 / math.sqrt(self.D_HEAD)

     @register_benchmark()
     def aten(
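With the new flag in place, passing `--sm-scale 1.3` to the blackwell_attentions operator reproduces the previously pinned behavior, while omitting the flag now yields `1.0 / math.sqrt(head_dim)`, matching the FA4 default.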