
Commit aeee0fa

Address PR feedback

Signed-off-by: Ganesan Ramalingam <[email protected]>

1 parent 21a1594 commit aeee0fa

File tree

2 files changed: +13 -0 lines changed

onnxscript/rewriter/ort_fusions/sdpa.py

Lines changed: 12 additions & 0 deletions

@@ -12,6 +12,18 @@

 Dim = Union[int, ir.SymbolicDim]

+# This file contains a fusion rule that recognizes various patterns of scaled dot-product attention
+# (SDPA) implementations and replaces them with a single SDPA op. The SDPA op is a temporary fusion
+# op defined in the ai.onnxruntime._fusion domain. Subsequent fusion rules will map it into one
+# of the various ops defined in ORT: MHA, GQA, or Attention, depending on the input patterns.
+# The SDPA op is a standard scaled dot-product attention with an optional mask input and scaling factor.
+# Currently, it is restricted to query, key, and value inputs of rank 4 with shapes:
+#   Query: [batch_size, num_heads, seq_len, head_size_qk]
+#   Key:   [batch_size, num_heads, seq_len_kv, head_size_qk]
+#      or  [batch_size, seq_len_kv, num_heads, head_size_qk]
+#   Value: [batch_size, num_heads, seq_len_kv, head_size_v]
+# The key_format attribute indicates which of the two formats the key uses and can be either "BHSd" or "BSHd".
+

 class SDPA(pattern.RewriteRuleClassBase):
     _scale: float | None
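
For readers of the diff above, here is a minimal NumPy sketch of the computation the fused SDPA op stands for, under the shape conventions documented in the new comment. The function name, the default 1/sqrt(head_size_qk) scale, and the additive-mask interpretation are illustrative assumptions, not taken from this commit.

import numpy as np

def sdpa_reference(query, key, value, mask=None, scale=None, key_format="BHSd"):
    # query: [B, H, S_q, d_qk]; value: [B, H, S_kv, d_v]
    # key:   [B, H, S_kv, d_qk] when key_format == "BHSd",
    #        [B, S_kv, H, d_qk] when key_format == "BSHd"
    if key_format == "BSHd":
        key = np.transpose(key, (0, 2, 1, 3))  # normalize key to [B, H, S_kv, d_qk]
    if scale is None:
        scale = 1.0 / np.sqrt(query.shape[-1])  # assumed default scaling factor
    scores = (query @ np.transpose(key, (0, 1, 3, 2))) * scale  # [B, H, S_q, S_kv]
    if mask is not None:
        scores = scores + mask  # additive mask; mask semantics are an assumption here
    scores = scores - scores.max(axis=-1, keepdims=True)  # numerically stable softmax
    weights = np.exp(scores)
    weights = weights / weights.sum(axis=-1, keepdims=True)
    return weights @ value  # [B, H, S_q, d_v]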

onnxscript/rewriter/ort_fusions/sdpa_via_mha.py

Lines changed: 1 addition & 0 deletions

@@ -14,6 +14,7 @@

 class SDPAImplementation(pattern.RewriteRuleClassBase):
     def pattern(self, op, query, key, value, key_format):
+        """Pattern matches any call to SDPA. See sdpa.py for documentation on the SDPA op."""
         return op.SDPA(
             query,
             key,
