
Commit c814ead

guanbaoy and guangyey authored
[Fix] fix sdpa_math with scale input (#4993) (#5002)
* fix sdpa_math with scale input
* add ut
* fix flake8

Co-authored-by: guangyey <[email protected]>
1 parent 584f689 commit c814ead

2 files changed (+26, -1 lines)


csrc/gpu/aten/operators/transformers/sdp_utils.h

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ inline c10::SymFloat calculate_scale(
     const at::Tensor& query,
     c10::optional<double> scale) {
   const auto softmax_scale = scale.has_value()
-      ? scale.value()
+      ? (c10::SymFloat(1.0) / scale.value())
       : c10::SymFloat(query.sym_size(-1)).sqrt();
   return c10::SymFloat(softmax_scale);
 }
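For context, F.scaled_dot_product_attention treats its `scale` argument as the multiplier applied to the Q.K^T logits (defaulting to 1/sqrt(head_dim)), while the helper above returns the value used on the other side of that relationship, so a user-supplied scale has to be inverted rather than returned directly. The snippet below is an illustrative Python sketch of that intent, not code from this commit, and it assumes the returned value is consumed as a divisor on the logits downstream:

import math

def calculate_scale_sketch(head_dim, scale=None):
    # Hypothetical mirror of the C++ helper above: a user-supplied `scale` is a
    # multiplier on the attention logits, so its reciprocal is the divisor;
    # without it, fall back to the conventional sqrt(head_dim) divisor.
    return 1.0 / scale if scale is not None else math.sqrt(head_dim)

# With the default, dividing by sqrt(head_dim) equals multiplying by head_dim**-0.5.
assert abs(1.0 / calculate_scale_sketch(256) - 256**-0.5) < 1e-12
# With an explicit scale s, dividing by the returned value multiplies by s again.
assert abs(1.0 / calculate_scale_sketch(256, scale=0.125) - 0.125) < 1e-12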

tests/gpu/examples/test_sdp.py

Lines changed: 25 additions & 0 deletions
@@ -43,3 +43,28 @@ def test_sdp_math_half(self, dtype=torch.float16):
         out_xpu = F.scaled_dot_product_attention(query.xpu(), key.xpu(), value.xpu())

         self.assertEqual(out_cpu, out_xpu.cpu().float(), atol=1e-3, rtol=1e-3)
+
+    def test_sdp_math_fp32(self, dtype=torch.float):
+        head_dim = 256
+        seq_lenth = 1
+        k_seq_lenth = 33
+        v_seq_lenth = 33
+        scale = head_dim**-0.5
+        query = torch.rand(1, 16, seq_lenth, head_dim, dtype=dtype)
+        key = torch.rand(1, 16, k_seq_lenth, head_dim, dtype=dtype)
+        value = torch.rand(1, 16, v_seq_lenth, head_dim, dtype=dtype)
+
+        out_cpu = F.scaled_dot_product_attention(
+            query.float(),
+            key.float(),
+            value.float(),
+            scale=scale,
+        )
+        out_xpu = F.scaled_dot_product_attention(
+            query.xpu(),
+            key.xpu(),
+            value.xpu(),
+            scale=scale,
+        )
+
+        self.assertEqual(out_cpu, out_xpu.cpu().float(), atol=1e-3, rtol=1e-3)
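As a usage note, the new test passes an explicit scale of head_dim**-0.5 and compares the XPU result against the CPU reference. The same behavior can be cross-checked against a hand-rolled reference; the sketch below is illustrative and not part of the commit, and it assumes a PyTorch build where scaled_dot_product_attention accepts the `scale` keyword:

import torch
import torch.nn.functional as F

def sdpa_reference(query, key, value, scale):
    # `scale` multiplies the Q @ K^T logits before the softmax, matching the
    # documented semantics of F.scaled_dot_product_attention.
    attn = torch.softmax(query @ key.transpose(-2, -1) * scale, dim=-1)
    return attn @ value

head_dim = 256
query = torch.rand(1, 16, 1, head_dim)
key = torch.rand(1, 16, 33, head_dim)
value = torch.rand(1, 16, 33, head_dim)
scale = head_dim**-0.5

ref = sdpa_reference(query, key, value, scale)
out = F.scaled_dot_product_attention(query, key, value, scale=scale)
print(torch.allclose(ref, out, atol=1e-5))  # expected: True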
