
Commit 7ec17cd (1 parent: da3d437)

[AMD][GLUON] Fix none scale value for wmma/mfma (#8427)

Unwrap None from constexpr for wmma/mfma scaled. Add a corresponding frontend test.
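Why the unwrap is needed: Gluon's JIT frontend wraps literal kernel arguments, including None, in constexpr, so a plain "a_scale is not None" check sees the wrapper object and passes even though no scale was supplied. A minimal sketch of the failure mode follows; the constexpr class below is a stand-in for Triton's real one, shown only to illustrate the unwrap idiom:

class constexpr:
    """Stand-in for Triton's constexpr wrapper (illustration only)."""

    def __init__(self, value):
        self.value = value


def _unwrap_if_constexpr(x):
    # Mirrors the helper this commit imports from ..._core: peel off the
    # constexpr wrapper if present, otherwise pass the value through.
    return x.value if isinstance(x, constexpr) else x


a_scale = constexpr(None)                     # what the frontend hands to mfma_scaled
assert a_scale is not None                    # passes: the old check was fooled
assert _unwrap_if_constexpr(a_scale) is None  # unwrapped, the fixed check catches it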

File tree

3 files changed: +55 -4 lines


python/test/gluon/test_frontend.py

Lines changed: 49 additions & 0 deletions
@@ -2445,6 +2445,29 @@ def kernel():
     """)
 
 
+@pytest.mark.parametrize("target", [HIP_TARGET_CDNA4])
+def test_amd_mfma_scaled_none(target):
+
+    @gluon.jit
+    def kernel():
+        mfma_layout: ttgl.constexpr = ttgl.amd.AMDMFMALayout(4, [16, 16, 128], True, [1, 1])
+        scale_layout: ttgl.constexpr = ttgl.DistributedLinearLayout([],
+                                                                    [[1, 0], [2, 0], [4, 0], [8, 0], [0, 1], [0, 2]],
+                                                                    [], [], [16, 4])
+
+        a = ttgl.full([16, 64], 0x11, ttgl.uint8, ttgl.DotOperandLayout(0, mfma_layout, 16))
+        b = ttgl.full([64, 16], 0x22, ttgl.uint8, ttgl.DotOperandLayout(1, mfma_layout, 16))
+
+        b_scale = ttgl.full([16, 4], 0x01, ttgl.uint8, scale_layout)
+        acc = ttgl.full([16, 16], 0, ttgl.float32, mfma_layout)
+        ttgl.amd.cdna4.mfma_scaled(a, None, 'e2m1', b, b_scale, 'e2m1', acc)
+
+    with pytest.raises(CompilationError) as e:
+        run_parser(kernel, target=target)
+
+    assert "Scales must not be None" in str(e.value)
+
+
 @pytest.mark.parametrize("target", [HIP_TARGET_GFX1250])
 def test_amd_wmma_scaled(target):
 
@@ -2497,6 +2520,32 @@ def kernel():
     """)
 
 
+@pytest.mark.parametrize("target", [HIP_TARGET_GFX1250])
+def test_amd_wmma_scaled_none(target):
+
+    @gluon.jit
+    def kernel():
+        wmma_layout: ttgl.constexpr = ttgl.amd.AMDWMMALayout(3, True, [1, 1], [16, 16, 128])
+        wmma_layout_packed: ttgl.constexpr = ttgl.amd.AMDWMMALayout(3, True, [1, 1], [16, 16, 64])
+        scale_layout: ttgl.constexpr = ttgl.DistributedLinearLayout([[0, 1], [0, 2]],
+                                                                    [[1, 0], [2, 0], [4, 0], [8, 0], [0, 0]], [], [],
+                                                                    [16, 4])
+        a_layout: ttgl.constexpr = ttgl.DotOperandLayout(0, wmma_layout_packed, 16)
+        b_layout: ttgl.constexpr = ttgl.DotOperandLayout(1, wmma_layout_packed, 16)
+
+        a = ttgl.full([16, 64], 0x11, ttgl.uint8, a_layout)
+        b = ttgl.full([64, 16], 0x22, ttgl.uint8, b_layout)
+        b_scale = ttgl.full([16, 4], 0x01, ttgl.uint8, scale_layout)
+        acc = ttgl.full([16, 16], 0, ttgl.float32, wmma_layout)
+
+        ttgl.amd.gfx1250.wmma_scaled(a, None, 'e2m1', b, b_scale, 'e2m1', acc)
+
+    with pytest.raises(CompilationError) as e:
+        run_parser(kernel, target=target)
+
+    assert "Scales must not be None" in str(e.value)
+
+
 @gluon.jit
 def padded_shared_layout_kernel():
     shape: ttgl.constexpr = [64, 64]
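For contrast with the two negative tests above, here is a hedged sketch of a passing call, not part of this commit: both scale operands are concrete tensors, so the new unwrap-then-assert check is satisfied. It reuses the layouts from test_amd_wmma_scaled_none; the kernel name is illustrative, the a_scale layout is assumed to match b_scale's, and whether wmma_scaled returns the accumulator (as other Gluon dot builtins do) is an assumption, since the builtin's full body is not shown in this diff.

# Hypothetical positive counterpart to the tests above: both scale operands
# are real tensors, so the "Scales must not be None" assert added by this
# commit does not fire.
@gluon.jit
def kernel_with_scales():
    wmma_layout: ttgl.constexpr = ttgl.amd.AMDWMMALayout(3, True, [1, 1], [16, 16, 128])
    wmma_layout_packed: ttgl.constexpr = ttgl.amd.AMDWMMALayout(3, True, [1, 1], [16, 16, 64])
    scale_layout: ttgl.constexpr = ttgl.DistributedLinearLayout([[0, 1], [0, 2]],
                                                                [[1, 0], [2, 0], [4, 0], [8, 0], [0, 0]], [], [],
                                                                [16, 4])

    a = ttgl.full([16, 64], 0x11, ttgl.uint8, ttgl.DotOperandLayout(0, wmma_layout_packed, 16))
    b = ttgl.full([64, 16], 0x22, ttgl.uint8, ttgl.DotOperandLayout(1, wmma_layout_packed, 16))
    a_scale = ttgl.full([16, 4], 0x01, ttgl.uint8, scale_layout)  # a real scale instead of None
    b_scale = ttgl.full([16, 4], 0x01, ttgl.uint8, scale_layout)
    acc = ttgl.full([16, 16], 0, ttgl.float32, wmma_layout)
    # Assumed to return the accumulated tensor; see the builtin's definition.
    acc = ttgl.amd.gfx1250.wmma_scaled(a, a_scale, 'e2m1', b, b_scale, 'e2m1', acc)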

python/triton/experimental/gluon/language/amd/cdna4/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -1,5 +1,5 @@
 from triton.experimental.gluon.language import _core as ttgl
-from ..._core import builtin, float32
+from ..._core import builtin, float32, _unwrap_if_constexpr
 from ..._layouts import DotOperandLayout
 from .._layouts import AMDMFMALayout
 from ..cdna3 import _buffer_atomic_rmw_impl
@@ -43,6 +43,8 @@ def mfma_scaled(a, a_scale, a_format, b, b_scale, b_format, acc, _semantic=None)
     assert a_format.value in {"e2m1", "e4m3", "e5m2"}, f"Unsupported lhs_format: {a_format.value}"
     assert b_format.value in {"e2m1", "e4m3", "e5m2"}, f"Unsupported rhs_format: {b_format.value}"
 
+    a_scale = _unwrap_if_constexpr(a_scale)
+    b_scale = _unwrap_if_constexpr(b_scale)
     assert a_scale is not None and b_scale is not None, "Scales must not be None"
 
     tensor = _semantic.dot_scaled(a, a_scale, a_format, b, b_scale, b_format, acc, False, True, True, float32)

python/triton/experimental/gluon/language/amd/gfx1250/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -1,8 +1,6 @@
-from ..._core import builtin
+from ..._core import builtin, _unwrap_if_constexpr
 from .._ops import _wmma, _verify_wmma
 from triton.experimental.gluon.language import _core as ttgl
-from triton.experimental.gluon.language._semantic import _check
-from ..._layouts import DotOperandLayout
 from .._layouts import AMDWMMALayout
 from . import tdm
 
@@ -61,6 +59,8 @@ def wmma_scaled(a, a_scale, a_format, b, b_scale, b_format, acc, _semantic=None)
     assert a_format.value in {"e2m1", "e4m3", "e5m2"}, f"Unsupported lhs_format: {a_format.value}"
     assert b_format.value in {"e2m1", "e4m3", "e5m2"}, f"Unsupported rhs_format: {b_format.value}"
 
+    a_scale = _unwrap_if_constexpr(a_scale)
+    b_scale = _unwrap_if_constexpr(b_scale)
     assert a_scale is not None and b_scale is not None, "Scales must not be None"
 
     handle = _semantic.dot_scaled(a, a_scale, a_format, b, b_scale, b_format, acc, fast_math=False, lhs_k_pack=True,
