[AMD][GLUON] Expose buffer ops to gfx1250 (#8532)

borontion · web-flow · commit 11af53c4b47b · 2025-10-24T17:20:23.000-07:00
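Expose `buffer_load` and `buffer_store` to gfx1250 by re-exporting the
implementations inherited from CDNA3 in the gfx1250 Gluon module.
The buffer load/store frontend tests are no longer parametrized over
CDNA3 and CDNA4; they now run against the CDNA3 target only.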
diff --git a/python/test/gluon/test_frontend.py b/python/test/gluon/test_frontend.py
@@ -2208,11 +2208,10 @@ def buffer_load_store_kernel(x, y):
     ttgl.amd.cdna4.buffer_store(stored_value=a, ptr=y, offsets=offsets, mask=mask, cache='.cs')
 
 
-@pytest.mark.parametrize("target", [HIP_TARGET_CDNA3, HIP_TARGET_CDNA4])
-def test_buffer_load_store(target):
+def test_buffer_load_store():
     x = MockTensor(ttgl.float32)
     y = MockTensor(ttgl.float32)
-    module = run_parser(buffer_load_store_kernel, *make_args(x, y), target=target)
+    module = run_parser(buffer_load_store_kernel, *make_args(x, y), target=HIP_TARGET_CDNA3)
 
     expecttest.assert_expected_inline(
         anonymize_ir(module.str_nodebug()), """\
@@ -2257,11 +2256,10 @@ def buffer_load_store_with_broadcast_kernel(x, y):
     ttgl.amd.cdna3.buffer_store(stored_value=a, ptr=y, offsets=offsets, mask=mask, cache='.cs')
 
 
-@pytest.mark.parametrize("target", [HIP_TARGET_CDNA3, HIP_TARGET_CDNA4])
-def test_buffer_load_store_with_broadcast(target):
+def test_buffer_load_store_with_broadcast():
     x = MockTensor(ttgl.float16)
     y = MockTensor(ttgl.float16)
-    module = run_parser(buffer_load_store_with_broadcast_kernel, *make_args(x, y), target=target)
+    module = run_parser(buffer_load_store_with_broadcast_kernel, *make_args(x, y), target=HIP_TARGET_CDNA3)
 
     expecttest.assert_expected_inline(
         anonymize_ir(module.str_nodebug()), """\
diff --git a/python/triton/experimental/gluon/language/amd/gfx1250/__init__.py b/python/triton/experimental/gluon/language/amd/gfx1250/__init__.py
@@ -1,10 +1,10 @@
 from ..._core import builtin, _unwrap_if_constexpr
-from .._ops import _wmma, _verify_wmma
+from .._ops import _wmma, _verify_wmma, _mma_scaled
 from .._layouts import AMDWMMALayout
-from .._ops import _mma_scaled
+from ..cdna3 import buffer_load, buffer_store
 from . import tdm
 
-__all__ = ["tdm", "wmma", "wmma_scaled", "get_wmma_scale_layout"]
+__all__ = ["tdm", "wmma", "wmma_scaled", "buffer_load", "buffer_store", "get_wmma_scale_layout"]
 
 
 def _get_wmma_scale_layout(dot_operand_layout, shape, semantic):