[TEST] Check ttng.tc_gen5_mma only for CUDA (triton-lang#6543)

antiagainst · web-flow · commit a8e5788fed5d · 2025-04-20T12:34:37.000-05:00
This fixes test failures on the AMD gfx950 architecture by guarding the `ttng.tc_gen5_mma` count assertion in `test_mxfp` with `is_cuda()`.
diff --git a/python/test/unit/language/test_matmul.py b/python/test/unit/language/test_matmul.py
@@ -380,9 +380,10 @@ def test_mxfp(M, N, K, BLOCK_M, BLOCK_N, BLOCK_K, NUM_STAGES, nonKDim, NUM_WARPS
     rtol = 0.0001
     torch.testing.assert_close(ref_out, output, atol=atol, rtol=rtol)
 
-    # Pipelining of dot_scaled requires tmem_copy to be used, which in turn
-    # requires the scales to be in the blocked layout in global memory.
-    assert out.asm["ttgir"].count("ttng.tc_gen5_mma") == 1
+    if is_cuda():
+        # Pipelining of dot_scaled requires tmem_copy to be used, which in turn
+        # requires the scales to be in the blocked layout in global memory.
+        assert out.asm["ttgir"].count("ttng.tc_gen5_mma") == 1
 
 
 def _knob_promote_lhs_to_tmem(monkeypatch):