11// RUN: triton-opt %s --tritongpu-reduce-data-duplication --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch="gfx942" -split-input-file | FileCheck %s --check-prefix=GFX942
22// RUN: triton-opt %s --tritongpu-reduce-data-duplication --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch="gfx950" -split-input-file | FileCheck %s --check-prefix=GFX950
33
4- #mfma = #ttg.amd_mfma <{versionMajor = 2 , versionMinor = 0 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
4+ #mfma = #ttg.amd_mfma <{version = 2 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
55#dotop = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =4 }>
66module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
77 // GFX942-LABEL: shortcut_mfma16
@@ -16,7 +16,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
1616
1717// -----
1818
19- #mfma = #ttg.amd_mfma <{versionMajor = 2 , versionMinor = 0 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
19+ #mfma = #ttg.amd_mfma <{version = 2 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
2020#dotop = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =8 }>
2121module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
2222 // GFX942-LABEL: no_shortcut_mfma16
@@ -31,7 +31,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
3131
3232// -----
3333
34- #mfma = #ttg.amd_mfma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [4 , 1 ], instrShape = [32 , 32 ], isTransposed = true }>
34+ #mfma = #ttg.amd_mfma <{version = 3 , warpsPerCTA = [4 , 1 ], instrShape = [32 , 32 ], isTransposed = true }>
3535#dotop0 = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =8 }>
3636
3737module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
@@ -95,7 +95,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
9595
9696// -----
9797
98- #mfma = #ttg.amd_mfma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [4 , 1 ], instrShape = [32 , 32 ], isTransposed = true }>
98+ #mfma = #ttg.amd_mfma <{version = 3 , warpsPerCTA = [4 , 1 ], instrShape = [32 , 32 ], isTransposed = true }>
9999#dotop0 = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =8 }>
100100
101101module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
@@ -112,7 +112,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
112112
113113// -----
114114
115- #mfma = #ttg.amd_mfma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
115+ #mfma = #ttg.amd_mfma <{version = 3 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
116116#dotop0 = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =8 }>
117117
118118module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
@@ -206,7 +206,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
206206
207207// -----
208208
209- #mfma = #ttg.amd_mfma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
209+ #mfma = #ttg.amd_mfma <{version = 3 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
210210#dotop0 = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =8 }>
211211
212212module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
@@ -225,7 +225,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
225225
226226#blocked = #ttg.blocked <{sizePerThread = [1 , 1 ], threadsPerWarp = [16 , 4 ], warpsPerCTA = [4 , 1 ], order = [0 , 1 ]}>
227227#linear = #ttg.linear <{register = [[0 , 1 ], [0 , 2 ], [0 , 4 ], [0 , 16 ], [0 , 32 ], [0 , 64 ]], lane = [[1 , 0 ], [2 , 0 ], [4 , 0 ], [8 , 0 ], [16 , 0 ], [0 , 8 ]], warp = [[32 , 0 ], [64 , 0 ]], block = []}>
228- #mma = #ttg.amd_mfma <{versionMajor = 4 , versionMinor = 0 , warpsPerCTA = [4 , 1 ], instrShape = [32 , 32 ], isTransposed = true }>
228+ #mma = #ttg.amd_mfma <{version = 4 , warpsPerCTA = [4 , 1 ], instrShape = [32 , 32 ], isTransposed = true }>
229229module attributes {" ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
230230 // GFX950-LABEL: mfma_linear_permlane_swap
231231 tt.func public @mfma_linear_permlane_swap (%arg0: tensor <128 x128 xf16 , #mma >) attributes {noinline = false } {
0 commit comments