@@ -37,8 +37,8 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
3737#dotop0 = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =8 }>
3838
3939module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
40- // GFX942-LABEL: mfma_dot_cvt_bf8_mfma16_v3
41- tt.func public @mfma_dot_cvt_bf8_mfma16_v3 (%arg0: tensor <128 x32 xf8 E5 M2 , #mfma >) {
40+ // GFX942-LABEL: mfma_dot_cvt_bf8_mfma16
41+ tt.func public @mfma_dot_cvt_bf8_mfma16 (%arg0: tensor <128 x32 xf8 E5 M2 , #mfma >) {
4242 // GFX942-NOT: store
4343 // GFX942-NOT: load
4444 // GFX942: rocdl.ds_bpermute
@@ -50,24 +50,6 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
5050
5151// -----
5252
53- #mfma = #ttg.amd_mfma <{version = 4 , warpsPerCTA = [4 , 1 ], instrShape = [16 , 16 ], isTransposed = true }>
54- #dotop0 = #ttg.dot_op <{opIdx = 0 , parent = #mfma , kWidth =8 }>
55-
56- module attributes {" ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 4 : i32 , " ttg.threads-per-warp" = 64 : i32 } {
57- // GFX950-LABEL: mfma_dot_cvt_bf8_mfma16_v4
58- tt.func public @mfma_dot_cvt_bf8_mfma16_v4 (%arg0: tensor <128 x32 xf8 E5 M2 , #mfma >) {
59- // GFX950-NOT: rocdl.ds_bpermute
60- // GFX950: llvm.call_intrinsic "llvm.amdgcn.permlane32.swap"
61- // GFX950: llvm.call_intrinsic "llvm.amdgcn.permlane16.swap"
62- // GFX950: llvm.call_intrinsic "llvm.amdgcn.permlane32.swap"
63- // GFX950: llvm.call_intrinsic "llvm.amdgcn.permlane16.swap"
64- %0 = ttg.convert_layout %arg0 : tensor <128 x32 xf8 E5 M2 , #mfma > -> tensor <128 x32 xf8 E5 M2 , #dotop0 >
65- tt.return
66- }
67- }
68-
69- // -----
70-
7153#blocked = #ttg.blocked <{sizePerThread = [1 , 1 ], threadsPerWarp = [16 , 4 ], warpsPerCTA = [4 , 1 ], order = [0 , 1 ]}>
7254#linear = #ttg.linear <{register = [[0 , 1 ], [0 , 2 ], [0 , 4 ], [0 , 16 ], [0 , 32 ], [0 , 64 ]], lane = [[1 , 0 ], [2 , 0 ], [4 , 0 ], [8 , 0 ], [16 , 0 ], [0 , 8 ]], warp = [[32 , 0 ], [64 , 0 ]], block = []}>
7355#mma = #ttg.amd_mfma <{version = 4 , warpsPerCTA = [4 , 1 ], instrShape = [32 , 32 ], isTransposed = true }>
0 commit comments