@@ -217,6 +217,22 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ
217217
218218// -----
219219
// Test case: a 128x64 (f16) by 64x256 (f16) tt.dot accumulating into 128x256 (f32),
// in a module declared with 16 warps and ttg.target = "cuda:100".
// The output must contain an MMA v2 layout (versionMajor = 2) with
// warpsPerCTA = [2, 8] and instrShape = [16, 8].
// NOTE(review): the RUN line and the pass under test are outside this view;
// judging by the function name this covers the fallback from MMAv5 to MMAv2 - confirm.
// CHECK: #ttg.nvidia_mma<{versionMajor = 2, versionMinor = 0, warpsPerCTA = [2, 8], instrShape = [16, 8]}>
#blocked = #ttg.blocked<{sizePerThread = [4, 4], threadsPerWarp = [1, 32], warpsPerCTA = [16, 1], order = [1, 0]}>
#blocked1 = #ttg.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 32], warpsPerCTA = [16, 1], order = [1, 0]}>
#blocked2 = #ttg.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 32], warpsPerCTA = [4, 4], order = [1, 0]}>
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 16 : i32, ttg.target = "cuda:100", "ttg.threads-per-warp" = 32 : i32} {
  // The label directive must be upper-case: FileCheck directive names are
  // case-sensitive, so a lower-case suffix is not treated as a label.
  // CHECK-LABEL: mmav5_fallback_v2_num_warps
  tt.func public @mmav5_fallback_v2_num_warps(%a: tensor<128x64xf16, #blocked2>, %b: tensor<64x256xf16, #blocked1>, %c: tensor<128x256xf32, #blocked>) -> tensor<128x256xf32, #blocked> {
    // Move both operands into dot-operand layouts whose parent is the accumulator layout.
    %ad = ttg.convert_layout %a : tensor<128x64xf16, #blocked2> -> tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #blocked}>>
    %bd = ttg.convert_layout %b : tensor<64x256xf16, #blocked1> -> tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #blocked}>>
    %d = tt.dot %ad, %bd, %c, inputPrecision = tf32 : tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #blocked}>> * tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #blocked}>> -> tensor<128x256xf32, #blocked>
    tt.return %d : tensor<128x256xf32, #blocked>
  }
}
233+
234+ // -----
235+
220236#blocked = #ttg.blocked <{sizePerThread = [1 , 1 ], threadsPerWarp = [1 , 32 ], warpsPerCTA = [1 , 4 ], order = [1 , 0 ]}>
221237#blocked1 = #ttg.blocked <{sizePerThread = [1 , 1 ], threadsPerWarp = [1 , 32 ], warpsPerCTA = [1 , 4 ], order = [1 , 0 ]}>
222238#blocked2 = #ttg.blocked <{sizePerThread = [1 , 1 ], threadsPerWarp = [1 , 32 ], warpsPerCTA = [1 , 4 ], order = [1 , 0 ]}>
0 commit comments