@@ -435,7 +435,7 @@ tt.func @unused_load(%lb : index, %ub : index, %step : index,
435435
436436// -----
437437
438- #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , instrShape = [16 , 16 , 16 ]}>
438+ #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [ 4 , 1 ], instrShape = [16 , 16 , 16 ]}>
439439#blocked1 = #ttg.blocked <{sizePerThread = [1 , 128 ], threadsPerWarp = [32 , 1 ], warpsPerCTA = [4 , 1 ], order = [1 , 0 ]}>
440440#shared = #ttg.nvmma_shared <{swizzlingByteWidth = 128 , transposed = false , elementBitWidth = 16 }>
441441#shared1 = #ttg.nvmma_shared <{swizzlingByteWidth = 128 , transposed = true , elementBitWidth = 16 }>
@@ -493,7 +493,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ
493493
494494// -----
495495
496- #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , instrShape = [16 , 16 , 16 ]}>
496+ #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [ 4 , 1 ], instrShape = [16 , 16 , 16 ]}>
497497#blocked1 = #ttg.blocked <{sizePerThread = [1 , 128 ], threadsPerWarp = [32 , 1 ], warpsPerCTA = [4 , 1 ], order = [1 , 0 ]}>
498498#shared = #ttg.nvmma_shared <{swizzlingByteWidth = 128 , transposed = false , elementBitWidth = 32 }>
499499#shared1 = #ttg.nvmma_shared <{swizzlingByteWidth = 128 , transposed = true , elementBitWidth = 32 }>
@@ -547,7 +547,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ
547547
548548// -----
549549
550- #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , instrShape = [16 , 16 , 16 ]}>
550+ #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [ 4 , 1 ], instrShape = [16 , 16 , 16 ]}>
551551#blocked1 = #ttg.blocked <{sizePerThread = [1 , 128 ], threadsPerWarp = [32 , 1 ], warpsPerCTA = [4 , 1 ], order = [1 , 0 ]}>
552552#shared = #ttg.nvmma_shared <{swizzlingByteWidth = 128 , transposed = false , elementBitWidth = 16 }>
553553
@@ -840,7 +840,7 @@ tt.func @tma_reuse_barrier(%lb : index, %ub : index, %step : index,
840840
841841// -----
842842
843- #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , instrShape = [16 , 16 , 16 ]}>
843+ #mma = #ttg.nvidia_mma <{versionMajor = 3 , versionMinor = 0 , warpsPerCTA = [ 4 , 1 ], instrShape = [16 , 16 , 16 ]}>
844844#blocked1 = #ttg.blocked <{sizePerThread = [1 , 128 ], threadsPerWarp = [32 , 1 ], warpsPerCTA = [4 , 1 ], order = [1 , 0 ]}>
845845#shared = #ttg.nvmma_shared <{swizzlingByteWidth = 128 , transposed = false , elementBitWidth = 16 }>
846846
0 commit comments