@@ -28,9 +28,9 @@ func.func @set_encoding_LHS_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK: %[[EXPAND:.*]] = tensor.expand_shape %[[PACK]]
 // CHECK-SAME : tensor<2x9x128x64xi8> into tensor<2x9x4x8x4x4x16xi8>
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
-// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x9x4x8x4x4x16xi8>)
-// CHECK-SAME: outs({{.*}} : tensor<2x9x8x4x4x4x16xi8>)
-// CHECK-SAME: permutation = [0, 1, 3, 5, 2, 4, 6]
+// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x9x8x16x4x16xi8>)
+// CHECK-SAME: outs({{.*}} : tensor<2x9x8x4x16x16xi8>)
+// CHECK-SAME: permutation = [0, 1, 2, 4, 3, 5]
 // CHECK: return %[[TRANSPOSE]]

 // -----
@@ -53,11 +53,11 @@ func.func @set_encoding_RHS_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK-SAME: inner_tiles = [128, 64]
 // CHECK-SAME: : tensor<255x513xi8> -> tensor<5x4x128x64xi8>
 // CHECK: %[[EXPAND:.*]] = tensor.expand_shape %[[PACK]]
-// CHECK-SAME: : tensor<5x4x128x64xi8> into tensor<5x4x4x16x2x4x16xi8>
+// CHECK-SAME: : tensor<5x4x128x64xi8> into tensor<5x4x4x2x16x4x16xi8>
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
-// CHECK-SAME: ins(%[[EXPAND]] : tensor<5x4x4x16x2x4x16xi8>)
+// CHECK-SAME: ins(%[[EXPAND]] : tensor<5x4x4x2x16x4x16xi8>)
 // CHECK-SAME: outs({{.*}} : tensor<5x4x4x2x4x16x16xi8>)
-// CHECK-SAME: permutation = [0, 1, 2, 4, 5, 3, 6]
+// CHECK-SAME: permutation = [0, 1, 2, 3, 5, 4, 6]
 // CHECK: return %[[TRANSPOSE]]

 // -----
@@ -80,11 +80,11 @@ func.func @set_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK-SAME: inner_tiles = [128, 128]
 // CHECK-SAME: : tensor<255x513xi32> -> tensor<2x5x128x128xi32>
 // CHECK: %[[EXPAND:.*]] = tensor.expand_shape %[[PACK]]
-// CHECK-SAME: : tensor<2x5x128x128xi32> into tensor<2x5x4x8x4x4x16x2xi32>
+// CHECK-SAME: : tensor<2x5x128x128xi32> into tensor<2x5x8x4x4x4x2x16xi32>
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
-// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x5x4x8x4x4x16x2xi32>)
+// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x5x8x4x4x4x2x16xi32>)
 // CHECK-SAME: outs({{.*}} : tensor<2x5x4x8x2x4x16x4xi32>)
-// CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4]
+// CHECK-SAME: permutation = [0, 1, 5, 2, 6, 3, 7, 4]
 // CHECK: return %[[TRANSPOSE]]

 // -----
@@ -103,10 +103,10 @@ func.func @unset_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
 // CHECK-SAME: ins(%[[ARG0]] : tensor<2x5x4x8x2x4x16x4xi32>)
-// CHECK-SAME: outs({{.*}} : tensor<2x5x4x8x4x4x16x2xi32>)
-// CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4]
+// CHECK-SAME: outs({{.*}} : tensor<2x5x8x4x4x4x2x16xi32>)
+// CHECK-SAME: permutation = [0, 1, 3, 5, 7, 2, 4, 6]
 // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]]
-// CHECK-SAME: : tensor<2x5x4x8x4x4x16x2xi32> into tensor<2x5x128x128xi32>
+// CHECK-SAME: : tensor<2x5x8x4x4x4x2x16xi32> into tensor<2x5x128x128xi32>
 // CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]]
 // CHECK-SAME: outer_dims_perm = [0, 1]
 // CHECK-SAME: inner_dims_pos = [0, 1]
@@ -139,7 +139,7 @@ func.func @matmul_lowering_MFMA_I32_16x16x64_I8(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
 // CHECK: func.func @matmul_lowering_MFMA_I32_16x16x64_I8(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x8x4x4x4x16xi8>
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x8x4x16x16xi8>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x4x2x4x16x16xi8>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x4x8x2x4x16x4xi32>
 // CHECK-SAME: ) -> tensor<?x?x4x8x2x4x16x4xi32>
@@ -178,7 +178,7 @@ func.func @batch_matmul_lowering_MFMA_F32_16x16x128_F8E4M3FN(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
 // CHECK: func.func @batch_matmul_lowering_MFMA_F32_16x16x128_F8E4M3FN(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x4x4x32xf8E4M3FN>
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x16x32xf8E4M3FN>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x4x2x4x16x32xf8E4M3FN>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x4x8x2x4x16x4xf32>
 // CHECK-SAME: ) -> tensor<?x?x?x4x8x2x4x16x4xf32>
@@ -213,7 +213,7 @@ func.func @batch_matmul_lowering_MFMA_F32_16x16x32_BF16(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
 // CHECK: func.func @batch_matmul_lowering_MFMA_F32_16x16x32_BF16(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x4x4x8xbf16>
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x16x8xbf16>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x4x2x4x16x8xbf16>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x4x8x2x4x16x4xf32>
 // CHECK-SAME: ) -> tensor<?x?x?x4x8x2x4x16x4xf32>