
Commit 9992921

Remove moveCrossThreadOutermost (#22284)
Fixes #22144. E2E matmul tests passed as-is, confirming that this wasn't needed anymore. The diff is adjusting lit-tests.

Signed-off-by: Benoit Jacob <[email protected]>
1 parent acbd6de commit 9992921

File tree

9 files changed: +106 −325 lines changed


compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_for_iree_ops.mlir

Lines changed: 2 additions & 2 deletions
@@ -316,14 +316,14 @@ func.func @matmul_lowering_f32f32f32_gfx942() attributes {
 // CHECK-DAG: %[[TILED_M:.+]] = affine.apply #[[$MAP0]]()[%[[M]]]
 // CHECK-DAG: %[[TILED_K:.+]] = affine.apply #[[$MAP1]]()[%[[K]]]
 // CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
-// CHECK-SAME: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?x8x4x4x4x4xf32>>{%[[TILED_M]], %[[TILED_K]]}
+// CHECK-SAME: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?x8x4x16x4xf32>>{%[[TILED_M]], %[[TILED_K]]}
 // CHECK: %[[TILED_N:.+]] = affine.apply #[[$MAP0]]()[%[[N]]]
 // CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
 // CHECK-SAME: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?x4x2x4x16x4xf32>>{%[[TILED_N]], %[[TILED_K]]}
 // CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
 // CHECK-SAME: !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x?x4x8x2x4x16x4xf32>>{%[[TILED_M]], %[[TILED_N]]}
 // CHECK: %[[LHS:.+]] = iree_tensor_ext.dispatch.tensor.load %[[LHS_BINDING]]
-// CHECK-SAME: offsets = [0, 0, 0, 0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4, 4, 4, 4], strides = [1, 1, 1, 1, 1, 1, 1]
+// CHECK-SAME: offsets = [0, 0, 0, 0, 0, 0], sizes = [%[[TILED_M]], %[[TILED_K]], 8, 4, 16, 4], strides = [1, 1, 1, 1, 1, 1]
 // CHECK: %[[RHS:.+]] = iree_tensor_ext.dispatch.tensor.load %[[RHS_BINDING]]
 // CHECK-SAME: offsets = [0, 0, 0, 0, 0, 0, 0], sizes = [%[[TILED_N]], %[[TILED_K]], 4, 2, 4, 16, 4], strides = [1, 1, 1, 1, 1, 1, 1]
 // CHECK: %[[OUTS:.+]] = iree_tensor_ext.dispatch.tensor.load %[[OUTS_BINDING]]
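
Not part of the commit, but a quick way to read the LHS change above: the new layout only regroups the static tile dims and does not change how many f32 elements each materialized tile holds. A minimal Python check, with the shapes copied from the CHECK lines (the leading ?x? dims are dynamic and omitted):

from math import prod

# Static tile dims from the CHECK lines above (leading ?x? dims omitted).
old_lhs_tile = (8, 4, 4, 4, 4)  # tensor<?x?x8x4x4x4x4xf32> before this commit
new_lhs_tile = (8, 4, 16, 4)    # tensor<?x?x8x4x16x4xf32>  after this commit

# Same element count per tile; a 4x4 pair of inner dims now shows up as one 16.
assert prod(old_lhs_tile) == prod(new_lhs_tile) == 2048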

compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_gfx1100.mlir

Lines changed: 1 addition & 2 deletions
@@ -23,8 +23,7 @@ func.func @matmul_lowering_WMMAR3_F32_16x16x16_F16(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
 // CHECK: func.func @matmul_lowering_WMMAR3_F32_16x16x16_F16(
-// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x1x8x2x16xf16>
-// CHECK-SAME: %[[RHS:.+]]: tensor<?x?x4x1x16x16xf16>
+// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x1x16x16xf16>, %[[RHS:.+]]: tensor<?x?x4x1x16x16xf16>
 // CHECK-SAME: %[[ACC:.+]]: tensor<?x?x4x4x8x2x16xf32>
 // CHECK-SAME: ) -> tensor<?x?x4x4x8x2x16xf32>
 // CHECK: %[[MMA:.+]] = iree_codegen.inner_tiled ins(%[[LHS]], %[[RHS]]) outs(%[[ACC]])

compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_gfx908.mlir

Lines changed: 1 addition & 2 deletions
@@ -24,8 +24,7 @@ func.func @matmul_lowering_MFMA_i32_16x16x16_i8(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
 // CHECK: func.func @matmul_lowering_MFMA_i32_16x16x16_i8
-// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x4x4x4x4x4xi8>
-// CHECK-SAME: %[[RHS:.+]]: tensor<?x?x4x2x4x16x4x4xi8>
+// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x4x16x4x4xi8>, %[[RHS:.+]]: tensor<?x?x4x2x4x16x4x4xi8>
 // CHECK-SAME: %[[ACC:.+]]: tensor<?x?x4x4x2x4x16x4xi32>
 // CHECK: %[[MMA:.+]] = iree_codegen.inner_tiled ins(%[[LHS]], %[[RHS]]) outs(%[[ACC]])
 // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]],

compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_gfx90a.mlir

Lines changed: 2 additions & 4 deletions
@@ -24,8 +24,7 @@ func.func @matmul_lowering_MFMA_f32_16x16x8_bf16(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
 // CHECK: func.func @matmul_lowering_MFMA_f32_16x16x8_bf16(
-// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x4x4x4x4x2xbf16>
-// CHECK-SAME: %[[RHS:.+]]: tensor<?x?x4x2x4x16x4x2xbf16>
+// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x4x16x4x2xbf16>, %[[RHS:.+]]: tensor<?x?x4x2x4x16x4x2xbf16>
 // CHECK-SAME: %[[ACC:.+]]: tensor<?x?x4x4x2x4x16x4xf32>
 // CHECK-SAME: ) -> tensor<?x?x4x4x2x4x16x4xf32>
 // CHECK: %[[MMA:.+]] = iree_codegen.inner_tiled ins(%[[LHS]], %[[RHS]]) outs(%[[ACC]])
@@ -58,8 +57,7 @@ func.func @matmul_lowering_MFMA_f64_16x16x4_f64(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
 // CHECK: func.func @matmul_lowering_MFMA_f64_16x16x4_f64(
-// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x4x4x4x2xf64>
-// CHECK-SAME: %[[RHS:.+]]: tensor<?x?x4x4x16x2xf64>
+// CHECK-SAME: %[[LHS:.+]]: tensor<?x?x4x4x16x2xf64>, %[[RHS:.+]]: tensor<?x?x4x4x16x2xf64>
 // CHECK-SAME: %[[ACC:.+]]: tensor<?x?x4x4x4x4x16xf64>
 // CHECK-SAME: ) -> tensor<?x?x4x4x4x4x16xf64>
 // CHECK: %[[MMA:.+]] = iree_codegen.inner_tiled ins(%[[LHS]], %[[RHS]]) outs(%[[ACC]])
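
A side note on the gfx1100 and gfx90a hunks above (an illustrative check, not part of the commit): with the new layouts the LHS tile ends up with the same static shape as the RHS tile for these intrinsics, which is why the two CHECK-SAME lines merge into one, and the per-tile element counts are unchanged. Sketch in Python:

from math import prod

# Static tile dims copied from the updated CHECK lines above.
wmma_f16_lhs = wmma_f16_rhs = (4, 1, 16, 16)  # gfx1100 WMMAR3_F32_16x16x16_F16
mfma_f64_lhs = mfma_f64_rhs = (4, 4, 16, 2)   # gfx90a  MFMA_f64_16x16x4_f64

# Element counts match the pre-commit LHS shapes (4,1,8,2,16) and (4,4,4,4,2).
assert prod((4, 1, 8, 2, 16)) == prod(wmma_f16_lhs) == 1024
assert prod((4, 4, 4, 4, 2)) == prod(mfma_f64_lhs) == 512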

compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_gfx942.mlir

Lines changed: 56 additions & 58 deletions
Large diffs are not rendered by default.

compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_gfx950.mlir

Lines changed: 15 additions & 15 deletions
@@ -28,9 +28,9 @@ func.func @set_encoding_LHS_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK: %[[EXPAND:.*]] = tensor.expand_shape %[[PACK]]
 // CHECK-SAME : tensor<2x9x128x64xi8> into tensor<2x9x4x8x4x4x16xi8>
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
-// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x9x4x8x4x4x16xi8>)
-// CHECK-SAME: outs({{.*}} : tensor<2x9x8x4x4x4x16xi8>)
-// CHECK-SAME: permutation = [0, 1, 3, 5, 2, 4, 6]
+// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x9x8x16x4x16xi8>)
+// CHECK-SAME: outs({{.*}} : tensor<2x9x8x4x16x16xi8>)
+// CHECK-SAME: permutation = [0, 1, 2, 4, 3, 5]
 // CHECK: return %[[TRANSPOSE]]

 // -----
@@ -53,11 +53,11 @@ func.func @set_encoding_RHS_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK-SAME: inner_tiles = [128, 64]
 // CHECK-SAME: : tensor<255x513xi8> -> tensor<5x4x128x64xi8>
 // CHECK: %[[EXPAND:.*]] = tensor.expand_shape %[[PACK]]
-// CHECK-SAME: : tensor<5x4x128x64xi8> into tensor<5x4x4x16x2x4x16xi8>
+// CHECK-SAME: : tensor<5x4x128x64xi8> into tensor<5x4x4x2x16x4x16xi8>
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
-// CHECK-SAME: ins(%[[EXPAND]] : tensor<5x4x4x16x2x4x16xi8>)
+// CHECK-SAME: ins(%[[EXPAND]] : tensor<5x4x4x2x16x4x16xi8>)
 // CHECK-SAME: outs({{.*}} : tensor<5x4x4x2x4x16x16xi8>)
-// CHECK-SAME: permutation = [0, 1, 2, 4, 5, 3, 6]
+// CHECK-SAME: permutation = [0, 1, 2, 3, 5, 4, 6]
 // CHECK: return %[[TRANSPOSE]]

 // -----
@@ -80,11 +80,11 @@ func.func @set_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK-SAME: inner_tiles = [128, 128]
 // CHECK-SAME: : tensor<255x513xi32> -> tensor<2x5x128x128xi32>
 // CHECK: %[[EXPAND:.*]] = tensor.expand_shape %[[PACK]]
-// CHECK-SAME: : tensor<2x5x128x128xi32> into tensor<2x5x4x8x4x4x16x2xi32>
+// CHECK-SAME: : tensor<2x5x128x128xi32> into tensor<2x5x8x4x4x4x2x16xi32>
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
-// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x5x4x8x4x4x16x2xi32>)
+// CHECK-SAME: ins(%[[EXPAND]] : tensor<2x5x8x4x4x4x2x16xi32>)
 // CHECK-SAME: outs({{.*}} : tensor<2x5x4x8x2x4x16x4xi32>)
-// CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4]
+// CHECK-SAME: permutation = [0, 1, 5, 2, 6, 3, 7, 4]
 // CHECK: return %[[TRANSPOSE]]

 // -----
@@ -103,10 +103,10 @@ func.func @unset_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x64_I8(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
 // CHECK: %[[TRANSPOSE:.*]] = linalg.transpose
 // CHECK-SAME: ins(%[[ARG0]] : tensor<2x5x4x8x2x4x16x4xi32>)
-// CHECK-SAME: outs({{.*}} : tensor<2x5x4x8x4x4x16x2xi32>)
-// CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4]
+// CHECK-SAME: outs({{.*}} : tensor<2x5x8x4x4x4x2x16xi32>)
+// CHECK-SAME: permutation = [0, 1, 3, 5, 7, 2, 4, 6]
 // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]]
-// CHECK-SAME: : tensor<2x5x4x8x4x4x16x2xi32> into tensor<2x5x128x128xi32>
+// CHECK-SAME: : tensor<2x5x8x4x4x4x2x16xi32> into tensor<2x5x128x128xi32>
 // CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]]
 // CHECK-SAME: outer_dims_perm = [0, 1]
 // CHECK-SAME: inner_dims_pos = [0, 1]
@@ -139,7 +139,7 @@ func.func @matmul_lowering_MFMA_I32_16x16x64_I8(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
 // CHECK: func.func @matmul_lowering_MFMA_I32_16x16x64_I8(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x8x4x4x4x16xi8>
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x8x4x16x16xi8>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x4x2x4x16x16xi8>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x4x8x2x4x16x4xi32>
 // CHECK-SAME: ) -> tensor<?x?x4x8x2x4x16x4xi32>
@@ -178,7 +178,7 @@ func.func @batch_matmul_lowering_MFMA_F32_16x16x128_F8E4M3FN(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
 // CHECK: func.func @batch_matmul_lowering_MFMA_F32_16x16x128_F8E4M3FN(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x4x4x32xf8E4M3FN>
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x16x32xf8E4M3FN>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x4x2x4x16x32xf8E4M3FN>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x4x8x2x4x16x4xf32>
 // CHECK-SAME: ) -> tensor<?x?x?x4x8x2x4x16x4xf32>
@@ -213,7 +213,7 @@ func.func @batch_matmul_lowering_MFMA_F32_16x16x32_BF16(
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
 // CHECK: func.func @batch_matmul_lowering_MFMA_F32_16x16x32_BF16(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x4x4x8xbf16>
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x8x4x16x8xbf16>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x4x2x4x16x8xbf16>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x4x8x2x4x16x4xf32>
 // CHECK-SAME: ) -> tensor<?x?x?x4x8x2x4x16x4xf32>
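
One more illustrative check (not part of the commit), for the gfx950 set_encoding_ACC hunk above: the updated expand_shape result and transpose permutation still compose to the unchanged outs shape, assuming linalg.transpose's documented shape rule dim(result, i) == dim(input, permutation[i]):

# Shapes and permutation copied from the updated CHECK lines.
expand_shape = (2, 5, 8, 4, 4, 4, 2, 16)          # tensor<2x5x8x4x4x4x2x16xi32>
permutation = (0, 1, 5, 2, 6, 3, 7, 4)

result_shape = tuple(expand_shape[p] for p in permutation)
assert result_shape == (2, 5, 4, 8, 2, 4, 16, 4)  # tensor<2x5x4x8x2x4x16x4xi32>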
