Skip to content

Commit b5dee68

Browse files
authored
1 parent 95f504c commit b5dee68

File tree

16 files changed

+63
-34
lines changed

16 files changed

+63
-34
lines changed

build_tools/ci/cpu_comparison/matmul_template/matmul_transpose_a_KxM_KxN.mlir

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,13 @@ func.func @matmul_transpose_a(%arg0: tensor<${K}x${M}x${TYPE1}>, %arg1: tensor<$
66
%cst = arith.constant ${ZERO} : ${TYPE2}
77
%0 = tensor.empty() : tensor<${M}x${N}x${TYPE2}>
88
%1 = linalg.fill ins(%cst : ${TYPE2}) outs(%0 : tensor<${M}x${N}x${TYPE2}>) -> tensor<${M}x${N}x${TYPE2}>
9-
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<${K}x${M}x${TYPE1}>, tensor<${K}x${N}x${TYPE1}>)
9+
%2 = linalg.matmul
10+
indexing_maps = [
11+
affine_map<(d0, d1, d2) -> (d2, d0)>,
12+
affine_map<(d0, d1, d2) -> (d2, d1)>,
13+
affine_map<(d0, d1, d2) -> (d0, d1)>
14+
]
15+
ins(%arg0, %arg1 : tensor<${K}x${M}x${TYPE1}>, tensor<${K}x${N}x${TYPE1}>)
1016
outs(%1: tensor<${M}x${N}x${TYPE2}>) -> tensor<${M}x${N}x${TYPE2}>
1117
return %2: tensor<${M}x${N}x${TYPE2}>
1218
}

build_tools/ci/cpu_comparison/matmul_template/matmul_transpose_b_MxK_NxK.mlir

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,13 @@ func.func @matmul_transpose_b(%arg0: tensor<${M}x${K}x${TYPE1}>, %arg1: tensor<$
66
%cst = arith.constant ${ZERO} : ${TYPE2}
77
%0 = tensor.empty() : tensor<${M}x${N}x${TYPE2}>
88
%1 = linalg.fill ins(%cst : ${TYPE2}) outs(%0 : tensor<${M}x${N}x${TYPE2}>) -> tensor<${M}x${N}x${TYPE2}>
9-
%2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<${M}x${K}x${TYPE1}>, tensor<${N}x${K}x${TYPE1}>)
9+
%2 = linalg.matmul
10+
indexing_maps = [
11+
affine_map<(d0, d1, d2) -> (d0, d2)>,
12+
affine_map<(d0, d1, d2) -> (d1, d2)>,
13+
affine_map<(d0, d1, d2) -> (d0, d1)>
14+
]
15+
ins(%arg0, %arg1 : tensor<${M}x${K}x${TYPE1}>, tensor<${N}x${K}x${TYPE1}>)
1016
outs(%1: tensor<${M}x${N}x${TYPE2}>) -> tensor<${M}x${N}x${TYPE2}>
1117
return %2: tensor<${M}x${N}x${TYPE2}>
1218
}

build_tools/ci/generate_e2e_matmul_tests.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -261,7 +261,7 @@ def generate_function(
261261
acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{acc_type.value}>"
262262

263263
if transpose_rhs:
264-
op_name = "linalg.matmul_transpose_b"
264+
op_name = "linalg.matmul indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>]"
265265
else:
266266
op_name = "linalg.matmul"
267267

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -253,7 +253,7 @@ bool isMatmul(linalg::LinalgOp linalgOp) {
253253
bool isMatmulTransposeA(linalg::LinalgOp linalgOp) {
254254
// Step 0. Test if the op itself is a linalg.matmul_transpose_a op.
255255
if (isa<linalg::MatmulTransposeAOp, linalg::BatchMatmulTransposeAOp>(
256-
linalgOp))
256+
linalgOp.getOperation()))
257257
return true;
258258
if (!isa<linalg::GenericOp>(linalgOp)) return false;
259259

@@ -282,7 +282,7 @@ bool isMatmulTransposeA(linalg::LinalgOp linalgOp) {
282282
bool isMatmulTransposeB(linalg::LinalgOp linalgOp) {
283283
// Step 0. Test if the op itself is a linalg.matmul_transpose_b op.
284284
if (isa<linalg::MatmulTransposeBOp, linalg::BatchMatmulTransposeBOp>(
285-
linalgOp))
285+
linalgOp.getOperation()))
286286
return true;
287287
if (!isa<linalg::GenericOp>(linalgOp)) return false;
288288

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_workgroup_count.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
hal.executable private @test {
33
hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_arch = "chip-tbd"}>) {
44
hal.executable.export public @test_export ordinal(0) layout(#hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer, ReadOnly>]>) count(%arg0: !hal.device) -> (index, index, index) {
5-
%x, %y, %z = iree_tensor_ext.dispatch.workgroup_count_from_slice
5+
%x, %y, %z = iree_tensor_ext.dispatch.workgroup_count_from_slice()
66
hal.return %x, %y, %z : index, index, index
77
}
88
builtin.module {

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_air.mlir

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,15 @@ builtin.module {
7171
%4 = iree_tensor_ext.dispatch.tensor.load %1, offsets = [0, 0], sizes = [1024, 512], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1024x512xi32>> -> tensor<1024x512xi32>
7272
%5 = tensor.empty() : tensor<256x1024xi32>
7373
%6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<256x1024xi32>) -> tensor<256x1024xi32>
74-
// CHECK: linalg.matmul_transpose_b {lowering_config = #config, packing_config = #packingConfig}
75-
%7 = linalg.matmul_transpose_b ins(%3, %4 : tensor<256x512xi32>, tensor<1024x512xi32>) outs(%6 : tensor<256x1024xi32>) -> tensor<256x1024xi32>
74+
// CHECK: linalg.matmul {lowering_config = #config, packing_config = #packingConfig}
75+
%7 = linalg.matmul
76+
indexing_maps = [
77+
affine_map<(d0, d1, d2) -> (d0, d2)>,
78+
affine_map<(d0, d1, d2) -> (d1, d2)>,
79+
affine_map<(d0, d1, d2) -> (d0, d1)>
80+
]
81+
ins(%3, %4 : tensor<256x512xi32>, tensor<1024x512xi32>)
82+
outs(%6 : tensor<256x1024xi32>) -> tensor<256x1024xi32>
7683
iree_tensor_ext.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [256, 1024], strides = [1, 1] : tensor<256x1024xi32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<256x1024xi32>>
7784
return
7885
}

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu1.mlir

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,15 @@ module {
174174
%4 = iree_tensor_ext.dispatch.tensor.load %1, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<128x256xbf16>> -> tensor<128x256xbf16>
175175
%5 = tensor.empty() : tensor<128x128xf32>
176176
%6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<128x128xf32>) -> tensor<128x128xf32>
177-
// CHECK: linalg.matmul_transpose_b {lowering_config = #config, packing_config = #packingConfig}
178-
%7 = linalg.matmul_transpose_b ins(%3, %4 : tensor<128x256xbf16>, tensor<128x256xbf16>) outs(%6 : tensor<128x128xf32>) -> tensor<128x128xf32>
177+
// CHECK: linalg.matmul {lowering_config = #config, packing_config = #packingConfig}
178+
%7 = linalg.matmul
179+
indexing_maps = [
180+
affine_map<(d0, d1, d2) -> (d0, d2)>,
181+
affine_map<(d0, d1, d2) -> (d1, d2)>,
182+
affine_map<(d0, d1, d2) -> (d0, d1)>
183+
]
184+
ins(%3, %4 : tensor<128x256xbf16>, tensor<128x256xbf16>)
185+
outs(%6 : tensor<128x128xf32>) -> tensor<128x128xf32>
179186
iree_tensor_ext.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [128, 128], strides = [1, 1] : tensor<128x128xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<128x128xf32>>
180187
return
181188
}
@@ -210,8 +217,15 @@ module {
210217
%4 = iree_tensor_ext.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 128], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<256x128xbf16>> -> tensor<256x128xbf16>
211218
%5 = tensor.empty() : tensor<128x128xf32>
212219
%6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<128x128xf32>) -> tensor<128x128xf32>
213-
// CHECK: linalg.matmul_transpose_a {lowering_config = #config, packing_config = #packingConfig}
214-
%7 = linalg.matmul_transpose_a ins(%3, %4 : tensor<256x128xbf16>, tensor<256x128xbf16>) outs(%6 : tensor<128x128xf32>) -> tensor<128x128xf32>
220+
// CHECK: linalg.matmul {lowering_config = #config, packing_config = #packingConfig}
221+
%7 = linalg.matmul
222+
indexing_maps = [
223+
affine_map<(d0, d1, d2) -> (d2, d0)>,
224+
affine_map<(d0, d1, d2) -> (d2, d1)>,
225+
affine_map<(d0, d1, d2) -> (d0, d1)>
226+
]
227+
ins(%3, %4 : tensor<256x128xbf16>, tensor<256x128xbf16>)
228+
outs(%6 : tensor<128x128xf32>) -> tensor<128x128xf32>
215229
iree_tensor_ext.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [128, 128], strides = [1, 1] : tensor<128x128xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<128x128xf32>>
216230
return
217231
}

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/pack_and_transpose_level1.mlir

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ func.func @matmul_transpose_b_dispatch_0_matmul_transpose_b_256x1024x512_i32(%ar
3434
// CHECK: linalg.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %{{.*}} : tensor<256x1024xi32> -> tensor<4x16x64x64xi32>
3535
// CHECK: linalg.generic
3636
// CHECK-SAME: attrs = {lowering_config = #config, packing_config = #packingConfig}
37-
%2 = linalg.matmul_transpose_b {lowering_config = #config, packing_config = #packingConfig} ins(%arg0, %arg1 : tensor<256x512xi32>, tensor<1024x512xi32>) outs(%1 : tensor<256x1024xi32>) -> tensor<256x1024xi32>
37+
%2 = linalg.matmul
38+
indexing_maps = [
39+
affine_map<(d0, d1, d2) -> (d0, d2)>,
40+
affine_map<(d0, d1, d2) -> (d1, d2)>,
41+
affine_map<(d0, d1, d2) -> (d0, d1)>
42+
] {lowering_config = #config, packing_config = #packingConfig} ins(%arg0, %arg1 : tensor<256x512xi32>, tensor<1024x512xi32>) outs(%1 : tensor<256x1024xi32>) -> tensor<256x1024xi32>
3843
return %2 : tensor<256x1024xi32>
3944
}

runtime/src/iree-amd-aie/driver/xrt-lite/cts/executable_cache_test.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
!fdt_res = !iree_tensor_ext.dispatch.tensor<writeonly:tensor<512x512xf32>>
1515
hal.executable.source public @amdaie_fb {
1616
hal.executable.export public @mm_512_512_4096_bf16_f32 ordinal(0) layout(#pipeline_layout) count(%arg0: !hal.device) -> (index, index, index) {
17-
%x, %y, %z = iree_tensor_ext.dispatch.workgroup_count_from_slice
17+
%x, %y, %z = iree_tensor_ext.dispatch.workgroup_count_from_slice()
1818
hal.return %x, %y, %z : index, index, index
1919
}
2020
builtin.module {

runtime/src/iree-amd-aie/driver/xrt-lite/cts/matmul_dispatch_test.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -160,9 +160,9 @@ TEST_P(MatMulDispatchTest, DispatchMatmul) {
160160
binding_table.count, &command_buffer));
161161
IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
162162

163-
uint32_t workgroup_count[3] = {1, 1, 1};
164163
IREE_ASSERT_OK(iree_hal_command_buffer_dispatch(
165-
command_buffer, executable_, /*entry_point=*/0, workgroup_count,
164+
command_buffer, executable_, /*entry_point=*/0,
165+
iree_hal_make_static_dispatch_config(1, 1, 1),
166166
iree_const_byte_span_empty(), bindings, IREE_HAL_DISPATCH_FLAG_NONE));
167167

168168
IREE_ASSERT_OK(iree_hal_command_buffer_execution_barrier(

0 commit comments

Comments
 (0)