Skip to content

Commit 722c214

Browse files
authored
[DT] Migrate round_dims_to to iteration_sizes (#20459)
This PR migrates the encoding's `round_dims_to` field to the original operation's (matmul) `iteration_sizes`. The current `round_dims_to` sizes are used by the CPU backend to choose matmul tile sizes; using `iteration_sizes` instead should allow generating more efficient code for narrow matmul problem sizes.

Note some considerations for choosing tile sizes based on these iteration sizes:

- Static iteration sizes are rounded up by the CPU/GPU backend to a power of two to handle small odd dimension problem sizes, leading to slightly larger tile sizes (e.g. a narrow dimension of 5 is rounded up to 8).
- Dynamic iteration sizes are assumed to be large, leading to large tile sizes. Performing an analysis to find a tighter range is left for future work.

Resolves: #19897

Signed-off-by: Jorn Tuyls <[email protected]>
1 parent fd282b2 commit 722c214

20 files changed

+765
-441
lines changed

compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
66
#map2 = affine_map<(d0, d1, d2) -> (d2, d1)>
77
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
8-
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], round_dims_to = array<i64: 16, 16, 16>>
8+
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], iteration_sizes = [?, ?, ?]>
99
#device_target_llvm_cpu = #hal.device.target<"local", [#executable_target_embedded_elf_x86_64_]> : !hal.device
1010
module attributes {hal.device.targets = [#device_target_llvm_cpu]} {
1111
util.func public @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {

compiler/plugins/target/VulkanSPIRV/test/materialize_homogeneous_encodings.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
55
#map2 = affine_map<(d0, d1, d2) -> (d2, d1)>
66
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
7-
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], round_dims_to = array<i64: 16, 16, 16>>
7+
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], iteration_sizes = [?, ?, ?]>
88
#device_target_vulkan = #hal.device.target<"vulkan", [#executable_target_vulkan_spirv_fb]> : !hal.device
99
module attributes {hal.device.targets = [#device_target_vulkan]} {
1010
util.func public @lhs_encoding(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
@@ -28,7 +28,7 @@ module attributes {hal.device.targets = [#device_target_vulkan]} {
2828
#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
2929
#map2 = affine_map<(d0, d1, d2) -> (d2, d1)>
3030
#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
31-
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], round_dims_to = array<i64: 16, 16, 16>>
31+
#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map1, #map2, #map3], iteration_sizes = [?, ?, ?]>
3232
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {target_triple = "x86_64-none-elf", cpu_features = "+avx512f"}>
3333
#device_target_llvm_cpu = #hal.device.target<"local", [#executable_target_embedded_elf_x86_64_]> : !hal.device
3434
#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb">

compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx1100.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
66
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
77
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
8-
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
9-
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
10-
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
8+
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
9+
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
10+
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f16, f16, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
1111
#pipeline_layout_3 = #hal.pipeline.layout<constants = 3, bindings = [
1212
#hal.pipeline.binding<storage_buffer>,
1313
#hal.pipeline.binding<storage_buffer>,

compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx908.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
66
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
77
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
8-
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
9-
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
10-
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
8+
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
9+
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
10+
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [i8, i8, i32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
1111
#pipeline_layout_3 = #hal.pipeline.layout<constants = 3, bindings = [
1212
#hal.pipeline.binding<storage_buffer>,
1313
#hal.pipeline.binding<storage_buffer>,

compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx90a.mlir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
66
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
77
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
8-
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [bf16, bf16, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
9-
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [bf16, bf16, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
10-
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [bf16, bf16, f32], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
8+
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [bf16, bf16, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
9+
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [bf16, bf16, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
10+
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [bf16, bf16, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
1111
#pipeline_layout_3 = #hal.pipeline.layout<constants = 3, bindings = [
1212
#hal.pipeline.binding<storage_buffer>,
1313
#hal.pipeline.binding<storage_buffer>,
@@ -64,9 +64,9 @@ func.func @matmul_lowering_MFMA_f32_16x16x8_bf16() {
6464
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
6565
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
6666
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
67-
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f64, f64, f64], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
68-
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f64, f64, f64], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
69-
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f64, f64, f64], user_indexing_maps = [#map, #map1, #map2], round_dims_to = array<i64: 32, 32, 32>>
67+
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f64, f64, f64], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
68+
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f64, f64, f64], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
69+
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f64, f64, f64], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
7070
#pipeline_layout_3 = #hal.pipeline.layout<constants = 3, bindings = [
7171
#hal.pipeline.binding<storage_buffer>,
7272
#hal.pipeline.binding<storage_buffer>,

0 commit comments

Comments
 (0)