Skip to content

Commit 4e986f8

Browse files
[GPU] Use UkernelDescriptor and deprecate UkernelConfigAttr and GPULowerToUkernelsPass (iree-org#21766)
This commit updates the GPU pipeline to use UkernelDescriptor for ukernel-based lowerings, and consequently deprecates UkernelConfigAttr and GPULowerToUkernelsPass. Signed-off-by: Abhishek Varma <[email protected]>
1 parent 3aa9c80 commit 4e986f8

20 files changed

+42
-733
lines changed

compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx908.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ func.func @argmax_2d_f32i64(%arg0 : tensor<1x?xf32>) -> tensor<1xi64> attributes
2727
// CHECK-NOT: lowering_config<{{.*}}ukernel
2828
// CHECK-LABEL: func @argmax_2d_f32i64(
2929
// CHECK: linalg.generic
30-
// CHECK-NOT: hal.executable.objects
30+
// CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor

compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx942.mlir

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ func.func @argmax_2d_f32i64(%arg0 : tensor<1x?xf32>) -> tensor<1xi64> attributes
2424
// CHECK-LABEL: func @argmax_2d_f32i64(
2525
// CHECK: linalg.generic
2626
// CHECK-SAME: hal.executable.objects = [
27-
// CEHCK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
28-
// CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
27+
// CHECK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
28+
// CHECK-SAME: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
2929

3030
// -----
3131

@@ -53,8 +53,8 @@ func.func @argmax_4d_unit_parallel_f32i64(%arg0 : tensor<1x1x1x?xf32>) -> tensor
5353
// CHECK-LABEL: func @argmax_4d_unit_parallel_f32i64(
5454
// CHECK: linalg.generic
5555
// CHECK-SAME: hal.executable.objects = [
56-
// CEHCK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
57-
// CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
56+
// CHECK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
57+
// CHECK-SAME: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
5858

5959
// -----
6060

@@ -82,7 +82,7 @@ func.func @argmax_none_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor<1xi64>
8282
// CHECK-LABEL: func @argmax_none_ukernel_enabled(
8383
// CHECK: linalg.generic
8484
// CHECK-NOT: hal.executable.objects
85-
// CHECK-NOT: iree_gpu.ukernel_config
85+
// CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor
8686

8787
// -----
8888

@@ -111,7 +111,7 @@ func.func @argmax_only_argmax_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor
111111
// CHECK: linalg.generic
112112
// CHECK-SAME: hal.executable.objects = [
113113
// CHECK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
114-
// CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
114+
// CHECK-SAME: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
115115

116116
// -----
117117

@@ -140,7 +140,7 @@ func.func @argmax_only_foo_argmax_bar_ukernel_enabled(%arg0 : tensor<1x?xf32>) -
140140
// CHECK: linalg.generic
141141
// CHECK-SAME: hal.executable.objects = [
142142
// CHECK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
143-
// CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
143+
// CHECK-SAME: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
144144

145145
// -----
146146

@@ -168,7 +168,7 @@ func.func @argmax_only_foo_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor<1x
168168
// CHECK-LABEL: func @argmax_only_foo_ukernel_enabled(
169169
// CHECK: linalg.generic
170170
// CHECK-NOT: hal.executable.objects
171-
// CHECK-NOT: iree_gpu.ukernel_config
171+
// CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor
172172

173173
// -----
174174

@@ -198,6 +198,7 @@ func.func @argmax_2d_f32i64_not_neg_inf_init(%arg0 : tensor<1x?xf32>) -> tensor<
198198
// CHECK-LABEL: func @argmax_2d_f32i64_not_neg_inf_init(
199199
// CHECK: linalg.generic
200200
// CHECK-NOT: hal.executable.objects
201+
// CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor
201202

202203
// -----
203204

@@ -239,4 +240,4 @@ func.func @argmax_2d_f32i64_custom_bitcode(%arg0 : tensor<1x?xf32>) -> tensor<1x
239240
// CHECK-SAME: data = dense<[66, 67, -64, -34, 1, 35, 69, 103, -119, -85, -51, -17]> : tensor<12xi8>
240241
// CHECK-SAME: }>
241242
// CHECK-SAME: ]
242-
// CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
243+
// CHECK-SAME: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>

compiler/plugins/target/ROCM/test/config_ukernel_data_tiled_mma_gfx942.mlir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ func.func @multi_mma_mfma_i32_16x16x32_i8(%a : tensor<1x2x8x4x16x2x8xi8>,
2424
// CHECK-LABEL: @multi_mma_mfma_i32_16x16x32_i8
2525
// CHECK: iree_codegen.inner_tiled
2626
// CHECK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8.gfx942.bc"
27+
// CHECK-SAME: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8", bitcode>
2728
// CHECK-NOT: promote_operands
2829
// CHECK-SAME: reduction = [0, 0, 0]
29-
// CHECK-SAME: #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8"
30-
// CHECK-SAME: shared_memory_bytes = 8192

compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ iree_compiler_cc_library(
7777
"GPUGreedilyDistributeToThreads.cpp",
7878
"GPUInferMemorySpace.cpp",
7979
"GPULowerToGlobalLoads.cpp",
80-
"GPULowerToUKernels.cpp",
8180
"GPUMultiBuffering.cpp",
8281
"GPUNestedLayoutDistributionPatterns.cpp",
8382
"GPUPackToIntrinsics.cpp",

compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ iree_cc_library(
7070
"GPUGreedilyDistributeToThreads.cpp"
7171
"GPUInferMemorySpace.cpp"
7272
"GPULowerToGlobalLoads.cpp"
73-
"GPULowerToUKernels.cpp"
7473
"GPUMultiBuffering.cpp"
7574
"GPUNestedLayoutDistributionPatterns.cpp"
7675
"GPUPackToIntrinsics.cpp"

compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp

Lines changed: 0 additions & 266 deletions
This file was deleted.

compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -168,19 +168,6 @@ def GPUInferMemorySpacePass :
168168
];
169169
}
170170

171-
def GPULowerToUKernelsPass :
172-
Pass<"iree-codegen-gpu-lower-to-ukernels", ""> {
173-
let summary = "Lower suitable ops to previously-selected microkernels";
174-
let dependentDialects = [
175-
"::mlir::iree_compiler::IREE::Codegen::IREECodegenDialect",
176-
"::mlir::iree_compiler::IREE::GPU::IREEGPUDialect",
177-
"::mlir::arith::ArithDialect",
178-
"::mlir::bufferization::BufferizationDialect",
179-
"::mlir::gpu::GPUDialect",
180-
"::mlir::tensor::TensorDialect",
181-
];
182-
}
183-
184171
def GPUMultiBufferingPass :
185172
InterfacePass<"iree-codegen-gpu-multi-buffering", "mlir::FunctionOpInterface"> {
186173
let summary = "Pass to do multi buffering.";

compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ iree_lit_test_suite(
4040
"gpu_infer_memory_space.mlir",
4141
"gpu_combine_value_barriers.mlir",
4242
"gpu_lower_to_global_loads.mlir",
43-
"gpu_lower_to_ukernels.mlir",
4443
"gpu_nested_layout_contract_amdgpu.mlir",
4544
"gpu_nested_layout_vector_distribution.mlir",
4645
"gpu_nested_layout_vector_distribution_mask.mlir",

0 commit comments

Comments
 (0)