amd-eochoalo
diff --git a/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx908.mlir b/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx908.mlir
Lines changed: 1 addition & 1 deletion
diff --git a/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx942.mlir b/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx942.mlir
Lines changed: 10 additions & 9 deletions
diff --git a/compiler/plugins/target/ROCM/test/config_ukernel_data_tiled_mma_gfx942.mlir b/compiler/plugins/target/ROCM/test/config_ukernel_data_tiled_mma_gfx942.mlir
Lines changed: 1 addition & 2 deletions
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel
Lines changed: 0 additions & 1 deletion
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt
Lines changed: 0 additions & 1 deletion
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp
Lines changed: 0 additions & 266 deletions
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
Lines changed: 0 additions & 13 deletions
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
Lines changed: 0 additions & 1 deletion
@@ -27,4 +27,4 @@ func.func @argmax_2d_f32i64(%arg0 : tensor<1x?xf32>) -> tensor<1xi64> attributes
 //   CHECK-NOT: lowering_config<{{.*}}ukernel
 // CHECK-LABEL: func @argmax_2d_f32i64(
 //       CHECK: linalg.generic
-//   CHECK-NOT: hal.executable.objects
+//   CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor
@@ -24,8 +24,8 @@ func.func @argmax_2d_f32i64(%arg0 : tensor<1x?xf32>) -> tensor<1xi64> attributes
 // CHECK-LABEL: func @argmax_2d_f32i64(
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
-//  CEHCK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
+//  CHECK-SAME:   iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
 
 // -----
 
@@ -53,8 +53,8 @@ func.func @argmax_4d_unit_parallel_f32i64(%arg0 : tensor<1x1x1x?xf32>) -> tensor
 // CHECK-LABEL: func @argmax_4d_unit_parallel_f32i64(
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
-//  CEHCK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
+//  CHECK-SAME:   iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
 
 // -----
 
@@ -82,7 +82,7 @@ func.func @argmax_none_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor<1xi64>
 // CHECK-LABEL: func @argmax_none_ukernel_enabled(
 //       CHECK: linalg.generic
 //   CHECK-NOT: hal.executable.objects
-//   CHECK-NOT: iree_gpu.ukernel_config
+//   CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor
 
 // -----
 
@@ -111,7 +111,7 @@ func.func @argmax_only_argmax_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
 //  CHECK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
 
 // -----
 
@@ -140,7 +140,7 @@ func.func @argmax_only_foo_argmax_bar_ukernel_enabled(%arg0 : tensor<1x?xf32>) -
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
 //  CHECK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
 
 // -----
 
@@ -168,7 +168,7 @@ func.func @argmax_only_foo_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor<1x
 // CHECK-LABEL: func @argmax_only_foo_ukernel_enabled(
 //       CHECK: linalg.generic
 //   CHECK-NOT: hal.executable.objects
-//   CHECK-NOT: iree_gpu.ukernel_config
+//   CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor
 
 // -----
 
@@ -198,6 +198,7 @@ func.func @argmax_2d_f32i64_not_neg_inf_init(%arg0 : tensor<1x?xf32>) -> tensor<
 // CHECK-LABEL: func @argmax_2d_f32i64_not_neg_inf_init(
 //       CHECK: linalg.generic
 //   CHECK-NOT: hal.executable.objects
+//   CHECK-NOT: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor
 
 // -----
 
@@ -239,4 +240,4 @@ func.func @argmax_2d_f32i64_custom_bitcode(%arg0 : tensor<1x?xf32>) -> tensor<1x
 //  CHECK-SAME:         data = dense<[66, 67, -64, -34, 1, 35, 69, 103, -119, -85, -51, -17]> : tensor<12xi8>
 //  CHECK-SAME:       }>
 //  CHECK-SAME:     ]
-//  CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:     iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_argmax_f32i64", bitcode>
@@ -24,7 +24,6 @@ func.func @multi_mma_mfma_i32_16x16x32_i8(%a : tensor<1x2x8x4x16x2x8xi8>,
 // CHECK-LABEL: @multi_mma_mfma_i32_16x16x32_i8
 //       CHECK: iree_codegen.inner_tiled
 //  CHECK-SAME: #hal.executable.object<{path = "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8.gfx942.bc"
+//  CHECK-SAME: iree_codegen.ukernel = #iree_codegen.ukernel_descriptor<"iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8", bitcode>
 //  CHECK-NOT:  promote_operands
 //  CHECK-SAME: reduction = [0, 0, 0]
-//  CHECK-SAME: #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8"
-//  CHECK-SAME: shared_memory_bytes = 8192
@@ -77,7 +77,6 @@ iree_compiler_cc_library(
         "GPUGreedilyDistributeToThreads.cpp",
         "GPUInferMemorySpace.cpp",
         "GPULowerToGlobalLoads.cpp",
-        "GPULowerToUKernels.cpp",
         "GPUMultiBuffering.cpp",
         "GPUNestedLayoutDistributionPatterns.cpp",
         "GPUPackToIntrinsics.cpp",
 
@@ -70,7 +70,6 @@ iree_cc_library(
     "GPUGreedilyDistributeToThreads.cpp"
     "GPUInferMemorySpace.cpp"
     "GPULowerToGlobalLoads.cpp"
-    "GPULowerToUKernels.cpp"
     "GPUMultiBuffering.cpp"
     "GPUNestedLayoutDistributionPatterns.cpp"
     "GPUPackToIntrinsics.cpp"
 
@@ -168,19 +168,6 @@ def GPUInferMemorySpacePass :
   ];
 }
 
-def GPULowerToUKernelsPass :
-    Pass<"iree-codegen-gpu-lower-to-ukernels", ""> {
-  let summary = "Lower suitable ops to previously-selected microkernels";
-  let dependentDialects = [
-    "::mlir::iree_compiler::IREE::Codegen::IREECodegenDialect",
-    "::mlir::iree_compiler::IREE::GPU::IREEGPUDialect",
-    "::mlir::arith::ArithDialect",
-    "::mlir::bufferization::BufferizationDialect",
-    "::mlir::gpu::GPUDialect",
-    "::mlir::tensor::TensorDialect",
-  ];
-}
-
 def GPUMultiBufferingPass :
     InterfacePass<"iree-codegen-gpu-multi-buffering", "mlir::FunctionOpInterface"> {
   let summary = "Pass to do multi buffering.";
 
@@ -40,7 +40,6 @@ iree_lit_test_suite(
             "gpu_infer_memory_space.mlir",
             "gpu_combine_value_barriers.mlir",
             "gpu_lower_to_global_loads.mlir",
-            "gpu_lower_to_ukernels.mlir",
             "gpu_nested_layout_contract_amdgpu.mlir",
             "gpu_nested_layout_vector_distribution.mlir",
             "gpu_nested_layout_vector_distribution_mask.mlir",