Added an option that allows disabling the GpuToGpuOcl path

AndreyPavlenko · AndreyPavlenko · commit d801150c3d19 · 2024-09-30T19:04:14.000+02:00
diff --git a/include/gc/Transforms/Passes.h b/include/gc/Transforms/Passes.h
@@ -116,19 +116,26 @@ void populateFrontendPasses(mlir::OpPassManager &);
 void populateCPUPipeline(mlir::OpPassManager &);
 
 #ifdef GC_USE_IMEX
-struct GPUPipelineOption : PassPipelineOptions<GPUPipelineOption> {
+struct GPUPipelineOptions : PassPipelineOptions<GPUPipelineOptions> {
   Option<bool> isUsmArgs{
       *this, "is-usm-args",
       llvm::cl::desc("Whether to use USM(unified shared memory) func args, in "
                      "which the host and device could access the same buffer "
                      "and there is no need to add memcpy explicitly."),
       llvm::cl::init(true)};
+  Option<bool> useGpuRuntime{
+      *this, "use-gpu-ocl",
+      llvm::cl::desc("Use the GpuToGpuOcl path, that converts the GPU "
+                     "operations to GpuOclRuntime calls."),
+      llvm::cl::init(true)};
   Option<bool> callFinish{
       *this, "call-finish",
-      llvm::cl::desc("Call finish() after each GPU kernel launch."),
+      llvm::cl::desc(
+          "Call finish() after each GPU kernel launch. This option is passed "
+          "to the GpuToGpuOcl path, if use-gpu-ocl is true."),
       llvm::cl::init(false)};
 };
-void populateGPUPipeline(mlir::OpPassManager &, const GPUPipelineOption &);
+void populateGPUPipeline(mlir::OpPassManager &, const GPUPipelineOptions &);
 #endif
 
 #define GEN_PASS_DECL
diff --git a/lib/gc/Transforms/GPU/Pipeline.cpp b/lib/gc/Transforms/GPU/Pipeline.cpp
@@ -29,9 +29,11 @@
 namespace mlir::gc {
 
 void populateGPUPipeline(OpPassManager &pm,
-                         const GPUPipelineOption &pipelineOption) {
-  // Add an argument for the GPU context
-  pm.addNestedPass<func::FuncOp>(createAddContextArg());
+                         const GPUPipelineOptions &pipelineOpts) {
+  if (pipelineOpts.useGpuRuntime) {
+    // Add an argument for the GPU context
+    pm.addNestedPass<func::FuncOp>(createAddContextArg());
+  }
 
   pm.addNestedPass<func::FuncOp>(createIterativeTilingAndFusion());
 
@@ -72,10 +74,9 @@ void populateGPUPipeline(OpPassManager &pm,
 
   imex::InsertGPUAllocsOptions insertGPUAllocsOption{
       /*clientAPI*/ "opencl", /*inRegions*/ false,
-      /*isUsmArgs*/ pipelineOption.isUsmArgs.getValue()};
+      /*isUsmArgs*/ pipelineOpts.isUsmArgs};
   pm.addNestedPass<func::FuncOp>(
       imex::createInsertGPUAllocsPass(insertGPUAllocsOption));
-
   pm.addPass(createGpuKernelOutliningPass());
   pm.addPass(createCanonicalizerPass());
   pm.addPass(imex::createSetSPIRVCapabilitiesPass());
@@ -94,14 +95,25 @@ void populateGPUPipeline(OpPassManager &pm,
   pm.addNestedPass<func::FuncOp>(LLVM::createRequestCWrappersPass());
   pm.addPass(imex::createSerializeSPIRVPass());
   pm.addPass(createConvertVectorToSCFPass());
+
+  if (!pipelineOpts.useGpuRuntime) {
+    pm.addPass(imex::createConvertGPUToGPUXPass());
+  }
+
   pm.addPass(createConvertSCFToCFPass());
   pm.addPass(createConvertControlFlowToLLVMPass());
   pm.addPass(createConvertVectorToLLVMPass());
   pm.addPass(createConvertIndexToLLVMPass());
   pm.addPass(createArithToLLVMConversionPass());
   pm.addPass(createConvertFuncToLLVMPass());
   pm.addPass(createConvertMathToLLVMPass());
-  pm.addPass(createGpuToGpuOcl({pipelineOption.callFinish}));
+
+  if (pipelineOpts.useGpuRuntime) {
+    pm.addPass(createGpuToGpuOcl({pipelineOpts.callFinish}));
+  } else {
+    pm.addPass(imex::createConvertGPUXToLLVMPass());
+  }
+
   pm.addPass(createConvertIndexToLLVMPass());
   pm.addPass(memref::createExpandStridedMetadataPass());
   pm.addPass(createLowerAffinePass());
@@ -110,7 +122,7 @@ void populateGPUPipeline(OpPassManager &pm,
 }
 
 void registerGPUPipeline() {
-  PassPipelineRegistration<GPUPipelineOption>(
+  PassPipelineRegistration<GPUPipelineOptions>(
       "gc-gpu-pipeline", "The GPU pipeline for Graph Compiler with IMEX",
       populateGPUPipeline);
 }
diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir
@@ -1,4 +1,4 @@
-// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
+// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
 // RUN: | gc-cpu-runner -e main --entry-point-result=void \
 // RUN:   --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
 module{
diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir
@@ -1,4 +1,4 @@
-// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
+// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
 // RUN: | gc-cpu-runner -e main --entry-point-result=void \
 // RUN:   --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
 module{
diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir
@@ -1,4 +1,4 @@
-// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
+// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
 // RUN: | gc-cpu-runner -e main --entry-point-result=void \
 // RUN:   --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
 module{
diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir
@@ -1,4 +1,4 @@
-// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
+// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
 // RUN: | gc-cpu-runner -e main --entry-point-result=void \
 // RUN:   --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
 
diff --git a/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir b/test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir
@@ -1,4 +1,4 @@
-// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
+// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
 // RUN: | gc-cpu-runner -e main --entry-point-result=void \
 // RUN:   --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
 
diff --git a/test/mlir/test/gc/gpu-runner/lit.local.cfg b/test/mlir/test/gc/gpu-runner/lit.local.cfg
@@ -1,4 +1,2 @@
 if not config.gc_use_imex:
     config.unsupported = True
-else: # FIXME: Remove this when the GPU runner is implemented.
-    config.unsupported = True
diff --git a/test/mlir/test/gc/gpu-runner/mlp.mlir b/test/mlir/test/gc/gpu-runner/mlp.mlir
@@ -1,4 +1,4 @@
-// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
+// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
 #map0 = affine_map<(d0, d1) -> (d1)>
 #map1 = affine_map<(d0, d1) -> (d0, d1)>
 #map2 = affine_map<(d0, d1, d2) -> (d0, d2)>

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \`
	`1`	`+// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \`
`2`	`2`	`// RUN: | gc-cpu-runner -e main --entry-point-result=void \`
`3`	`3`	`// RUN:   --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s`
`4`	`4`	`module{`