Skip to content

Commit d801150

Browse files
Added an option that allows disabling the GpuToGpuOcl path
1 parent b6ed40b commit d801150

File tree

9 files changed

+35
-18
lines changed

9 files changed

+35
-18
lines changed

include/gc/Transforms/Passes.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,19 +116,26 @@ void populateFrontendPasses(mlir::OpPassManager &);
116116
void populateCPUPipeline(mlir::OpPassManager &);
117117

118118
#ifdef GC_USE_IMEX
119-
struct GPUPipelineOption : PassPipelineOptions<GPUPipelineOption> {
119+
struct GPUPipelineOptions : PassPipelineOptions<GPUPipelineOptions> {
120120
Option<bool> isUsmArgs{
121121
*this, "is-usm-args",
122122
llvm::cl::desc("Whether to use USM(unified shared memory) func args, in "
123123
"which the host and device could access the same buffer "
124124
"and there is no need to add memcpy explicitly."),
125125
llvm::cl::init(true)};
126+
Option<bool> useGpuRuntime{
127+
*this, "use-gpu-ocl",
128+
llvm::cl::desc("Use the GpuToGpuOcl path, that converts the GPU "
129+
"operations to GpuOclRuntime calls."),
130+
llvm::cl::init(true)};
126131
Option<bool> callFinish{
127132
*this, "call-finish",
128-
llvm::cl::desc("Call finish() after each GPU kernel launch."),
133+
llvm::cl::desc(
134+
"Call finish() after each GPU kernel launch. This option is passed "
135+
"to the GpuToGpuOcl path, if use-gpu-ocl is true."),
129136
llvm::cl::init(false)};
130137
};
131-
void populateGPUPipeline(mlir::OpPassManager &, const GPUPipelineOption &);
138+
void populateGPUPipeline(mlir::OpPassManager &, const GPUPipelineOptions &);
132139
#endif
133140

134141
#define GEN_PASS_DECL

lib/gc/Transforms/GPU/Pipeline.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@
2929
namespace mlir::gc {
3030

3131
void populateGPUPipeline(OpPassManager &pm,
32-
const GPUPipelineOption &pipelineOption) {
33-
// Add an argument for the GPU context
34-
pm.addNestedPass<func::FuncOp>(createAddContextArg());
32+
const GPUPipelineOptions &pipelineOpts) {
33+
if (pipelineOpts.useGpuRuntime) {
34+
// Add an argument for the GPU context
35+
pm.addNestedPass<func::FuncOp>(createAddContextArg());
36+
}
3537

3638
pm.addNestedPass<func::FuncOp>(createIterativeTilingAndFusion());
3739

@@ -72,10 +74,9 @@ void populateGPUPipeline(OpPassManager &pm,
7274

7375
imex::InsertGPUAllocsOptions insertGPUAllocsOption{
7476
/*clientAPI*/ "opencl", /*inRegions*/ false,
75-
/*isUsmArgs*/ pipelineOption.isUsmArgs.getValue()};
77+
/*isUsmArgs*/ pipelineOpts.isUsmArgs};
7678
pm.addNestedPass<func::FuncOp>(
7779
imex::createInsertGPUAllocsPass(insertGPUAllocsOption));
78-
7980
pm.addPass(createGpuKernelOutliningPass());
8081
pm.addPass(createCanonicalizerPass());
8182
pm.addPass(imex::createSetSPIRVCapabilitiesPass());
@@ -94,14 +95,25 @@ void populateGPUPipeline(OpPassManager &pm,
9495
pm.addNestedPass<func::FuncOp>(LLVM::createRequestCWrappersPass());
9596
pm.addPass(imex::createSerializeSPIRVPass());
9697
pm.addPass(createConvertVectorToSCFPass());
98+
99+
if (!pipelineOpts.useGpuRuntime) {
100+
pm.addPass(imex::createConvertGPUToGPUXPass());
101+
}
102+
97103
pm.addPass(createConvertSCFToCFPass());
98104
pm.addPass(createConvertControlFlowToLLVMPass());
99105
pm.addPass(createConvertVectorToLLVMPass());
100106
pm.addPass(createConvertIndexToLLVMPass());
101107
pm.addPass(createArithToLLVMConversionPass());
102108
pm.addPass(createConvertFuncToLLVMPass());
103109
pm.addPass(createConvertMathToLLVMPass());
104-
pm.addPass(createGpuToGpuOcl({pipelineOption.callFinish}));
110+
111+
if (pipelineOpts.useGpuRuntime) {
112+
pm.addPass(createGpuToGpuOcl({pipelineOpts.callFinish}));
113+
} else {
114+
pm.addPass(imex::createConvertGPUXToLLVMPass());
115+
}
116+
105117
pm.addPass(createConvertIndexToLLVMPass());
106118
pm.addPass(memref::createExpandStridedMetadataPass());
107119
pm.addPass(createLowerAffinePass());
@@ -110,7 +122,7 @@ void populateGPUPipeline(OpPassManager &pm,
110122
}
111123

112124
void registerGPUPipeline() {
113-
PassPipelineRegistration<GPUPipelineOption>(
125+
PassPipelineRegistration<GPUPipelineOptions>(
114126
"gc-gpu-pipeline", "The GPU pipeline for Graph Compiler with IMEX",
115127
populateGPUPipeline);
116128
}

test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
1+
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
22
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
33
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
44
module{

test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
1+
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
22
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
33
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
44
module{

test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
1+
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
22
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
33
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
44
module{

test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
1+
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
22
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
33
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
44

test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" \
1+
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
22
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
33
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
44

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
if not config.gc_use_imex:
22
config.unsupported = True
3-
else: # FIXME: Remove this when the GPU runner is implemented.
4-
config.unsupported = True

test/mlir/test/gc/gpu-runner/mlp.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false" | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
1+
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" | gc-cpu-runner -e main -entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
22
#map0 = affine_map<(d0, d1) -> (d1)>
33
#map1 = affine_map<(d0, d1) -> (d0, d1)>
44
#map2 = affine_map<(d0, d1, d2) -> (d0, d2)>

0 commit comments

Comments
 (0)