Skip to content

Commit 82f34dd

Browse files
committed
Add GPUToXeVM lowering pipeline pass.
It's the default GPU to XeVM lowering pipeline. It starts by lowering GPU code to the specified compilation target (default is fatbin), then lowers the host code. If XeGPU ops are used, it expects the MLIR code to have XeGPU ops already embedded in gpu code.
1 parent bed17c0 commit 82f34dd

File tree

4 files changed

+193
-1
lines changed

4 files changed

+193
-1
lines changed

mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- Passes.h - GPU NVVM pipeline entry points --------------------------===//
1+
//===- Passes.h - GPU NVVM/XeVM pipeline entry points----------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -60,6 +60,47 @@ struct GPUToNVVMPipelineOptions
6060
llvm::cl::init(false)};
6161
};
6262

63+
// Options for the gpu to xevm pipeline.
64+
struct GPUToXeVMPipelineOptions
65+
: public PassPipelineOptions<GPUToXeVMPipelineOptions> {
66+
// General lowering controls.
67+
PassOptions::Option<int64_t> indexBitWidth{
68+
*this, "index-bitwidth",
69+
llvm::cl::desc("Bitwidth of the index type (host & device)"),
70+
llvm::cl::init(64)};
71+
PassOptions::Option<bool> kernelBarePtrCallConv{
72+
*this, "kernel-bare-ptr-calling-convention",
73+
llvm::cl::desc("Use bare pointer calling convention for device kernels"),
74+
llvm::cl::init(false)};
75+
PassOptions::Option<bool> hostBarePtrCallConv{
76+
*this, "host-bare-ptr-calling-convention",
77+
llvm::cl::desc("Use bare pointer calling convention for host launches"),
78+
llvm::cl::init(false)};
79+
PassOptions::Option<std::string> binaryFormat{
80+
*this, "binary-format",
81+
llvm::cl::desc("Final GPU binary emission format (e.g. fatbin)"),
82+
llvm::cl::init("fatbin")};
83+
// Options mirroring xevm-attach-target (GpuXeVMAttachTarget).
84+
PassOptions::Option<std::string> xevmModuleMatcher{
85+
*this, "xevm-module-matcher",
86+
llvm::cl::desc("Regex to match gpu.module names for XeVM target attach"),
87+
llvm::cl::init("")};
88+
PassOptions::Option<std::string> zebinTriple{
89+
*this, "zebin-triple", llvm::cl::desc("Target triple for XeVM codegen"),
90+
llvm::cl::init("spirv64-unknown-unknown")};
91+
PassOptions::Option<std::string> zebinChip{
92+
*this, "zebin-chip", llvm::cl::desc("Target chip (e.g. pvc, bmg)"),
93+
llvm::cl::init("bmg")};
94+
PassOptions::Option<unsigned> optLevel{
95+
*this, "opt-level",
96+
llvm::cl::desc("Optimization level for attached target/codegen"),
97+
llvm::cl::init(2)};
98+
PassOptions::Option<std::string> cmdOptions{
99+
*this, "igc-cmd-options",
100+
llvm::cl::desc("Additional downstream compiler command line options"),
101+
llvm::cl::init("")};
102+
};
103+
63104
//===----------------------------------------------------------------------===//
64105
// Building and Registering.
65106
//===----------------------------------------------------------------------===//
@@ -70,8 +111,15 @@ struct GPUToNVVMPipelineOptions
70111
void buildLowerToNVVMPassPipeline(OpPassManager &pm,
71112
const GPUToNVVMPipelineOptions &options);
72113

114+
/// Adds the GPU to XeVM pipeline to the given pass manager. Transforms main
115+
/// dialects into XeVM targets. Begins with GPU code regions, then handles host
116+
/// code.
117+
void buildLowerToXeVMPassPipeline(OpPassManager &pm,
118+
const GPUToXeVMPipelineOptions &options);
119+
73120
/// Register all pipelines for the `gpu` dialect.
74121
void registerGPUToNVVMPipeline();
122+
void registerGPUToXeVMPipeline();
75123

76124
} // namespace gpu
77125
} // namespace mlir

mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
add_mlir_dialect_library(MLIRGPUPipelines
22
GPUToNVVMPipeline.cpp
3+
GPUToXeVMPipeline.cpp
34

45
ADDITIONAL_HEADER_DIRS
56
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
@@ -12,11 +13,15 @@ add_mlir_dialect_library(MLIRGPUPipelines
1213
MLIRLinalgTransforms
MLIRAffineToStandard
MLIRGPUToNVVMTransforms
MLIRIndexToLLVM
MLIRMathToLLVM
MLIRNVGPUToNVVM
MLIRNVVMToLLVM
MLIRReconcileUnrealizedCasts
MLIRSCFToControlFlow
MLIRVectorToSCF
# XeVM pipeline dependencies; each library is listed exactly once.
MLIRXeGPUTransforms
MLIRXeGPUToXeVM
MLIRXeVMToLLVM
)
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
//===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements a pass for testing the lowering to XeVM as a generally
10+
// usable sink pass. If XeGPU ops are used, it expects the MLIR code to have
11+
// XeGPU ops already embedded in gpu code.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
16+
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
17+
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
18+
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
19+
#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
20+
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
21+
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
22+
#include "mlir/Conversion/Passes.h"
23+
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
24+
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
25+
#include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h"
26+
#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h"
27+
#include "mlir/Dialect/Func/IR/FuncOps.h"
28+
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
29+
#include "mlir/Dialect/GPU/Pipelines/Passes.h"
30+
#include "mlir/Dialect/GPU/Transforms/Passes.h"
31+
#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
32+
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
33+
#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
34+
#include "mlir/Pass/PassManager.h"
35+
#include "mlir/Pass/PassOptions.h"
36+
#include "mlir/Target/LLVM/XeVM/Target.h"
37+
#include "mlir/Transforms/Passes.h"
38+
39+
using namespace mlir;
40+
41+
namespace {
42+
//===----------------------------------------------------------------------===//
43+
// Common pipeline
44+
//===----------------------------------------------------------------------===//
45+
/// Appends the passes that run on the top-level module before any
/// gpu.module-specific lowering: a CSE sweep followed by the XeVM
/// target-attach pass, configured from the pipeline options.
void buildCommonPassPipeline(
    OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
  // Module-scope cleanup first.
  pm.addPass(createCSEPass());

  // Forward the target-related pipeline options one-to-one to the
  // target-attach pass.
  GpuXeVMAttachTargetOptions attachOpts;
  attachOpts.moduleMatcher = options.xevmModuleMatcher;
  attachOpts.triple = options.zebinTriple;
  attachOpts.chip = options.zebinChip;
  attachOpts.optLevel = options.optLevel;
  attachOpts.cmdOptions = options.cmdOptions;
  pm.addPass(createGpuXeVMAttachTarget(attachOpts));
}
59+
60+
//===----------------------------------------------------------------------===//
61+
// GPUModule-specific stuff.
62+
//===----------------------------------------------------------------------===//
63+
/// Appends the device-side lowering passes. Every pass added here is nested
/// on `gpu.module` operations.
///
/// The sequence runs the XeGPU transformation passes (interleaved with
/// canonicalization, CSE, affine lowering and loop-invariant code motion),
/// then converts XeGPU to XeVM, GPU ops to LLVM/SPIR-V ops, and XeVM to LLVM.
///
/// \param pm      Pass manager to append to.
/// \param options Pipeline options; only `indexBitWidth` is consulted here.
void buildGpuPassPipeline(OpPassManager &pm,
                          const mlir::gpu::GPUToXeVMPipelineOptions &options) {
  // XeGPU transformations and generic cleanups.
  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUWgToSgDistribute());
  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking());
  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUPropagateLayout());
  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUSubgroupDistribute());
  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createLoopInvariantCodeMotionPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUVectorLinearize());

  // Dialect conversions towards LLVM.
  pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeGPUToXeVMPass());

  ConvertGpuOpsToLLVMSPVOpsOptions gpuToLLVMSPVOptions;
  // `use64bitIndex` is an on/off switch, not a width. Assigning the integer
  // bitwidth directly would turn it on for every non-zero value (including
  // 32), so derive it explicitly: 64-bit indices only when 64 was requested.
  gpuToLLVMSPVOptions.use64bitIndex = (options.indexBitWidth == 64);
  pm.addNestedPass<gpu::GPUModuleOp>(
      createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));

  pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeVMToLLVMPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
}
86+
87+
//===----------------------------------------------------------------------===//
88+
// Host Post-GPU pipeline
89+
//===----------------------------------------------------------------------===//
90+
/// Appends the host-side passes that run after the gpu.module lowering,
/// ending with serialization of the GPU modules to the requested binary
/// format and a final cleanup of unrealized casts.
///
/// The relative order of the passes is significant and is kept exactly as
/// registered below.
void buildHostPostPipeline(OpPassManager &pm,
                           const mlir::gpu::GPUToXeVMPipelineOptions &options) {
  // Function-scoped preparation.
  pm.addNestedPass<func::FuncOp>(LLVM::createLLVMRequestCWrappersPass());
  pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());

  // Module-scoped lowering of the remaining host code.
  pm.addPass(createReconcileUnrealizedCastsPass());
  pm.addPass(createConvertVectorToSCFPass());
  pm.addPass(createSCFToControlFlowPass());
  pm.addPass(memref::createExpandStridedMetadataPass());
  pm.addPass(createFinalizeMemRefToLLVMConversionPass());

  // GPU-to-LLVM conversion honours both calling-convention switches.
  GpuToLLVMConversionPassOptions hostLoweringOpts;
  hostLoweringOpts.hostBarePtrCallConv = options.hostBarePtrCallConv;
  hostLoweringOpts.kernelBarePtrCallConv = options.kernelBarePtrCallConv;
  pm.addPass(createGpuToLLVMConversionPass(hostLoweringOpts));

  pm.addPass(createConvertToLLVMPass());
  pm.addPass(createLowerAffinePass());

  // gpu-module-to-binary: emit the device code in the selected format,
  // passing the extra downstream compiler flags along.
  GpuModuleToBinaryPassOptions binaryOpts;
  binaryOpts.compilationTarget = options.binaryFormat;
  binaryOpts.cmdOptions = options.cmdOptions;
  pm.addPass(createGpuModuleToBinaryPass(binaryOpts));

  pm.addPass(createReconcileUnrealizedCastsPass());
}
116+
} // namespace
117+
118+
/// Assembles the complete GPU-to-XeVM pipeline on `pm` from its three stages,
/// in this order: common module-level passes, gpu.module lowering, and the
/// host-side passes that follow it.
void mlir::gpu::buildLowerToXeVMPassPipeline(
    OpPassManager &pm, const GPUToXeVMPipelineOptions &options) {
  // Stage 1: passes shared by the whole module.
  buildCommonPassPipeline(pm, options);
  // Stage 2: device code nested in gpu.module ops.
  buildGpuPassPipeline(pm, options);
  // Stage 3: host code, after the device code has been lowered.
  buildHostPostPipeline(pm, options);
}
129+
130+
void mlir::gpu::registerGPUToXeVMPipeline() {
131+
PassPipelineRegistration<GPUToXeVMPipelineOptions>(
132+
"gpu-lower-to-xevm-pipeline",
133+
"The default GPU to XeVM lowering pipeline. It starts by lowering GPU "
134+
"code to the "
135+
"specified compilation target (default is fatbin) then lowers the host "
136+
"code.",
137+
buildLowerToXeVMPassPipeline);
138+
}

mlir/lib/RegisterAllPasses.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,5 @@ void mlir::registerAllPasses() {
9898
sparse_tensor::registerSparseTensorPipelines();
9999
tosa::registerTosaToLinalgPipelines();
100100
gpu::registerGPUToNVVMPipeline();
101+
gpu::registerGPUToXeVMPipeline();
101102
}

0 commit comments

Comments
 (0)