Skip to content

Commit 4069666

Browse files
authored
Use gpu_runtime passes in python gpu lowering (#195)
1 parent 8438278 commit 4069666

File tree

7 files changed

+174
-2168
lines changed

7 files changed

+174
-2168
lines changed

mlir/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ target_link_libraries(${MLIR_EXTENSIONS_LIB} PRIVATE
138138
MLIRLinalgTransforms
139139
MLIRTensorTransforms
140140
MLIRMathToSPIRV
141+
MLIRControlFlowToSPIRV
141142
)
142143

143144
target_include_directories(${MLIR_EXTENSIONS_LIB} SYSTEM PRIVATE

mlir/include/mlir-extensions/Conversion/gpu_to_gpu_runtime.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ namespace gpu_runtime {
2525
std::unique_ptr<mlir::Pass> createAbiAttrsPass();
2626
std::unique_ptr<mlir::Pass> createSetSPIRVCapabilitiesPass();
2727
std::unique_ptr<mlir::Pass> createGPUToSpirvPass();
28-
std::unique_ptr<mlir::Pass> createInsertGPUAllocsPass();
28+
std::unique_ptr<mlir::Pass>
29+
createInsertGPUAllocsPass(bool useGpuDealloc = true);
2930
std::unique_ptr<mlir::Pass> createUnstrideMemrefsPass();
3031
std::unique_ptr<mlir::Pass> createSerializeSPIRVPass();
3132
std::unique_ptr<mlir::Pass> createGPUExPass();

mlir/include/mlir-extensions/dialect/plier_util/PlierUtilOps.td

Lines changed: 0 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -137,104 +137,6 @@ def ExtractMemrefMetadataOp
137137
];
138138
}
139139

140-
def CreateGpuStreamOp : PlierUtil_Op<"create_gpu_stream", [NoSideEffect]> {
141-
let results = (outs PlierUtil_OpaqueType : $result);
142-
143-
let hasCanonicalizer = 1;
144-
145-
let builders = [OpBuilder<(ins)>];
146-
}
147-
148-
def DestroyGpuStreamOp : PlierUtil_Op<"destroy_gpu_stream"> {
149-
let arguments = (ins PlierUtil_OpaqueType : $source);
150-
}
151-
152-
def LoadGpuModuleOp : PlierUtil_Op<"load_gpu_module", [NoSideEffect]> {
153-
let arguments = (ins PlierUtil_OpaqueType : $stream, SymbolRefAttr : $module);
154-
let results = (outs PlierUtil_OpaqueType : $result);
155-
156-
let hasCanonicalizer = 1;
157-
158-
let builders = [OpBuilder<(ins "::mlir::Value"
159-
: $stream, "::mlir::gpu::GPUModuleOp"
160-
: $module)>];
161-
}
162-
163-
def DestroyGpuModuleOp : PlierUtil_Op<"destroy_gpu_module"> {
164-
let arguments = (ins PlierUtil_OpaqueType : $source);
165-
}
166-
167-
def GetGpuKernelOp : PlierUtil_Op<"get_gpu_kernel", [NoSideEffect]> {
168-
let arguments = (ins PlierUtil_OpaqueType : $module, SymbolRefAttr : $kernel);
169-
let results = (outs PlierUtil_OpaqueType : $result);
170-
171-
let hasCanonicalizer = 1;
172-
173-
let builders = [OpBuilder<(ins "::mlir::Value"
174-
: $module, "::mlir::gpu::GPUFuncOp"
175-
: $kernel)>];
176-
}
177-
178-
def LaunchGpuKernelOp
179-
: PlierUtil_Op<"launch_gpu_kernel",
180-
[GPU_AsyncOpInterface, AttrSizedOperandSegments]> {
181-
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
182-
PlierUtil_OpaqueType:$stream,
183-
PlierUtil_OpaqueType:$kernel,
184-
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
185-
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
186-
Variadic<AnyType>:$operands);
187-
let results = (outs Optional<GPU_AsyncToken> : $asyncToken);
188-
189-
let skipDefaultBuilders = 1;
190-
let builders = [OpBuilder<(ins "::mlir::Value" : $stream,
191-
"::mlir::Value" : $kernel,
192-
"::mlir::gpu::KernelDim3" : $gridSize,
193-
"::mlir::gpu::KernelDim3" : $blockSize,
194-
"::mlir::ValueRange" : $kernelOperands)>];
195-
}
196-
197-
def DestroyGpuKernelOp : PlierUtil_Op<"destroy_gpu_kernel"> {
198-
let arguments = (ins PlierUtil_OpaqueType : $source);
199-
}
200-
201-
def GPUAllocOp
202-
: PlierUtil_Op<"gpu_alloc", [GPU_AsyncOpInterface, AttrSizedOperandSegments]> {
203-
204-
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
205-
PlierUtil_OpaqueType:$stream,
206-
Variadic<Index>:$dynamicSizes,
207-
Variadic<Index>:$symbolOperands);
208-
let results = (outs Res<AnyMemRef, "", [MemAlloc]>:$memref,
209-
Optional<GPU_AsyncToken>:$asyncToken);
210-
211-
let extraClassDeclaration = [{
212-
::mlir::MemRefType getType() { return memref().getType().cast<::mlir::MemRefType>(); }
213-
}];
214-
}
215-
216-
def GPUSuggestBlockSizeOp : PlierUtil_Op<"suggest_block_size",
217-
[AttrSizedOperandSegments, NoSideEffect]> {
218-
let arguments = (ins Optional<PlierUtil_OpaqueType>:$stream,
219-
Optional<PlierUtil_OpaqueType>:$kernel,
220-
OptionalAttr<SymbolRefAttr>:$kernelRef,
221-
Variadic<Index>:$gridSize);
222-
223-
let results = (outs Variadic<Index>);
224-
225-
let builders = [OpBuilder<(ins "::llvm::Optional<::mlir::Value>" : $stream,
226-
"::mlir::OpFoldResult" : $kernel,
227-
"::mlir::ValueRange" : $gridSize)>];
228-
229-
let extraClassDeclaration = [{
230-
/// The name of the kernel's containing module.
231-
::mlir::StringAttr getKernelModuleName();
232-
233-
/// The name of the kernel.
234-
::mlir::StringAttr getKernelName();
235-
}];
236-
}
237-
238140
def PseudoCopyOp : PlierUtil_Op<"pseudo_copy", [ViewLikeOpInterface]> {
239141
let arguments = (ins AnyType : $source);
240142

mlir/lib/Conversion/gpu_runtime_to_llvm.cpp

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <mlir/Conversion/LLVMCommon/ConversionTarget.h>
2424
#include <mlir/Conversion/LLVMCommon/Pattern.h>
2525
#include <mlir/Conversion/LLVMCommon/TypeConverter.h>
26+
#include <mlir/Dialect/Func/IR/FuncOps.h>
27+
#include <mlir/Dialect/Func/Transforms/FuncConversions.h>
2628
#include <mlir/Dialect/GPU/Passes.h>
2729
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
2830
#include <mlir/Pass/PassManager.h>
@@ -31,18 +33,30 @@
3133

3234
static const char *kGpuAllocShared = "gpu.alloc_shared";
3335

34-
struct LowerUndef : public mlir::ConvertOpToLLVMPattern<plier::UndefOp> {
36+
namespace {
37+
struct LowerTakeContext
38+
: public mlir::ConvertOpToLLVMPattern<plier::TakeContextOp> {
3539
using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
3640

3741
mlir::LogicalResult
38-
matchAndRewrite(plier::UndefOp op, plier::UndefOp::Adaptor /*adaptor*/,
42+
matchAndRewrite(plier::TakeContextOp op,
43+
plier::TakeContextOp::Adaptor adaptor,
3944
mlir::ConversionPatternRewriter &rewriter) const override {
45+
auto srcTypes = op.getResultTypes();
46+
auto count = static_cast<unsigned>(srcTypes.size());
47+
llvm::SmallVector<mlir::Type> newTypes(count);
4048
auto converter = getTypeConverter();
41-
auto type = converter->convertType(op.getType());
42-
if (!type)
43-
return mlir::failure();
49+
assert(converter);
50+
for (auto i : llvm::seq(0u, count)) {
51+
auto oldType = srcTypes[i];
52+
auto newType = converter->convertType(oldType);
53+
newTypes[i] = (newType ? newType : oldType);
54+
}
4455

45-
rewriter.replaceOpWithNewOp<mlir::LLVM::UndefOp>(op, type);
56+
auto initFunc = adaptor.initFunc().getValueOr(mlir::SymbolRefAttr());
57+
auto releaseFunc = adaptor.releaseFunc().getValueOr(mlir::SymbolRefAttr());
58+
rewriter.replaceOpWithNewOp<plier::TakeContextOp>(op, newTypes, initFunc,
59+
releaseFunc);
4660
return mlir::success();
4761
}
4862
};
@@ -816,6 +830,41 @@ struct GPUToLLVMPass
816830
target);
817831
mlir::populateGpuToLLVMConversionPatterns(
818832
converter, patterns, mlir::gpu::getDefaultGpuBinaryAnnotation());
833+
mlir::populateFunctionOpInterfaceTypeConversionPattern<mlir::FuncOp>(
834+
patterns, converter);
835+
mlir::populateReturnOpTypeConversionPattern(patterns, converter);
836+
mlir::populateCallOpTypeConversionPattern(patterns, converter);
837+
838+
target.addDynamicallyLegalOp<mlir::FuncOp>(
839+
[&](mlir::FuncOp op) -> llvm::Optional<bool> {
840+
if (converter.isSignatureLegal(op.getType()) &&
841+
converter.isLegal(&op.getBody()))
842+
return true;
843+
844+
return llvm::None;
845+
});
846+
847+
target.addDynamicallyLegalOp<mlir::func::ReturnOp, plier::TakeContextOp,
848+
mlir::func::CallOp>(
849+
[&](mlir::Operation *op) -> llvm::Optional<bool> {
850+
for (auto range : {mlir::TypeRange(op->getOperandTypes()),
851+
mlir::TypeRange(op->getResultTypes())})
852+
for (auto type : range)
853+
if (converter.isLegal(type))
854+
return true;
855+
856+
return llvm::None;
857+
});
858+
target.addDynamicallyLegalOp<mlir::FuncOp>(
859+
[&](mlir::FuncOp op) -> llvm::Optional<bool> {
860+
auto type = op.getType();
861+
for (auto range : {type.getInputs(), type.getResults()})
862+
for (auto type : range)
863+
if (converter.isLegal(type))
864+
return true;
865+
866+
return llvm::None;
867+
});
819868

820869
patterns.insert<
821870
// clang-format off
@@ -829,7 +878,7 @@ struct GPUToLLVMPass
829878
ConvertGpuAllocPattern,
830879
ConvertGpuDeAllocPattern,
831880
ConvertGpuSuggestBlockSizePattern,
832-
LowerUndef
881+
LowerTakeContext
833882
// clang-format on
834883
>(converter);
835884

@@ -840,6 +889,8 @@ struct GPUToLLVMPass
840889
}
841890
};
842891

892+
} // namespace
893+
843894
// Expose the passes to the outside world
844895
std::unique_ptr<mlir::Pass> gpu_runtime::createEnumerateEventsPass() {
845896
return std::make_unique<EnumerateEventsPass>();

0 commit comments

Comments
 (0)