Skip to content

Commit 6c854b9

Browse files
authored
Add a new Op in the gpuruntime dialect for deallocating memory allocated by gpu.alloc (#187)
1 parent ef18139 commit 6c854b9

File tree

4 files changed

+68
-4
lines changed

4 files changed

+68
-4
lines changed

dpcomp_gpu_runtime/lib/gpu_runtime.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,8 @@ struct Stream {
350350
return {info, mem, event};
351351
}
352352

353+
/// Frees `ptr` by passing it, together with this stream's context handle, to
/// `zeMemFree`.
///
/// NOTE(review): the status code returned by `zeMemFree` is discarded here, so
/// a failed free goes unnoticed — consider routing it through the file's usual
/// error-reporting path.
void deallocBuffer(void *ptr) {
  auto ctxHandle = context.get();
  zeMemFree(ctxHandle, ptr);
}
354+
353355
void suggestBlockSize(ze_kernel_handle_t kernel, const uint32_t *gridSize,
354356
uint32_t *blockSize, size_t numDims) {
355357
assert(kernel);
@@ -483,6 +485,12 @@ dpcompGpuAlloc(void *stream, size_t size, size_t alignment, int shared,
483485
});
484486
}
485487

488+
/// Exported C entry point that releases `ptr` through the given stream.
///
/// `stream` is treated as a `Stream *` and its `deallocBuffer` is invoked with
/// `ptr`; the call is wrapped in `catchAll`.
extern "C" DPCOMP_GPU_RUNTIME_EXPORT void dpcompGpuDeAlloc(void *stream,
                                                           void *ptr) {
  LOG_FUNC();
  catchAll([&]() {
    auto *gpuStream = static_cast<Stream *>(stream);
    gpuStream->deallocBuffer(ptr);
  });
}
493+
486494
extern "C" DPCOMP_GPU_RUNTIME_EXPORT void
487495
dpcompGpuSuggestBlockSize(void *stream, void *kernel, const uint32_t *gridSize,
488496
uint32_t *blockSize, size_t numDims) {

mlir/include/mlir-extensions/dialect/gpu_runtime/IR/GpuRuntimeOps.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ def GPUAllocOp
136136
}];
137137
}
138138

139+
// Deallocation counterpart of the gpu_runtime alloc op: takes optional async
// dependencies, the memref to release and the runtime stream.
//
// The memref is an *operand* that gets freed, so it is declared with `Arg`
// and the `MemFree` effect (rather than the result wrapper `Res` with
// `MemAlloc`), letting side-effect analyses see this op as a deallocation.
def GPUDeallocOp : GpuRuntime_Op<"gpu_dealloc"> {
  let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
                   Arg<AnyMemRef, "", [MemFree]>:$memref,
                   GpuRuntime_OpaqueType:$stream);
}
144+
139145
def GPUSuggestBlockSizeOp : GpuRuntime_Op<"suggest_block_size",
140146
[AttrSizedOperandSegments, NoSideEffect]> {
141147
let arguments = (ins Optional<GpuRuntime_OpaqueType>:$stream,

mlir/lib/Conversion/gpu_runtime_to_llvm.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,14 @@ class ConvertOpToGpuRuntimeCallPattern
169169
llvmAllocResPtrType, // result
170170
}};
171171

172+
// Builder for calls to the `dpcompGpuDeAlloc` runtime function:
//   void dpcompGpuDeAlloc(stream, memory pointer)
FunctionCallBuilder deallocCallBuilder = {
    "dpcompGpuDeAlloc", llvmVoidType,
    {
        llvmPointerType, // stream
        llvmPointerType, // memory pointer
    }};
179+
172180
FunctionCallBuilder suggestBlockSizeBuilder = {
173181
"dpcompGpuSuggestBlockSize",
174182
llvmVoidType,
@@ -638,6 +646,30 @@ class ConvertGpuAllocPattern
638646
}
639647
};
640648

649+
class ConvertGpuDeAllocPattern
650+
: public ConvertOpToGpuRuntimeCallPattern<gpu_runtime::GPUDeallocOp> {
651+
public:
652+
ConvertGpuDeAllocPattern(mlir::LLVMTypeConverter &converter)
653+
: ConvertOpToGpuRuntimeCallPattern<gpu_runtime::GPUDeallocOp>(converter) {
654+
}
655+
656+
private:
657+
mlir::LogicalResult
658+
matchAndRewrite(gpu_runtime::GPUDeallocOp op,
659+
gpu_runtime::GPUDeallocOp::Adaptor adaptor,
660+
mlir::ConversionPatternRewriter &rewriter) const override {
661+
auto loc = op.getLoc();
662+
mlir::Value pointer =
663+
mlir::MemRefDescriptor(adaptor.memref()).allocatedPtr(rewriter, loc);
664+
auto casted =
665+
rewriter.create<mlir::LLVM::BitcastOp>(loc, llvmPointerType, pointer);
666+
mlir::Value params[] = {adaptor.stream(), casted};
667+
auto res = deallocCallBuilder.create(loc, rewriter, params);
668+
rewriter.replaceOp(op, res.getResults());
669+
return mlir::success();
670+
}
671+
};
672+
641673
class ConvertGpuSuggestBlockSizePattern
642674
: public ConvertOpToGpuRuntimeCallPattern<
643675
gpu_runtime::GPUSuggestBlockSizeOp> {
@@ -761,6 +793,7 @@ struct GPUToLLVMPass
761793
gpu_runtime::DestroyGpuKernelOp,
762794
gpu_runtime::LaunchGpuKernelOp,
763795
gpu_runtime::GPUAllocOp,
796+
gpu_runtime::GPUDeallocOp,
764797
gpu_runtime::GPUSuggestBlockSizeOp
765798
// clang-format on
766799
>();
@@ -780,6 +813,7 @@ struct GPUToLLVMPass
780813
ConvertGpuKernelDestroyPattern,
781814
ConvertGpuKernelLaunchPattern,
782815
ConvertGpuAllocPattern,
816+
ConvertGpuDeAllocPattern,
783817
ConvertGpuSuggestBlockSizePattern,
784818
LowerUndef
785819
// clang-format on

mlir/lib/Conversion/gpu_to_gpu_runtime.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,7 @@ struct InsertGPUAllocs
350350
if (access.hostRead && access.deviceWrite)
351351
builder.create<mlir::memref::CopyOp>(loc, allocResult, param);
352352

353-
// TODO: Add a memref dealloc or gpu dealloc
354-
// builder.create<mlir::memref::DeallocOp>(loc, allocResult);
353+
builder.create<mlir::gpu::DeallocOp>(loc, llvm::None, allocResult);
355354
}
356355
}
357356
};
@@ -1023,6 +1022,23 @@ struct ExpandAllocOp : public mlir::OpRewritePattern<mlir::gpu::AllocOp> {
10231022
}
10241023
};
10251024

1025+
/// Rewrites `mlir::gpu::DeallocOp` into `gpu_runtime::GPUDeallocOp`, passing
/// along the original op's async dependencies and memref together with the
/// stream obtained from `getGpuStream`.
///
/// The pattern fails to match when `getGpuStream` yields no stream.
struct ExpandDeallocOp : public mlir::OpRewritePattern<mlir::gpu::DeallocOp> {
  using OpRewritePattern::OpRewritePattern;

  mlir::LogicalResult
  matchAndRewrite(mlir::gpu::DeallocOp op,
                  mlir::PatternRewriter &rewriter) const override {
    auto stream = getGpuStream(rewriter, op);
    if (!stream)
      return mlir::failure();

    // The handle of the newly created op is not needed afterwards, so it is
    // deliberately not bound to a local (avoids an unused-variable warning).
    rewriter.replaceOpWithNewOp<gpu_runtime::GPUDeallocOp>(
        op, op.asyncDependencies(), op.memref(), *stream);

    return mlir::success();
  }
};
1041+
10261042
struct ExpandSuggestBlockSizeOp
10271043
: public mlir::OpRewritePattern<gpu_runtime::GPUSuggestBlockSizeOp> {
10281044
using OpRewritePattern::OpRewritePattern;
@@ -1158,8 +1174,8 @@ struct GPUExPass
11581174
auto *ctx = &getContext();
11591175
mlir::RewritePatternSet patterns(ctx);
11601176

1161-
patterns.insert<ExpandLaunchOp, ExpandAllocOp, ExpandSuggestBlockSizeOp>(
1162-
ctx);
1177+
patterns.insert<ExpandLaunchOp, ExpandAllocOp, ExpandDeallocOp,
1178+
ExpandSuggestBlockSizeOp>(ctx);
11631179

11641180
(void)mlir::applyPatternsAndFoldGreedily(getOperation(),
11651181
std::move(patterns));

0 commit comments

Comments
 (0)