Skip to content

Commit 33c3e5f

Browse files
authored
Add shared mem-space for SLM allocs (#397)
Signed-off-by: dchigarev <[email protected]>
1 parent 3e1d943 commit 33c3e5f

File tree

4 files changed

+132
-0
lines changed

4 files changed

+132
-0
lines changed

include/gc/Transforms/Passes.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,14 @@ def AddContextArg : Pass<"add-ctx-arg", "func::FuncOp"> {
101101
}];
102102
}
103103

104+
// Pass that tags memrefs allocated inside a `gpu.launch` region with the
// 'shared' memory space.
def AllocsToSLM : Pass<"allocs-to-slm", "func::FuncOp"> {
  let summary = "Add 'shared' memory space to memrefs allocated inside a gpu.launch.";
  let description = [{
    Rewrites every `memref.alloc` nested inside a `gpu.launch` region whose
    memref type has no memory space assigned yet, so that the resulting memref
    type carries the 'shared' memory space. Allocations located outside of
    `gpu.launch` and allocations that already specify a memory space are left
    unchanged.
  }];
  let dependentDialects = [
    "gpu::GPUDialect", "memref::MemRefDialect"
  ];
}
111+
104112
def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
105113
let summary = "Convert the GPU operations to GpuOclRuntime calls.";
106114
let description = [{
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
//===- AllocsToSLM.cpp - A pass adding shared mem-space attr ----*- C++ -*-===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "gc/Transforms/Passes.h"
10+
11+
#include "mlir/Dialect/Func/IR/FuncOps.h"
12+
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
13+
#include "mlir/Dialect/MemRef/IR/MemRef.h"
14+
#include "mlir/IR/Dialect.h"
15+
#include "mlir/Pass/Pass.h"
16+
#include "mlir/Pass/PassManager.h"
17+
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
18+
19+
#include <numeric>
20+
#include <optional>
21+
22+
using namespace mlir;
23+
using namespace mlir::gc;
24+
25+
namespace mlir {
26+
namespace gc {
27+
#define GEN_PASS_DEF_ALLOCSTOSLM
28+
#include "gc/Transforms/Passes.h.inc"
29+
} // namespace gc
30+
} // namespace mlir
31+
32+
namespace {
33+
34+
/// Returns true when `op` has an enclosing `gpu.launch` ancestor.
bool isInGpuLaunch(Operation *op) {
  return static_cast<bool>(op->getParentOfType<gpu::LaunchOp>());
}
38+
39+
/// Returns true when `value` is of memref type and that type carries a
/// non-null memory space attribute; returns false for any other value.
bool hasAssignedMemSpace(Value value) {
  auto asMemRef = dyn_cast<MemRefType>(value.getType());
  if (!asMemRef)
    return false;
  return static_cast<bool>(asMemRef.getMemorySpace());
}
47+
48+
/// Rewrites a `memref.alloc` nested inside a `gpu.launch` so that its result
/// memref type carries an explicit integer memory space.
///
/// The pattern does not apply (and reports a match failure) when the
/// allocated memref already has a memory space attribute or when the alloc is
/// not nested inside a `gpu.launch`.
struct ConvertAlloc : public OpRewritePattern<memref::AllocOp> {
  // The inherited constructors already cover construction from an
  // `MLIRContext *`, so no dedicated constructor is needed.
  using OpRewritePattern<memref::AllocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(memref::AllocOp allocOp,
                                PatternRewriter &rewriter) const override {
    if (hasAssignedMemSpace(allocOp->getResult(0))) {
      return rewriter.notifyMatchFailure(
          allocOp, "Memref already has some memory space attribute");
    }

    if (!isInGpuLaunch(allocOp)) {
      return rewriter.notifyMatchFailure(allocOp,
                                         "Only support allocs in GPU regions");
    }

    MemRefType originalMemRefType =
        cast<MemRefType>(allocOp->getResult(0).getType());

    // NOTE(review): the enumerator used here is `Private` although the pass
    // talks about 'shared' memory. The accompanying lit test expects the
    // converted alloc to have memory space `3` - confirm this numeric value is
    // the intended SLM address space for the target before changing it.
    IntegerAttr sharedAddressSpace = rewriter.getI64IntegerAttr(
        static_cast<int64_t>(gpu::AddressSpace::Private));

    // Same shape, element type and layout; only the memory space differs.
    MemRefType newMemRefType = MemRefType::get(
        originalMemRefType.getShape(), originalMemRefType.getElementType(),
        originalMemRefType.getLayout(), sharedAddressSpace);

    // Replace through the rewriter so the greedy driver is notified about the
    // new op, the updated users and the erased original. Dynamic sizes and
    // symbol operands are forwarded separately, and the alignment attribute is
    // carried over, so nothing from the original alloc is lost.
    rewriter.replaceOpWithNewOp<memref::AllocOp>(
        allocOp, newMemRefType, allocOp.getDynamicSizes(),
        allocOp.getSymbolOperands(), allocOp.getAlignmentAttr());

    return success();
  }
};
85+
86+
/// Pass driver: greedily applies `ConvertAlloc` to the operation the pass is
/// scheduled on.
struct AllocsToSLM : public gc::impl::AllocsToSLMBase<AllocsToSLM> {
  void runOnOperation() override {
    MLIRContext *context = &getContext();

    RewritePatternSet allocPatterns(context);
    allocPatterns.add<ConvertAlloc>(allocPatterns.getContext());

    // The result of the greedy driver is intentionally discarded.
    (void)applyPatternsAndFoldGreedily(getOperation(),
                                       std::move(allocPatterns));
  }
};
95+
96+
} // namespace

lib/gc/Transforms/GPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ set_property(GLOBAL APPEND PROPERTY IMEX_LIBS ${IMEX_LIBS})
1212

1313
gc_add_mlir_library(GcGpuPasses
1414
AddContextArg.cpp
15+
AllocsToSLM.cpp
1516
GpuToGpuOcl.cpp
1617
LinalgToXeGPU.cpp
1718
Pipeline.cpp
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// RUN: gc-opt %s --allocs-to-slm | FileCheck %s
2+
3+
func.func @entry() {
4+
%c1 = arith.constant 1 : index
5+
6+
// Memory space wasn't assigned as it's allocated outside of gpu.launch block
7+
// CHECK: %[[NEW_MEMREF_0:.*]] = memref.alloc() : memref<16x16xf16>
8+
%0 = memref.alloc() : memref<16x16xf16>
9+
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %c1, %sz_by = %c1, %sz_bz = %c1)
10+
threads(%tx, %ty, %tz) in (%sz_tx = %c1, %sz_ty = %c1, %sz_tz = %c1) {
11+
// Memory space wasn't changed as it's already explicitly specified
12+
// CHECK: %[[NEW_MEMREF_1:.*]] = memref.alloc() : memref<16x16xf16, 1>
13+
%1 = memref.alloc() : memref<16x16xf16, 1>
14+
// Added 'shared' memory space
15+
// CHECK: %[[NEW_MEMREF_2:.*]] = memref.alloc() : memref<16x16xf16, 3>
16+
%2 = memref.alloc() : memref<16x16xf16>
17+
18+
// CHECK: linalg.add ins(%[[NEW_MEMREF_1]], %[[NEW_MEMREF_2]] : memref<16x16xf16, 1>, memref<16x16xf16, 3>) outs(%[[NEW_MEMREF_0]] : memref<16x16xf16>)
19+
linalg.add ins(%1, %2 :memref<16x16xf16, 1>, memref<16x16xf16>) outs(%0 : memref<16x16xf16>)
20+
// CHECK: memref.dealloc %[[NEW_MEMREF_1]] : memref<16x16xf16, 1>
21+
// CHECK: memref.dealloc %[[NEW_MEMREF_2]] : memref<16x16xf16, 3>
22+
memref.dealloc %1 : memref<16x16xf16, 1>
23+
memref.dealloc %2 : memref<16x16xf16>
24+
gpu.terminator
25+
}
26+
return
27+
}

0 commit comments

Comments
 (0)