Skip to content

Commit c60c1ba

Browse files
dhernandez0justinrosner
authored andcommitted
Annotate liveness pass (+ reuseLDS pass changes) (#2015)
* Annotate liveness making some assumptions (write + read pattern for LDS). Also update ReuseLDS pass accordingly. * Addressing PR comments * clang format
1 parent f725b3d commit c60c1ba

31 files changed

+1083
-282
lines changed

mlir/include/mlir/Dialect/Rock/IR/RockOps.td

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -569,17 +569,25 @@ def Rock_GpuAllocOp:
569569
let hasVerifier = 1;
570570
}
571571

572-
// Annotate lifetime of memory allocation on GPU memory hierachy.
573-
def Rock_GpuDeallocOp:
574-
Rock_Op<"dealloc", [MemoryEffects<[MemFree<DefaultResource>]>]>,
575-
Arguments<(ins AnyMemRef:$memref)> {
576-
let summary = "Annotate lifetime of memory allocation on GPU";
572+
// Annotate the start of lifetime of a memory allocation on GPU.
573+
def Rock_LiveInOp : Rock_Op<"live_in">, Arguments<(ins AnyMemRef:$memref)> {
574+
let summary =
575+
"Annotate the start of lifetime of a LDS memory allocation on GPU";
577576
let description = [{
578-
The `rock.dealloc` op annotates lifetime of memory allocation memory on GPU.
579-
- Address space 0 : global.
580-
- Address space 3 : LDS.
581-
- Address space 5 : private (VGPR).
582-
All other values would be considered as allocation on global.
577+
The `rock.live_in` op annotates the start of lifetime of a LDS memory allocation on GPU.
578+
}];
579+
let assemblyFormat = [{
580+
$memref attr-dict `:` type($memref)
581+
}];
582+
let hasVerifier = 1;
583+
}
584+
585+
// Annotate the end of lifetime of a memory allocation on GPU.
586+
def Rock_LiveOutOp : Rock_Op<"live_out">, Arguments<(ins AnyMemRef:$memref)> {
587+
let summary =
588+
"Annotate the end of lifetime of a LDS memory allocation on GPU";
589+
let description = [{
590+
The `rock.live_out` op annotates the end of lifetime of a LDS memory allocation on GPU.
583591
}];
584592
let assemblyFormat = [{
585593
$memref attr-dict `:` type($memref)

mlir/include/mlir/Dialect/Rock/Passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ namespace rock {
5050
#define GEN_PASS_DECL_ROCKFINDFIRSTGEMMINDEXPASS
5151
#define GEN_PASS_DECL_ROCKREMOVEOUTPUTALLOCPASS
5252
#define GEN_PASS_DECL_ROCKBLOCKWISELOADTILETOTHREADWISEPASS
53+
#define GEN_PASS_DECL_ROCKANNOTATELIVENESSPASS
5354

5455
#define GEN_PASS_REGISTRATION
5556
#include "mlir/Dialect/Rock/Passes.h.inc"

mlir/include/mlir/Dialect/Rock/Passes.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,8 @@ def RockOutputSwizzlePass : Pass<"rock-output-swizzle", "::mlir::func::FuncOp">
159159
}
160160

161161
def RockReuseLDSPass : Pass<"rock-reuse-lds", "::mlir::func::FuncOp"> {
162-
let summary = "This pass re-uses LDS memory by using the lifetime annotations (rock.dealloc)";
162+
let summary = "This pass re-uses LDS memory by using the lifetime "
163+
"annotations (rock.live_in, rock.live_out)";
163164
let dependentDialects = ["rock::RockDialect", "memref::MemRefDialect"];
164165
}
165166

@@ -202,4 +203,11 @@ def RockBlockwiseLoadTileToThreadwisePass
202203
"arith::ArithDialect", "memref::MemRefDialect"];
203204
}
204205

206+
// Function-level pass that annotates LDS memory with rock.live_in /
// rock.live_out liveness ops.
def RockAnnotateLivenessPass : Pass<"rock-annotate-liveness",
                                    "::mlir::func::FuncOp"> {
  let summary =
      "This pass annotates LDS memory with liveness ops (rock.live_in, rock.live_out)";
  let dependentDialects = ["rock::RockDialect", "memref::MemRefDialect"];
}
212+
205213
#endif // MLIR_DIALECT_ROCK_PASSES

mlir/include/mlir/Dialect/Rock/utility/loweringUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ FailureOr<VectorDimInfo> getVectorDim(Location loc, Value matrix, Type elemType,
258258
int64_t dPerBlock, int64_t kpack,
259259
bool directToLDS);
260260

261+
// Get the LDS size of the memref
262+
std::optional<int64_t> getWorkgroupMemorySize(MemRefType type);
263+
261264
} // end namespace rock
262265
} // end namespace mlir
263266
#endif

mlir/lib/Conversion/RockToGPU/RockToGPU.cpp

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -102,18 +102,6 @@ struct MIGPUAllocRewritePattern : public OpRewritePattern<rock::GpuAllocOp> {
102102
}
103103
};
104104

105-
struct MIGPUDeallocRewritePattern
106-
: public OpRewritePattern<rock::GpuDeallocOp> {
107-
using OpRewritePattern<rock::GpuDeallocOp>::OpRewritePattern;
108-
109-
LogicalResult matchAndRewrite(rock::GpuDeallocOp op,
110-
PatternRewriter &b) const override {
111-
112-
b.eraseOp(op);
113-
return mlir::success();
114-
}
115-
};
116-
117105
template <typename Tmi, typename Tgpu>
118106
struct MIOpRewritePattern : public OpRewritePattern<Tmi> {
119107
using OpRewritePattern<Tmi>::OpRewritePattern;
@@ -345,7 +333,7 @@ void LowerRockOpsToGPUPass::runOnOperation() {
345333
RewritePatternSet patterns(ctx);
346334

347335
// rock-lowering
348-
patterns.add<MIGPUAllocRewritePattern, MIGPUDeallocRewritePattern,
336+
patterns.add<MIGPUAllocRewritePattern,
349337
MIOpRewritePattern<rock::WorkgroupBarrierOp, gpu::BarrierOp>,
350338
MIOpRewritePattern<rock::LDSBarrierOp, amdgpu::LDSBarrierOp>,
351339
WorkgroupIdRewritePattern,

mlir/lib/Dialect/Rock/IR/RockDialect.cpp

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,20 +1138,43 @@ LogicalResult GpuAllocOp::verify() {
11381138
}
11391139

11401140
//===-----------------------------------------------------===//
1141-
// GpuDeallocOp
1141+
// LiveInOp
11421142
//===-----------------------------------------------------===//
11431143

1144-
LogicalResult GpuDeallocOp::verify() {
1144+
/// Verifies a `rock.live_in` op.
///
/// The operand must be the direct result of a `rock.alloc` (GpuAllocOp) and
/// its memref type must carry a gpu::AddressSpaceAttr equal to the workgroup
/// address space. Returns failure with a diagnostic otherwise. The diagnostic
/// strings are kept exactly as before.
LogicalResult LiveInOp::verify() {
  // Value::getDefiningOp<OpTy>() returns a null op both for block arguments
  // and for values produced by any other op. A bare isa<> on the raw
  // getDefiningOp() pointer would assert when the operand is a block argument.
  if (!getMemref().getDefiningOp<GpuAllocOp>())
    return emitError("The operand of rock.live_in must be the result of a "
                     "rock.alloc operation.");

  // Reject a missing memory space, a non-GPU memory space attribute, or any
  // GPU address space other than workgroup.
  auto memSpace = dyn_cast_or_null<gpu::AddressSpaceAttr>(
      getMemref().getType().getMemorySpace());
  if (!memSpace ||
      memSpace.getValue() != gpu::GPUDialect::getWorkgroupAddressSpace())
    return emitError("The operand of rock.live_in must a LDS memref");

  return success();
}
1159+
1160+
//===-----------------------------------------------------===//
1161+
// LiveOutOp
1162+
//===-----------------------------------------------------===//
1163+
1164+
/// Verifies a `rock.live_out` op.
///
/// The operand must be the direct result of a `rock.alloc` (GpuAllocOp) and
/// its memref type must carry a gpu::AddressSpaceAttr equal to the workgroup
/// address space. Returns failure with a diagnostic otherwise. The diagnostic
/// strings are kept exactly as before.
LogicalResult LiveOutOp::verify() {
  // Value::getDefiningOp<OpTy>() returns a null op both for block arguments
  // and for values produced by any other op. A bare isa<> on the raw
  // getDefiningOp() pointer would assert when the operand is a block argument.
  if (!getMemref().getDefiningOp<GpuAllocOp>())
    return emitError("The operand of rock.live_out must be the result of a "
                     "rock.alloc operation.");

  // Reject a missing memory space, a non-GPU memory space attribute, or any
  // GPU address space other than workgroup.
  auto memSpace = dyn_cast_or_null<gpu::AddressSpaceAttr>(
      getMemref().getType().getMemorySpace());
  if (!memSpace ||
      memSpace.getValue() != gpu::GPUDialect::getWorkgroupAddressSpace())
    return emitError("The operand of rock.live_out must a LDS memref");

  return success();
}
11561179

11571180
//===-----------------------------------------------------===//

mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,14 @@ void rock::buildKernelPipeline(OpPassManager &pm,
183183
funcPm.addPass(createConvertLinalgToAffineLoopsPass());
184184
funcPm.addPass(rock::createRockVectorizeFusionsPass());
185185
}
186+
// We run reuse LDS before the output swizzle pass because that pass uses a
187+
// heuristic to determine whether to swizzle or not, and that heuristic needs
188+
// the actual LDS usage. Output swizzle then creates a new LDS buffer, so we
189+
// annotate liveness and run reuse LDS again to be able to reuse LDS memory.
190+
funcPm.addPass(rock::createRockAnnotateLivenessPass());
186191
funcPm.addPass(rock::createRockReuseLDSPass());
187192
funcPm.addPass(rock::createRockOutputSwizzlePass());
193+
funcPm.addPass(rock::createRockAnnotateLivenessPass());
188194
funcPm.addPass(rock::createRockReuseLDSPass());
189195

190196
if (!options.enableApplicability) {

mlir/lib/Dialect/Rock/Transforms/AlignTiling.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,9 +1519,6 @@ static LogicalResult insertBlockwiseReduction(
15191519
/*extraViews=*/nullptr,
15201520
getBlockSize(reduceOp->getParentOfType<func::FuncOp>()).value());
15211521

1522-
ViewLikeOpInterface viewOp =
1523-
ldsWorkspace.getDefiningOp<ViewLikeOpInterface>();
1524-
GpuDeallocOp::create(rewriter, loc, viewOp.getViewSource());
15251522
// Create partial reduction views
15261523
ArrayAttr paddedReducedTrStack;
15271524
{

0 commit comments

Comments
 (0)