
Commit d083dc6
Authored and committed by Peiming Liu
[mlir][sparse] refactoring: move genAffine to loopEmitter
This is the first patch in a sequence of dependent patches that together provide support for affine expressions in the matched indexing maps for sparse tensors. This patch itself simply moves `genAffine` into the loop emitter, in preparation for the upcoming patches:

D138169 adds support for affine expressions on dense dimensions only (except for constant affine expressions).
D138170 adds support for constant affine expressions on dense dimensions.
D138171 adds **merger** support for affine expressions on sparse dimensions (without codegen).
D138172 adds **codegen** support (by generating a "filter" loop) for affine expressions on sparse dimensions.
D138173 fixes a crash in resolveCycle when dealing with affine expressions.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D138168
1 parent 91deae9 commit d083dc6
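
For context, the affine expressions on indexing maps in question are subscripts such as d0 + d1 in a tensor's indexing map. A minimal sketch of building one with the MLIR API (illustrative only; `ctx` and the helper name `buildExampleMap` are assumptions, not part of the patch):

    #include "mlir/IR/AffineExpr.h"
    #include "mlir/IR/AffineMap.h"

    using namespace mlir;

    // Builds the indexing map (d0, d1) -> (d0 + d1), i.e. the subscript of a
    // tensor access such as a(i + j).
    AffineMap buildExampleMap(MLIRContext *ctx) {
      AffineExpr d0 = getAffineDimExpr(0, ctx);
      AffineExpr d1 = getAffineDimExpr(1, ctx);
      return AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, d0 + d1);
    }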

File tree: 3 files changed (+55, −37 lines)


mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp

Lines changed: 39 additions & 2 deletions
@@ -97,12 +97,14 @@ static Value genIndexAndValueForDense(OpBuilder &builder, Location loc,
 SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors,
                                                  StringAttr loopTag,
                                                  bool hasOutput,
-                                                 bool isSparseOut)
+                                                 bool isSparseOut,
+                                                 ArrayRef<unsigned> topSort)
     : loopTag(loopTag), hasOutput(hasOutput), isSparseOut(isSparseOut),
       tensors(tensors.begin(), tensors.end()), dimTypes(tensors.size()),
       pidxs(tensors.size()), coord(tensors.size()), highs(tensors.size()),
       ptrBuffer(tensors.size()), idxBuffer(tensors.size()),
-      valBuffer(tensors.size()), loopStack() {
+      valBuffer(tensors.size()), loopStack(),
+      sparsiferLoopLvlMap(topSort.size(), 0) {
   for (size_t tid = 0, e = tensors.size(); tid < e; tid++) {
     auto t = tensors[tid];
     // a scalar or 0-dimension tensors
@@ -126,6 +128,13 @@ SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors,
     ptrBuffer[tid].assign(rank, Value());
     idxBuffer[tid].assign(rank, Value());
   }
+
+  for (unsigned i = 0, e = topSort.size(); i < e; i++) {
+    // This is an inverse map of the topologically sorted loop index from
+    // sparsifier. This is needed to map the AffineDimExpr back to the loopStack
+    // index used in loop emitter.
+    sparsiferLoopLvlMap[topSort[i]] = i;
+  }
 }
 
 void SparseTensorLoopEmitter::initializeLoopEmit(
@@ -216,6 +225,34 @@ void SparseTensorLoopEmitter::enterNewLoopSeq(OpBuilder &builder, Location loc,
   prepareLoopOverTensorAtDim(builder, loc, tid, dim);
 }
 
+Value SparseTensorLoopEmitter::genAffine(OpBuilder &builder, AffineExpr a,
+                                         Location loc) {
+  switch (a.getKind()) {
+  case AffineExprKind::DimId: {
+    unsigned idx = a.cast<AffineDimExpr>().getPosition();
+    return loopStack[sparsiferLoopLvlMap[idx]].iv;
+  }
+  case AffineExprKind::Add: {
+    auto binOp = a.cast<AffineBinaryOpExpr>();
+    return builder.create<arith::AddIOp>(
+        loc, genAffine(builder, binOp.getLHS(), loc),
+        genAffine(builder, binOp.getRHS(), loc));
+  }
+  case AffineExprKind::Mul: {
+    auto binOp = a.cast<AffineBinaryOpExpr>();
+    return builder.create<arith::MulIOp>(
+        loc, genAffine(builder, binOp.getLHS(), loc),
+        genAffine(builder, binOp.getRHS(), loc));
+  }
+  case AffineExprKind::Constant: {
+    int64_t c = a.cast<AffineConstantExpr>().getValue();
+    return constantIndex(builder, loc, c);
+  }
+  default:
+    llvm_unreachable("unexpected affine subscript");
+  }
+}
+
 Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
     OpBuilder &builder, Location loc, size_t tid, size_t dim,
     MutableArrayRef<Value> reduc, bool isParallel, ArrayRef<size_t> extraTids,
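
The constructor above builds sparsiferLoopLvlMap as the inverse of the topSort permutation. A standalone sketch of the same computation (the helper `invertTopSort` is hypothetical, for illustration only):

    #include <cassert>
    #include <vector>

    // If loop id topSort[i] is emitted at loopStack depth i, then
    // inv[loopId] recovers that depth.
    std::vector<unsigned> invertTopSort(const std::vector<unsigned> &topSort) {
      std::vector<unsigned> inv(topSort.size(), 0);
      for (unsigned i = 0, e = topSort.size(); i < e; i++)
        inv[topSort[i]] = i;
      return inv;
    }

    int main() {
      // topSort = {2, 0, 1}: loop d2 is emitted first, then d0, then d1, so
      // AffineDimExpr position 0 resolves to loopStack depth 1, and so on.
      auto inv = invertTopSort({2, 0, 1});
      assert(inv[0] == 1 && inv[1] == 2 && inv[2] == 0);
      return 0;
    }

This is why genAffine can simply index loopStack[sparsiferLoopLvlMap[idx]] to recover the induction variable for an AffineDimExpr, regardless of the order in which sparsification chose to emit the loops.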

mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h

Lines changed: 14 additions & 1 deletion
@@ -351,16 +351,24 @@ class SparseTensorLoopEmitter {
   /// tensor id (tid) used in related functions.
   /// If isSparseOut is set, loop emitter assume that the sparse output tensor
   /// is empty, and will always generate loops on it based on the dim sizes.
+  /// An optional array could be provided (by sparsification) to indicate the
+  /// loop id sequence that will be generated. It is used to establish the
+  /// mapping between affineDimExpr to the corresponding loop index in the loop
+  /// stack that are maintained by the loop emitter.
   explicit SparseTensorLoopEmitter(ValueRange tensors,
                                    StringAttr loopTag = nullptr,
                                    bool hasOutput = false,
-                                   bool isSparseOut = false);
+                                   bool isSparseOut = false,
+                                   ArrayRef<unsigned> topSort = {});
 
   /// Starts a loop emitting session by generating all the buffers needed to
   /// iterate tensors.
   void initializeLoopEmit(OpBuilder &builder, Location loc,
                           OutputUpdater updater = nullptr);
 
+  /// Generates a list of operations to compute the affine expression.
+  Value genAffine(OpBuilder &builder, AffineExpr a, Location loc);
+
   /// Enters a new loop sequence, the loops within the same sequence starts from
   /// the break points of previous loop instead of starting over from 0.
   /// e.g.,
@@ -544,6 +552,11 @@ class SparseTensorLoopEmitter {
   // sequence.
   std::vector<Value> loopSeqStack;
 
+  // Maps AffineDimExpr to the index of the loop in loopStack.
+  // TODO: We should probably use a callback function here to make it more
+  // general.
+  std::vector<unsigned> sparsiferLoopLvlMap;
+
   // TODO: not yet used, it should track the current level for each tensor
   // to help eliminate `dim` paramters from above APIs.
   // std::vector<size_t> curLv;
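
A hypothetical call site for the extended constructor (identifiers such as `tensors`, `ctx`, and `topSortArray` are assumptions for illustration; the real call site is the CodeGen constructor shown in the next file):

    // Sparsification hands its topologically sorted loop order to the emitter
    // so genAffine can map AffineDimExpr positions back to loopStack entries.
    SparseTensorLoopEmitter loopEmitter(
        tensors,
        /*loopTag=*/StringAttr::get(ctx, linalg::GenericOp::getOperationName()),
        /*hasOutput=*/true,
        /*isSparseOut=*/false,
        /*topSort=*/topSortArray);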

mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp

Lines changed: 2 additions & 34 deletions
@@ -61,7 +61,7 @@ struct CodeGen {
             tensors,
             StringAttr::get(context, linalg::GenericOp::getOperationName()),
             /*hasOutput=*/true,
-            /*isSparseOut=*/op != nullptr),
+            /*isSparseOut=*/op != nullptr, ts),
         sparseOut(op), outerParNest(nest), topSort(ts) {
     if (op)
       insChain = op->get();
@@ -485,38 +485,6 @@ static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder,
   });
 }
 
-/// Generates an affine expression.
-//
-// TODO: generalize for sparse tensor subscripts
-//
-static Value genAffine(CodeGen &codegen, OpBuilder &builder, AffineExpr a,
-                       Location loc) {
-  switch (a.getKind()) {
-  case AffineExprKind::DimId: {
-    unsigned idx = a.cast<AffineDimExpr>().getPosition();
-    return codegen.getLoopIdxValue(idx); // universal dense index
-  }
-  case AffineExprKind::Add: {
-    auto binOp = a.cast<AffineBinaryOpExpr>();
-    return builder.create<arith::AddIOp>(
-        loc, genAffine(codegen, builder, binOp.getLHS(), loc),
-        genAffine(codegen, builder, binOp.getRHS(), loc));
-  }
-  case AffineExprKind::Mul: {
-    auto binOp = a.cast<AffineBinaryOpExpr>();
-    return builder.create<arith::MulIOp>(
-        loc, genAffine(codegen, builder, binOp.getLHS(), loc),
-        genAffine(codegen, builder, binOp.getRHS(), loc));
-  }
-  case AffineExprKind::Constant: {
-    int64_t c = a.cast<AffineConstantExpr>().getValue();
-    return constantIndex(builder, loc, c);
-  }
-  default:
-    llvm_unreachable("unexpected affine subscript");
-  }
-}
-
 /// Generates index for load/store on sparse tensor.
 static Value genIndex(CodeGen &codegen, linalg::GenericOp op, OpOperand *t) {
   auto map = op.getMatchingIndexingMap(t);
@@ -546,7 +514,7 @@ static Value genSubscript(CodeGen &codegen, OpBuilder &builder,
   } else {
     for (unsigned d = 0; d < rank; d++) {
       AffineExpr a = map.getResult(d);
-      args.push_back(genAffine(codegen, builder, a, op.getLoc()));
+      args.push_back(codegen.loopEmitter.genAffine(builder, a, op.getLoc()));
     }
   }
   return codegen.loopEmitter.getValBuffer()[tensor];
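
As a hedged illustration of the end result (not taken from the patch's tests): for a subscript d0 * 2 + d1, genAffine recurses through the Mul and Add cases and emits, in effect, the following builder calls, where iv0 and iv1 stand for the induction values recovered through sparsiferLoopLvlMap:

    // AffineExprKind::Constant -> constantIndex
    Value two = constantIndex(builder, loc, 2);
    // AffineExprKind::Mul -> arith::MulIOp over the recursive results
    Value mul = builder.create<arith::MulIOp>(loc, iv0, two);
    // AffineExprKind::Add -> arith::AddIOp, yielding the final subscript
    Value idx = builder.create<arith::AddIOp>(loc, mul, iv1);

The resulting idx is what genSubscript pushes onto args above.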
