Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mlir/include/mlir/Dialect/SCF/IR/SCF.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ SmallVector<Value> replaceAndCastForOpIterArg(RewriterBase &rewriter,
Value replacement,
const ValueTypeCastFnTy &castFn);

/// Helper function to compute the difference between two values. This is used
/// by the loop implementations to compute the trip count.
std::optional<llvm::APSInt> computeUbMinusLb(Value lb, Value ub, bool isSigned);

} // namespace scf
} // namespace mlir
#endif // MLIR_DIALECT_SCF_SCF_H
33 changes: 33 additions & 0 deletions mlir/include/mlir/Dialect/SCF/Utils/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,39 @@ FailureOr<scf::ForallOp> normalizeForallOp(RewriterBase &rewriter,
/// 4. Each region iter arg and result has exactly one use
bool isPerfectlyNestedForLoops(MutableArrayRef<LoopLikeOpInterface> loops);

/// Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs'
/// and 'yieldedValues' as the block arguments and yielded values of the loop.
/// The content of the loop body is replicated 'unrollFactor' times, calling
/// 'ivRemapFn' to remap 'iv' for each unrolled body. If specified, annotates
/// the Ops in each unrolled iteration using annotateFn. If provided,
/// 'clonedToSrcOpsMap' is populated with the mappings from the cloned ops to
/// the original op.
///
/// The unrolling is done in place: the existing body counts as the first
/// copy, and 'unrollFactor' - 1 additional copies are inserted just before
/// the terminator of 'loopBodyBlock'. 'ivRemapFn' is only invoked when 'iv'
/// has at least one use; it receives the index of the copy being generated
/// (starting at 1), 'iv', and the builder positioned at the block terminator.
/// 'annotateFn' may be null, in which case no annotation is applied.
///
/// When an op being cloned is itself registered in 'clonedToSrcOpsMap' as a
/// clone (e.g. from a previous call sharing the same map), the new clone is
/// mapped to the op found at the end of that chain rather than to the
/// intermediate clone.
void generateUnrolledLoop(
Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
ValueRange iterArgs, ValueRange yieldedValues,
IRMapping *clonedToSrcOpsMap = nullptr);

/// Unroll this scf::Parallel loop by the specified unroll factors. Returns the
/// unrolled loop if the unroll succeeded; otherwise returns failure if the loop
/// cannot be unrolled either due to restrictions or to invalid unroll factors.
/// Requires positive loop bounds and step. If specified, annotates the Ops in
/// each unrolled iteration by applying `annotateFn`.
/// If provided, 'clonedToSrcOpsMap' is populated with the mappings from the
/// cloned ops to the original op.
///
/// `unrollFactors` must be non-empty, contain no zero entry, and have at most
/// as many entries as the loop has induction variables (these preconditions
/// are asserted). When fewer factors than loop dimensions are given, they are
/// applied to the innermost loop dimensions.
///
/// Failure is reported through `rewriter.notifyMatchFailure` when:
///  - all unroll factors are 1,
///  - the loop body is empty (contains only its terminator),
///  - the trip counts of the loop cannot be computed as constants,
///  - an unroll factor does not evenly divide the corresponding trip count,
///  - the loop steps cannot be retrieved.
///
/// On success the loop is modified in place (body replicated, steps scaled by
/// the unroll factors) and the same op is returned.
FailureOr<scf::ParallelOp> parallelLoopUnrollByFactors(
scf::ParallelOp op, ArrayRef<uint64_t> unrollFactors,
RewriterBase &rewriter,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
IRMapping *clonedToSrcOpsMap = nullptr);

/// Get constant trip counts for each of the induction variables of the given
/// loop operation. If any of the loop's trip counts is not constant, return an
/// empty vector.
/// An empty vector is also returned when the loop does not provide its lower
/// bounds, upper bounds, or steps. Trip counts are evaluated with signed
/// semantics and returned as sign-extended 64-bit integers, in the same order
/// as the loop's bounds.
llvm::SmallVector<int64_t>
getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp);

} // namespace mlir

#endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_
6 changes: 2 additions & 4 deletions mlir/lib/Dialect/SCF/IR/SCF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,8 @@ static TerminatorTy verifyAndGetTerminator(Operation *op, Region &region,
return nullptr;
}

/// Helper function to compute the difference between two values. This is used
/// by the loop implementations to compute the trip count.
static std::optional<llvm::APSInt> computeUbMinusLb(Value lb, Value ub,
bool isSigned) {
std::optional<llvm::APSInt> mlir::scf::computeUbMinusLb(Value lb, Value ub,
bool isSigned) {
llvm::APSInt diff;
auto addOp = ub.getDefiningOp<arith::AddIOp>();
if (!addOp)
Expand Down
145 changes: 128 additions & 17 deletions mlir/lib/Dialect/SCF/Utils/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,47 +291,61 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
return arith::DivUIOp::create(builder, loc, sum, divisor);
}

/// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
/// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
/// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
/// unrolled iteration using annotateFn.
static void generateUnrolledLoop(
Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
void mlir::generateUnrolledLoop(
Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
ValueRange iterArgs, ValueRange yieldedValues) {
ValueRange iterArgs, ValueRange yieldedValues,
IRMapping *clonedToSrcOpsMap) {

// Check if the op was cloned from another source op, and return it if found
// (or the same op if not found)
auto findOriginalSrcOp =
[](Operation *op, const IRMapping &clonedToSrcOpsMap) -> Operation * {
Operation *srcOp = op;
// If the source op derives from another op: traverse the chain to find the
// original source op
while (srcOp && clonedToSrcOpsMap.contains(srcOp))
srcOp = clonedToSrcOpsMap.lookup(srcOp);
return srcOp;
};

// Builder to insert unrolled bodies just before the terminator of the body of
// 'forOp'.
// the loop.
auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);

constexpr auto defaultAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
static const auto noopAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
if (!annotateFn)
annotateFn = defaultAnnotateFn;
annotateFn = noopAnnotateFn;

// Keep a pointer to the last non-terminator operation in the original block
// so that we know what to clone (since we are doing this in-place).
Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);

// Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
// Unroll the contents of the loop body (append unrollFactor - 1 additional
// copies).
SmallVector<Value, 4> lastYielded(yieldedValues);

for (unsigned i = 1; i < unrollFactor; i++) {
IRMapping operandMap;

// Prepare operand map.
IRMapping operandMap;
operandMap.map(iterArgs, lastYielded);

// If the induction variable is used, create a remapping to the value for
// this unrolled instance.
if (!forOpIV.use_empty()) {
Value ivUnroll = ivRemapFn(i, forOpIV, builder);
operandMap.map(forOpIV, ivUnroll);
if (!iv.use_empty()) {
Value ivUnroll = ivRemapFn(i, iv, builder);
operandMap.map(iv, ivUnroll);
}

// Clone the original body of 'forOp'.
for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
Operation *clonedOp = builder.clone(*it, operandMap);
Operation *srcOp = &(*it);
Operation *clonedOp = builder.clone(*srcOp, operandMap);
annotateFn(i, clonedOp, builder);
if (clonedToSrcOpsMap)
clonedToSrcOpsMap->map(clonedOp,
findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
}

// Update yielded values.
Expand Down Expand Up @@ -1544,3 +1558,100 @@ bool mlir::isPerfectlyNestedForLoops(
}
return true;
}

/// Returns the constant trip count of every induction variable of `loopOp`,
/// in the order of the loop's bounds. Returns an empty vector if the loop does
/// not expose its bounds/steps, or if any trip count is not a constant.
llvm::SmallVector<int64_t>
mlir::getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp) {
  // Query the loop-like interface for the full iteration space description.
  std::optional<SmallVector<OpFoldResult>> lowerBounds =
      loopOp.getLoopLowerBounds();
  std::optional<SmallVector<OpFoldResult>> upperBounds =
      loopOp.getLoopUpperBounds();
  std::optional<SmallVector<OpFoldResult>> loopSteps = loopOp.getLoopSteps();

  const bool hasIterationSpace = lowerBounds.has_value() &&
                                 upperBounds.has_value() &&
                                 loopSteps.has_value();
  if (!hasIterationSpace)
    return {};

  llvm::SmallVector<int64_t> result;
  for (auto [lower, upper, stride] :
       llvm::zip(*lowerBounds, *upperBounds, *loopSteps)) {
    // Bounds are interpreted as signed values.
    const std::optional<llvm::APInt> tripCount = constantTripCount(
        lower, upper, stride, /*isSigned=*/true, scf::computeUbMinusLb);
    // A single non-constant dimension invalidates the whole result.
    if (!tripCount.has_value())
      return {};
    result.push_back(tripCount->getSExtValue());
  }
  return result;
}

/// Unrolls `op` in place by `unrollFactors` and returns it on success.
///
/// \param op             The scf.parallel loop to unroll.
/// \param unrollFactors  Non-empty list of non-zero factors, at most one per
///                       loop dimension. If fewer factors than dimensions are
///                       given, they apply to the innermost dimensions.
/// \param rewriter       Used to report match failures and to create the new
///                       step constants (inserted right before `op`).
/// \param annotateFn     Optional callback applied to every cloned op.
/// \param clonedToSrcOpsMap  Optional map that, when non-null, receives the
///                       mapping from each cloned op to its original source
///                       op. The same map is shared across all unrolled
///                       dimensions so that clones of clones resolve to the
///                       original op.
///
/// Returns failure (via `notifyMatchFailure`) if all factors are 1, the body
/// is empty, the trip counts or steps are not constant, or a factor does not
/// evenly divide the corresponding trip count.
FailureOr<scf::ParallelOp> mlir::parallelLoopUnrollByFactors(
    scf::ParallelOp op, ArrayRef<uint64_t> unrollFactors,
    RewriterBase &rewriter,
    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
    IRMapping *clonedToSrcOpsMap) {
  const unsigned numLoops = op.getNumLoops();
  assert(llvm::none_of(unrollFactors, [](uint64_t f) { return f == 0; }) &&
         "Expected positive unroll factors");
  assert((!unrollFactors.empty() && (unrollFactors.size() <= numLoops)) &&
         "Expected non-empty unroll factors of size <= to the number of loops");

  // Bail out if no valid unroll factors were provided
  if (llvm::all_of(unrollFactors, [](uint64_t f) { return f == 1; }))
    return rewriter.notifyMatchFailure(
        op, "Unrolling not applied if all factors are 1");

  // Return if the loop body is empty.
  if (llvm::hasSingleElement(op.getBody()->getOperations()))
    return rewriter.notifyMatchFailure(op, "Cannot unroll an empty loop body");

  // If the provided unroll factors do not cover all the loop dims, they are
  // applied to the inner loop dimensions.
  const unsigned firstLoopDimIdx = numLoops - unrollFactors.size();

  // Make sure that the unroll factors divide the iteration space evenly
  // TODO: Support unrolling loops with dynamic iteration spaces.
  const llvm::SmallVector<int64_t> tripCounts = getConstLoopTripCounts(op);
  if (tripCounts.empty())
    return rewriter.notifyMatchFailure(
        op, "Failed to compute constant trip counts for the loop. Note that "
            "dynamic loop sizes are not supported.");

  for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
    const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
    if (tripCounts[dimIdx] % unrollFactor)
      return rewriter.notifyMatchFailure(
          op, "Unroll factors don't divide the iteration space evenly");
  }

  std::optional<SmallVector<OpFoldResult>> maybeFoldSteps = op.getLoopSteps();
  if (!maybeFoldSteps)
    return rewriter.notifyMatchFailure(op, "Failed to retrieve loop steps");
  llvm::SmallVector<size_t> steps{};
  steps.reserve(maybeFoldSteps->size());
  for (OpFoldResult step : *maybeFoldSteps) {
    // Do not dereference blindly: a step that is not a known constant cannot
    // be scaled statically.
    const std::optional<int64_t> constStep = getConstantIntValue(step);
    if (!constStep)
      return rewriter.notifyMatchFailure(op, "Expected constant loop steps");
    steps.push_back(static_cast<size_t>(*constStep));
  }

  for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
    const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
    if (unrollFactor == 1)
      continue;
    const size_t origStep = steps[dimIdx];
    const int64_t newStep = origStep * unrollFactor;

    ValueRange iterArgs = ValueRange(op.getRegionIterArgs());
    auto yieldedValues = op.getBody()->getTerminator()->getOperands();

    // Forward the caller-provided map (possibly null) instead of a local one,
    // so that the caller actually receives the clone -> source mappings and
    // so that mappings accumulate across the unrolled dimensions.
    generateUnrolledLoop(
        op.getBody(), op.getInductionVars()[dimIdx], unrollFactor,
        [&](unsigned i, Value iv, OpBuilder b) {
          // iv' = iv + step * i;
          const AffineExpr expr = b.getAffineDimExpr(0) + (origStep * i);
          const auto map =
              b.getDimIdentityMap().dropResult(0).insertResult(expr, 0);
          return affine::AffineApplyOp::create(b, iv.getLoc(), map,
                                               ValueRange{iv});
        },
        /*annotateFn*/ annotateFn, iterArgs, yieldedValues, clonedToSrcOpsMap);

    // Update loop step. The new constant is created right before the loop;
    // the guard restores the caller's insertion point afterwards.
    OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPoint(op);
    op.getStepMutable()[dimIdx].assign(
        arith::ConstantIndexOp::create(rewriter, op.getLoc(), newStep));
  }
  return op;
}
Loading