-
Notifications
You must be signed in to change notification settings - Fork 15k
[mlir][scf] Add parallelLoopUnrollByFactors() #163806
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-mlir-core @llvm/pr-subscribers-mlir Author: None (fabrizio-indirli) ChangesDefine Full diff: https://github.com/llvm/llvm-project/pull/163806.diff 2 Files Affected:
diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
index ecd829ed14add..5c0908bb71fcf 100644
--- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
@@ -221,6 +221,20 @@ FailureOr<scf::ForallOp> normalizeForallOp(RewriterBase &rewriter,
/// 4. Each region iter arg and result has exactly one use
bool isPerfectlyNestedForLoops(MutableArrayRef<LoopLikeOpInterface> loops);
+/// Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs'
+/// and 'yieldedValues' as the block arguments and yielded values of the loop.
+/// The content of the loop body is replicated 'unrollFactor' times, calling 'ivRemapFn' to remap
+/// 'iv' for each unrolled body. If specified, annotates the Ops in each
+/// unrolled iteration using annotateFn.
+/// If provided, 'clonedToSrcOpsMap' is populated with the mappings from the cloned
+/// ops to the original op.
+void generateUnrolledLoop(
+ Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
+ function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
+ function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
+ ValueRange iterArgs, ValueRange yieldedValues,
+ IRMapping* clonedToSrcOpsMap = nullptr);
+
} // namespace mlir
#endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index 10eae8906ce31..5220ba6c73252 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -291,47 +291,61 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
return arith::DivUIOp::create(builder, loc, sum, divisor);
}
-/// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
-/// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
-/// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
-/// unrolled iteration using annotateFn.
-static void generateUnrolledLoop(
- Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
+void mlir::generateUnrolledLoop(
+ Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
- ValueRange iterArgs, ValueRange yieldedValues) {
+ ValueRange iterArgs, ValueRange yieldedValues,
+ IRMapping* clonedToSrcOpsMap) {
+
+ // check if the op was cloned from another source op, and return it if found
+ // (or the same op if not found)
+ auto findOriginalSrcOp =
+ [](Operation *op, const IRMapping &clonedToSrcOpsMap) -> Operation * {
+ Operation *srcOp = op;
+ // if the source op derives from another op: traverse the chain to find the
+ // original source op
+ while (srcOp && clonedToSrcOpsMap.contains(srcOp))
+ srcOp = clonedToSrcOpsMap.lookup(srcOp);
+ return srcOp;
+ };
+
// Builder to insert unrolled bodies just before the terminator of the body of
- // 'forOp'.
+ // the loop.
auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
- constexpr auto defaultAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
+ static const auto noopAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
if (!annotateFn)
- annotateFn = defaultAnnotateFn;
+ annotateFn = noopAnnotateFn;
// Keep a pointer to the last non-terminator operation in the original block
// so that we know what to clone (since we are doing this in-place).
Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
- // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
+ // Unroll the contents of the loop body (append unrollFactor - 1 additional
+ // copies).
SmallVector<Value, 4> lastYielded(yieldedValues);
for (unsigned i = 1; i < unrollFactor; i++) {
- IRMapping operandMap;
-
// Prepare operand map.
+ IRMapping operandMap;
operandMap.map(iterArgs, lastYielded);
// If the induction variable is used, create a remapping to the value for
// this unrolled instance.
- if (!forOpIV.use_empty()) {
- Value ivUnroll = ivRemapFn(i, forOpIV, builder);
- operandMap.map(forOpIV, ivUnroll);
+ if (!iv.use_empty()) {
+ Value ivUnroll = ivRemapFn(i, iv, builder);
+ operandMap.map(iv, ivUnroll);
}
// Clone the original body of 'forOp'.
for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
- Operation *clonedOp = builder.clone(*it, operandMap);
+ Operation *srcOp = &(*it);
+ Operation *clonedOp = builder.clone(*srcOp, operandMap);
annotateFn(i, clonedOp, builder);
+ if(clonedToSrcOpsMap)
+ clonedToSrcOpsMap->map(clonedOp,
+ findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
}
// Update yielded values.
|
|
@llvm/pr-subscribers-mlir-scf Author: None (fabrizio-indirli) ChangesDefine Full diff: https://github.com/llvm/llvm-project/pull/163806.diff 2 Files Affected:
diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
index ecd829ed14add..5c0908bb71fcf 100644
--- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
@@ -221,6 +221,20 @@ FailureOr<scf::ForallOp> normalizeForallOp(RewriterBase &rewriter,
/// 4. Each region iter arg and result has exactly one use
bool isPerfectlyNestedForLoops(MutableArrayRef<LoopLikeOpInterface> loops);
+/// Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs'
+/// and 'yieldedValues' as the block arguments and yielded values of the loop.
+/// The content of the loop body is replicated 'unrollFactor' times, calling 'ivRemapFn' to remap
+/// 'iv' for each unrolled body. If specified, annotates the Ops in each
+/// unrolled iteration using annotateFn.
+/// If provided, 'clonedToSrcOpsMap' is populated with the mappings from the cloned
+/// ops to the original op.
+void generateUnrolledLoop(
+ Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
+ function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
+ function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
+ ValueRange iterArgs, ValueRange yieldedValues,
+ IRMapping* clonedToSrcOpsMap = nullptr);
+
} // namespace mlir
#endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index 10eae8906ce31..5220ba6c73252 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -291,47 +291,61 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
return arith::DivUIOp::create(builder, loc, sum, divisor);
}
-/// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
-/// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
-/// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
-/// unrolled iteration using annotateFn.
-static void generateUnrolledLoop(
- Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
+void mlir::generateUnrolledLoop(
+ Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
- ValueRange iterArgs, ValueRange yieldedValues) {
+ ValueRange iterArgs, ValueRange yieldedValues,
+ IRMapping* clonedToSrcOpsMap) {
+
+ // check if the op was cloned from another source op, and return it if found
+ // (or the same op if not found)
+ auto findOriginalSrcOp =
+ [](Operation *op, const IRMapping &clonedToSrcOpsMap) -> Operation * {
+ Operation *srcOp = op;
+ // if the source op derives from another op: traverse the chain to find the
+ // original source op
+ while (srcOp && clonedToSrcOpsMap.contains(srcOp))
+ srcOp = clonedToSrcOpsMap.lookup(srcOp);
+ return srcOp;
+ };
+
// Builder to insert unrolled bodies just before the terminator of the body of
- // 'forOp'.
+ // the loop.
auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
- constexpr auto defaultAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
+ static const auto noopAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
if (!annotateFn)
- annotateFn = defaultAnnotateFn;
+ annotateFn = noopAnnotateFn;
// Keep a pointer to the last non-terminator operation in the original block
// so that we know what to clone (since we are doing this in-place).
Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
- // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
+ // Unroll the contents of the loop body (append unrollFactor - 1 additional
+ // copies).
SmallVector<Value, 4> lastYielded(yieldedValues);
for (unsigned i = 1; i < unrollFactor; i++) {
- IRMapping operandMap;
-
// Prepare operand map.
+ IRMapping operandMap;
operandMap.map(iterArgs, lastYielded);
// If the induction variable is used, create a remapping to the value for
// this unrolled instance.
- if (!forOpIV.use_empty()) {
- Value ivUnroll = ivRemapFn(i, forOpIV, builder);
- operandMap.map(forOpIV, ivUnroll);
+ if (!iv.use_empty()) {
+ Value ivUnroll = ivRemapFn(i, iv, builder);
+ operandMap.map(iv, ivUnroll);
}
// Clone the original body of 'forOp'.
for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
- Operation *clonedOp = builder.clone(*it, operandMap);
+ Operation *srcOp = &(*it);
+ Operation *clonedOp = builder.clone(*srcOp, operandMap);
annotateFn(i, clonedOp, builder);
+ if(clonedToSrcOpsMap)
+ clonedToSrcOpsMap->map(clonedOp,
+ findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
}
// Update yielded values.
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
8275239 to
6beb038
Compare
|
Looks good but can you call out the new mapping argument and the constexpr change in the commit message? |
0f7cf97 to
95c44d9
Compare
|
Done, thanks for having a look! :) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
Not sure who should I tag to review this, please? |
20d99ba to
d139d6f
Compare
d139d6f to
c72da0e
Compare
|
I added a test, and for that I had to add also a new function |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hey! Thanks for expanding this PR - it really helps to justify your original changes.
I've only had a chance to skim through and left some initial comments. In general, this calls for a bit more testing :) I will take a closer look tomorrow.
| function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr, | ||
| IRMapping *clonedToSrcOpsMap = nullptr); | ||
|
|
||
| namespace scf { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need this namespace?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not so sure of this either: I wrapped it around the namespace because this is was an internal helper function used only by other SCF functions; I exposed it in the header so that it can be used across different files (of the SCF dialect), but I wanted to make it clear that it's specific to SCF. I don't have any strong opinion on this, though
09de4dc to
02ddf25
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the updates, I am happy with the level of testing. More comments inline :)
| %c16 = arith.constant 16 : index | ||
| %c0 = arith.constant 0 : index | ||
| %c1 = arith.constant 1 : index | ||
| scf.parallel (%arg2, %arg3, %arg4) = (%c0, %c0, %c0) to (%c1, %c16, %c12) step (%c1, %c1, %c1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nit]
| scf.parallel (%arg2, %arg3, %arg4) = (%c0, %c0, %c0) to (%c1, %c16, %c12) step (%c1, %c1, %c1) { | |
| scf.parallel (%iv0, %iv1, %iv2) = (%c0, %c0, %c0) to (%c1, %c16, %c12) step (%c1, %c1, %c1) { |
Same comment elsewhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not a big fan of this tbh because when you run the pass manually mlir-opt will rename al the variable names to %arg0, %arg1, etc. and it becomes harder to map them to the original names in the test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mlir-opt will rename al the variable names to %arg0, %arg1
Indeed, we are not in control of what mlir-opt does with names. So, even %arg2, %arg3, can (and probably will) be replaced with %arg0, %arg2. For this reason, we should optimise for what makes parsing for humans easier.
888e728 to
35cb6c7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thank you for your contribution and for addressing my comments!
My outstanding comments are minor (nits), no need to wait for me take another look. Also, I am travelling today and next week, so won't be able to re-visit this until some later time :) (i.e. feel free to land this)
| %c16 = arith.constant 16 : index | ||
| %c0 = arith.constant 0 : index | ||
| %c1 = arith.constant 1 : index | ||
| scf.parallel (%arg2, %arg3, %arg4) = (%c0, %c0, %c0) to (%c1, %c16, %c12) step (%c1, %c1, %c1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mlir-opt will rename al the variable names to %arg0, %arg1
Indeed, we are not in control of what mlir-opt does with names. So, even %arg2, %arg3, can (and probably will) be replaced with %arg0, %arg2. For this reason, we should optimise for what makes parsing for humans easier.
- In the SCF Utils, add the parallelLoopUnrollByFactors() function to unroll scf::ParallelOp loops according to the specified unroll factors - Add a test pass "TestParallelLoopUnrolling" and the related LIT test - Expose mlir::parallelLoopUnrollByFactors(), mlir::generateUnrolledLoop(), and mlir::scf::computeUbMinusLb() functions in the mlir/Dialect/SCF/Utils/Utils.h header to make thme available to other passes. - In mlir::generateUnrolledLoop(), add also an optional `IRMapping *clonedToSrcOpsMap` argument to map the new cloned operations to their original ones. In the function body, change the default `AnnotateFn` type to `static const` to silence potential warnings about dangling references when a function_ref is assigned to a variable with automatic storage. Signed-off-by: Fabrizio Indirli <[email protected]>
0eeeb38 to
0935f5d
Compare
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/117/builds/14487 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/32977 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/20873 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/205/builds/25988 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/53/builds/21538 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/203/builds/27199 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/204/builds/26011 Here is the relevant piece of the build log for the reference |
This reverts commit 86a2073.
Reverts #163806 due to linking errors on the function `mlir::scf::computeUbMinusLb`
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/172/builds/16135 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/29/builds/17517 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/80/builds/16967 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/152/builds/5863 Here is the relevant piece of the build log for the reference |
…64949) Reverts llvm/llvm-project#163806 due to linking errors on the function `mlir::scf::computeUbMinusLb`
- In the SCF Utils, add the `parallelLoopUnrollByFactors()` function to unroll scf::ParallelOp loops according to the specified unroll factors - Add a test pass "TestParallelLoopUnrolling" and the related LIT test - Expose `mlir::parallelLoopUnrollByFactors()`, `mlir::generateUnrolledLoop()`, and `mlir::scf::computeUbMinusLb()` functions in the mlir/Dialect/SCF/Utils/Utils.h and /IR/SCF.h headers to make them available to other passes. - In `mlir::generateUnrolledLoop()`, add also an optional `IRMapping *clonedToSrcOpsMap` argument to map the new cloned operations to their original ones. In the function body, change the default `AnnotateFn` type to `static const` to silence potential warnings about dangling references when a function_ref is assigned to a variable with automatic storage. Signed-off-by: Fabrizio Indirli <[email protected]>
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/130/builds/15825 Here is the relevant piece of the build log for the reference |
parallelLoopUnrollByFactors()functionto unroll scf::ParallelOp loops according to the specified unroll factors
mlir::parallelLoopUnrollByFactors(),mlir::generateUnrolledLoop(),and
mlir::scf::computeUbMinusLb()functions in themlir/Dialect/SCF/Utils/Utils.h header to make them available to other passes.
mlir::generateUnrolledLoop(), add also an optionalIRMapping *clonedToSrcOpsMapargument to map the new cloned operations to their original ones.
In the function body, change the default
AnnotateFntype tostatic constto silence potential warnings about dangling references when a function_ref
is assigned to a variable with automatic storage.