Skip to content

Commit c8bd1d2

Browse files
author
Marek Sedlacek
committed
Adds loop rotation to runtime loop unrolling when rotating makes the loop's
trip count computable, which in turn might enable additional unrolling of the loop. To minimize the possibility of rotating without unrolling, the rotation is done directly inside UnrollRuntimeLoopRemainder.
1 parent ab6316e commit c8bd1d2

File tree

10 files changed

+457
-184
lines changed

10 files changed

+457
-184
lines changed

llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
1414
#define LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
1515

16+
#include "llvm/ADT/STLExtras.h"
1617
#include "llvm/Support/Compiler.h"
1718

1819
namespace llvm {
@@ -32,12 +33,14 @@ class TargetTransformInfo;
3233
/// header. If the loop header's size exceeds the threshold, the loop rotation
3334
/// will give up. The flag IsUtilMode controls the heuristic used in the
3435
/// LoopRotation. If it is true, the profitability heuristic will be ignored.
35-
LLVM_ABI bool LoopRotation(Loop *L, LoopInfo *LI,
36-
const TargetTransformInfo *TTI, AssumptionCache *AC,
37-
DominatorTree *DT, ScalarEvolution *SE,
38-
MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
39-
bool RotationOnly, unsigned Threshold,
40-
bool IsUtilMode, bool PrepareForLTO = false);
36+
/// The ProfitabilityCheck callback can override the general profitability
/// check: if it returns true, the rotation is not rejected as unprofitable.
37+
LLVM_ABI bool LoopRotation(
38+
Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
39+
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
40+
const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold,
41+
bool IsUtilMode, bool PrepareForLTO = false,
42+
function_ref<bool(Loop *, ScalarEvolution *)> ProfitabilityCheck =
43+
[](Loop *, ScalarEvolution *) { return false; });
4144

4245
} // namespace llvm
4346

llvm/include/llvm/Transforms/Utils/UnrollLoop.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,14 @@ LLVM_ABI const Loop *addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
5454
LoopInfo *LI,
5555
NewLoopsMap &NewLoops);
5656

57-
/// Represents the result of a \c UnrollLoop invocation.
57+
/// Represents the result of a \c UnrollLoop or \c UnrollAndJamLoop invocation.
5858
enum class LoopUnrollResult {
5959
/// The loop was not modified.
6060
Unmodified,
6161

62+
/// The loop was modified, but not unrolled.
63+
Modified,
64+
6265
/// The loop was partially unrolled -- we still have a loop, but with a
6366
/// smaller trip count. We may also have emitted epilogue loop if the loop
6467
/// had a non-constant trip count.
@@ -69,6 +72,18 @@ enum class LoopUnrollResult {
6972
FullyUnrolled
7073
};
7174

75+
/// Represents the result of a \c UnrollRuntimeLoopRemainder invocation.
76+
enum class LoopReminderUnrollResult {
77+
/// The loop remainder was not modified.
78+
Unmodified,
79+
80+
/// The loop was rotated, but not unrolled.
81+
Rotated,
82+
83+
/// The loop remainder was unrolled.
84+
Unrolled
85+
};
86+
7287
struct UnrollLoopOptions {
7388
unsigned Count;
7489
bool Force;
@@ -90,7 +105,7 @@ LLVM_ABI LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO,
90105
Loop **RemainderLoop = nullptr,
91106
AAResults *AA = nullptr);
92107

93-
LLVM_ABI bool UnrollRuntimeLoopRemainder(
108+
LLVM_ABI LoopReminderUnrollResult UnrollRuntimeLoopRemainder(
94109
Loop *L, unsigned Count, bool AllowExpensiveTripCount,
95110
bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
96111
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,8 +1357,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
13571357
ULO.RuntimeUnrollMultiExit = UP.RuntimeUnrollMultiExit;
13581358
LoopUnrollResult UnrollResult = UnrollLoop(
13591359
L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
1360-
if (UnrollResult == LoopUnrollResult::Unmodified)
1361-
return LoopUnrollResult::Unmodified;
1360+
if (UnrollResult == LoopUnrollResult::Unmodified ||
1361+
UnrollResult == LoopUnrollResult::Modified)
1362+
return UnrollResult;
13621363

13631364
if (RemainderLoop) {
13641365
std::optional<MDNode *> RemainderLoopID =

llvm/lib/Transforms/Utils/LoopRotationUtils.cpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,19 @@ class LoopRotate {
6969
bool RotationOnly;
7070
bool IsUtilMode;
7171
bool PrepareForLTO;
72+
function_ref<bool(Loop *, ScalarEvolution *)> ProfitabilityCheck;
7273

7374
public:
7475
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
7576
const TargetTransformInfo *TTI, AssumptionCache *AC,
7677
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
7778
const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
78-
bool PrepareForLTO)
79+
bool PrepareForLTO,
80+
function_ref<bool(Loop *, ScalarEvolution *)> ProfitabilityCheck)
7981
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
8082
MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
81-
IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
83+
IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO),
84+
ProfitabilityCheck(ProfitabilityCheck) {}
8285
bool processLoop(Loop *L);
8386

8487
private:
@@ -440,9 +443,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
440443

441444
// Rotate if either the loop latch does *not* exit the loop, or if the loop
442445
// latch was just simplified. Or if we think it will be profitable.
443-
if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
444-
!profitableToRotateLoopExitingLatch(L) &&
445-
!canRotateDeoptimizingLatchExit(L))
446+
if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch &&
447+
IsUtilMode == false && !profitableToRotateLoopExitingLatch(L) &&
448+
!canRotateDeoptimizingLatchExit(L) && !ProfitabilityCheck(L, SE))
446449
return Rotated;
447450

448451
// Check size of original header and reject loop if it is very big or we can't
@@ -1053,13 +1056,14 @@ bool LoopRotate::processLoop(Loop *L) {
10531056

10541057

10551058
/// The utility to convert a loop into a loop with bottom test.
1056-
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
1057-
AssumptionCache *AC, DominatorTree *DT,
1058-
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
1059-
const SimplifyQuery &SQ, bool RotationOnly = true,
1060-
unsigned Threshold = unsigned(-1),
1061-
bool IsUtilMode = true, bool PrepareForLTO) {
1059+
bool llvm::LoopRotation(
1060+
Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
1061+
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
1062+
const SimplifyQuery &SQ, bool RotationOnly = true,
1063+
unsigned Threshold = unsigned(-1), bool IsUtilMode = true,
1064+
bool PrepareForLTO,
1065+
function_ref<bool(Loop *, ScalarEvolution *)> ProfitabilityCheck) {
10621066
LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
1063-
IsUtilMode, PrepareForLTO);
1067+
IsUtilMode, PrepareForLTO, ProfitabilityCheck);
10641068
return LR.processLoop(L);
10651069
}

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 72 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -486,12 +486,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
486486

487487
// All these values should be taken only after peeling because they might have
488488
// changed.
489-
BasicBlock *Preheader = L->getLoopPreheader();
490-
BasicBlock *Header = L->getHeader();
491489
BasicBlock *LatchBlock = L->getLoopLatch();
492-
SmallVector<BasicBlock *, 4> ExitBlocks;
493-
L->getExitBlocks(ExitBlocks);
494-
std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
495490

496491
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
497492
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
@@ -504,42 +499,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
504499
if (MaxTripCount && ULO.Count > MaxTripCount)
505500
ULO.Count = MaxTripCount;
506501

507-
struct ExitInfo {
508-
unsigned TripCount;
509-
unsigned TripMultiple;
510-
unsigned BreakoutTrip;
511-
bool ExitOnTrue;
512-
BasicBlock *FirstExitingBlock = nullptr;
513-
SmallVector<BasicBlock *> ExitingBlocks;
514-
};
515-
DenseMap<BasicBlock *, ExitInfo> ExitInfos;
516-
SmallVector<BasicBlock *, 4> ExitingBlocks;
517-
L->getExitingBlocks(ExitingBlocks);
518-
for (auto *ExitingBlock : ExitingBlocks) {
519-
// The folding code is not prepared to deal with non-branch instructions
520-
// right now.
521-
auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
522-
if (!BI)
523-
continue;
524-
525-
ExitInfo &Info = ExitInfos[ExitingBlock];
526-
Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
527-
Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
528-
if (Info.TripCount != 0) {
529-
Info.BreakoutTrip = Info.TripCount % ULO.Count;
530-
Info.TripMultiple = 0;
531-
} else {
532-
Info.BreakoutTrip = Info.TripMultiple =
533-
(unsigned)std::gcd(ULO.Count, Info.TripMultiple);
534-
}
535-
Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
536-
Info.ExitingBlocks.push_back(ExitingBlock);
537-
LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
538-
<< ": TripCount=" << Info.TripCount
539-
<< ", TripMultiple=" << Info.TripMultiple
540-
<< ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
541-
}
542-
543502
// Are we eliminating the loop control altogether? Note that we can know
544503
// we're eliminating the backedge without knowing exactly which iteration
545504
// of the unrolled body exits.
@@ -552,17 +511,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
552511
if (CompletelyUnroll)
553512
ULO.Runtime = false;
554513

555-
// Go through all exits of L and see if there are any phi-nodes there. We just
556-
// conservatively assume that they're inserted to preserve LCSSA form, which
557-
// means that complete unrolling might break this form. We need to either fix
558-
// it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
559-
// now we just recompute LCSSA for the outer loop, but it should be possible
560-
// to fix it in-place.
561-
bool NeedToFixLCSSA =
562-
PreserveLCSSA && CompletelyUnroll &&
563-
any_of(ExitBlocks,
564-
[](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
565-
566514
// The current loop unroll pass can unroll loops that have
567515
// (1) single latch; and
568516
// (2a) latch is unconditional; or
@@ -587,21 +535,87 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
587535
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
588536
: isEpilogProfitable(L);
589537

538+
LoopReminderUnrollResult UnrollReminderResult =
539+
LoopReminderUnrollResult::Unmodified;
540+
if (ULO.Runtime) {
541+
UnrollReminderResult = UnrollRuntimeLoopRemainder(
542+
L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability,
543+
ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
544+
PreserveLCSSA, ULO.SCEVExpansionBudget, ULO.RuntimeUnrollMultiExit,
545+
RemainderLoop);
546+
LatchBlock = L->getLoopLatch();
547+
LatchIsExiting = L->isLoopExiting(LatchBlock);
548+
}
549+
590550
if (ULO.Runtime &&
591-
!UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
592-
EpilogProfitability, ULO.UnrollRemainder,
593-
ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
594-
PreserveLCSSA, ULO.SCEVExpansionBudget,
595-
ULO.RuntimeUnrollMultiExit, RemainderLoop)) {
551+
UnrollReminderResult != LoopReminderUnrollResult::Unrolled) {
596552
if (ULO.Force)
597553
ULO.Runtime = false;
598554
else {
599555
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
600556
"generated when assuming runtime trip count\n");
601-
return LoopUnrollResult::Unmodified;
557+
// Loop might have been rotated inside of UnrollRuntimeLoopRemainder and
558+
// this needs to be propagated.
559+
return UnrollReminderResult == LoopReminderUnrollResult::Rotated
560+
? LoopUnrollResult::Modified
561+
: LoopUnrollResult::Unmodified;
562+
;
602563
}
603564
}
604565

566+
BasicBlock *Preheader = L->getLoopPreheader();
567+
BasicBlock *Header = L->getHeader();
568+
SmallVector<BasicBlock *, 4> ExitBlocks;
569+
L->getExitBlocks(ExitBlocks);
570+
std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
571+
572+
// Go through all exits of L and see if there are any phi-nodes there. We just
573+
// conservatively assume that they're inserted to preserve LCSSA form, which
574+
// means that complete unrolling might break this form. We need to either fix
575+
// it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
576+
// now we just recompute LCSSA for the outer loop, but it should be possible
577+
// to fix it in-place.
578+
bool NeedToFixLCSSA =
579+
PreserveLCSSA && CompletelyUnroll &&
580+
any_of(ExitBlocks,
581+
[](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
582+
583+
struct ExitInfo {
584+
unsigned TripCount;
585+
unsigned TripMultiple;
586+
unsigned BreakoutTrip;
587+
bool ExitOnTrue;
588+
BasicBlock *FirstExitingBlock = nullptr;
589+
SmallVector<BasicBlock *> ExitingBlocks;
590+
};
591+
DenseMap<BasicBlock *, ExitInfo> ExitInfos;
592+
SmallVector<BasicBlock *, 4> ExitingBlocks;
593+
L->getExitingBlocks(ExitingBlocks);
594+
for (auto *ExitingBlock : ExitingBlocks) {
595+
// The folding code is not prepared to deal with non-branch instructions
596+
// right now.
597+
auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
598+
if (!BI)
599+
continue;
600+
601+
ExitInfo &Info = ExitInfos[ExitingBlock];
602+
Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
603+
Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
604+
if (Info.TripCount != 0) {
605+
Info.BreakoutTrip = Info.TripCount % ULO.Count;
606+
Info.TripMultiple = 0;
607+
} else {
608+
Info.BreakoutTrip = Info.TripMultiple =
609+
(unsigned)std::gcd(ULO.Count, Info.TripMultiple);
610+
}
611+
Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
612+
Info.ExitingBlocks.push_back(ExitingBlock);
613+
LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
614+
<< ": TripCount=" << Info.TripCount
615+
<< ", TripMultiple=" << Info.TripMultiple
616+
<< ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
617+
}
618+
605619
using namespace ore;
606620
// Report the unrolling decision.
607621
if (CompletelyUnroll) {

llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,16 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
239239

240240
// We use the runtime remainder in cases where we don't know trip multiple
241241
if (TripMultiple % Count != 0) {
242-
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
243-
/*UseEpilogRemainder*/ true,
244-
UnrollRemainder, /*ForgetAllSCEV*/ false,
245-
LI, SE, DT, AC, TTI, true,
246-
SCEVCheapExpansionBudget, EpilogueLoop)) {
242+
auto UnrollReminderResult = UnrollRuntimeLoopRemainder(
243+
L, Count, /*AllowExpensiveTripCount*/ false,
244+
/*UseEpilogRemainder*/ true, UnrollRemainder, /*ForgetAllSCEV*/ false,
245+
LI, SE, DT, AC, TTI, true, SCEVCheapExpansionBudget, EpilogueLoop);
246+
if (UnrollReminderResult != LoopReminderUnrollResult::Unrolled) {
247247
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
248248
"generated when assuming runtime trip count\n");
249-
return LoopUnrollResult::Unmodified;
249+
return UnrollReminderResult == LoopReminderUnrollResult::Rotated
250+
? LoopUnrollResult::Modified
251+
: LoopUnrollResult::Unmodified;
250252
}
251253
}
252254

0 commit comments

Comments
 (0)