-
Notifications
You must be signed in to change notification settings - Fork 15k
[LV] Add initial legality checks for early exit loops with side effects #145663
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 18 commits
3c6f7fe
255fdb6
b991d44
7cae713
70769de
553cc93
a6189e2
3bb93d2
231d17a
4e5d4c2
1a9360d
23770b0
9c5436a
022f3e6
21a5682
e80821e
2233dcf
83e10d9
d5aa5ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -251,15 +251,18 @@ struct HistogramInfo { | |||||
| /// induction variable and the different reduction variables. | ||||||
| class LoopVectorizationLegality { | ||||||
| public: | ||||||
| LoopVectorizationLegality( | ||||||
| Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, | ||||||
| TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F, | ||||||
| LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE, | ||||||
| LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, | ||||||
| AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) | ||||||
| LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, | ||||||
| DominatorTree *DT, TargetTransformInfo *TTI, | ||||||
| TargetLibraryInfo *TLI, Function *F, | ||||||
| LoopAccessInfoManager &LAIs, LoopInfo *LI, | ||||||
| OptimizationRemarkEmitter *ORE, | ||||||
| LoopVectorizationRequirements *R, | ||||||
| LoopVectorizeHints *H, DemandedBits *DB, | ||||||
| AssumptionCache *AC, BlockFrequencyInfo *BFI, | ||||||
| ProfileSummaryInfo *PSI, AAResults *AA) | ||||||
| : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs), | ||||||
| ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), | ||||||
| PSI(PSI) {} | ||||||
| ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI), | ||||||
| AA(AA) {} | ||||||
|
|
||||||
| /// ReductionList contains the reduction descriptors for all | ||||||
| /// of the reductions that were found in the loop. | ||||||
|
|
@@ -407,6 +410,14 @@ class LoopVectorizationLegality { | |||||
| return UncountableExitingBB; | ||||||
| } | ||||||
|
|
||||||
| /// Returns true if this is an early exit loop with state-changing or | ||||||
| /// potentially-faulting operations and the IR representing the condition | ||||||
| /// for the uncountable exit must be determined before any of the state | ||||||
| /// changes or potentially faulting operations take place. | ||||||
| bool hasUncountableExitWithSideEffects() const { | ||||||
| return UncountableExitWithSideEffects; | ||||||
| } | ||||||
|
|
||||||
| /// Return true if there is store-load forwarding dependencies. | ||||||
| bool isSafeForAnyStoreLoadForwardDistances() const { | ||||||
| return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances(); | ||||||
|
|
@@ -524,20 +535,87 @@ class LoopVectorizationLegality { | |||||
| /// Returns true if this is an early exit loop that can be vectorized. | ||||||
| /// Currently, a loop with an uncountable early exit is considered | ||||||
| /// vectorizable if: | ||||||
| /// 1. There are no writes to memory in the loop. | ||||||
| /// 1. Writes to memory do not form a dependence with any load used as | ||||||
|
||||||
| /// 1. Writes to memory do not form a dependence with any load used as | |
| /// 1. Writes to memory access different underlying objects than any load used as |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| /// part of the uncounted exit condition. | |
| /// part of the uncountable exit condition. |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| /// determine whether an uncounted exit will be taken before any operation | |
| /// determine whether an uncountable exit will be taken before any operation |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment looks really helpful, thanks!
Is it worth also noting that c[i] is not permitted to have more than one use for now? An outside use complicates things, because you would then have to use a PHI for c[i] instead of c[i] itself.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -15,8 +15,10 @@ | |||||
| // | ||||||
|
|
||||||
| #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" | ||||||
| #include "llvm/Analysis/AliasAnalysis.h" | ||||||
| #include "llvm/Analysis/Loads.h" | ||||||
| #include "llvm/Analysis/LoopInfo.h" | ||||||
| #include "llvm/Analysis/MustExecute.h" | ||||||
| #include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||||||
| #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||
| #include "llvm/Analysis/TargetLibraryInfo.h" | ||||||
|
|
@@ -1223,8 +1225,18 @@ bool LoopVectorizationLegality::canVectorizeMemory() { | |||||
| }); | ||||||
| } | ||||||
|
|
||||||
| if (!LAI->canVectorizeMemory()) | ||||||
| if (!LAI->canVectorizeMemory()) { | ||||||
| if (hasUncountableExitWithSideEffects()) { | ||||||
| reportVectorizationFailure( | ||||||
| "Cannot vectorize unsafe dependencies in uncountable exit loop with " | ||||||
| "side effects", | ||||||
| "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, | ||||||
| TheLoop); | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| return canVectorizeIndirectUnsafeDependences(); | ||||||
| } | ||||||
|
|
||||||
| if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) { | ||||||
| reportVectorizationFailure("We don't allow storing to uniform addresses", | ||||||
|
|
@@ -1755,16 +1767,24 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { | |||||
| } | ||||||
| }; | ||||||
|
|
||||||
| bool HasSideEffects = false; | ||||||
| for (auto *BB : TheLoop->blocks()) | ||||||
| for (auto &I : *BB) { | ||||||
| if (I.mayWriteToMemory()) { | ||||||
| // We don't support writes to memory. | ||||||
| if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) { | ||||||
| HasSideEffects = true; | ||||||
| continue; | ||||||
| } | ||||||
|
|
||||||
| // We don't support complex writes to memory. | ||||||
| reportVectorizationFailure( | ||||||
| "Writes to memory unsupported in early exit loops", | ||||||
| "Cannot vectorize early exit loop with writes to memory", | ||||||
| "Complex writes to memory unsupported in early exit loops", | ||||||
| "Cannot vectorize early exit loop with complex writes to memory", | ||||||
| "WritesInEarlyExitLoop", ORE, TheLoop); | ||||||
| return false; | ||||||
| } else if (!IsSafeOperation(&I)) { | ||||||
| } | ||||||
|
||||||
|
|
||||||
| if (!IsSafeOperation(&I)) { | ||||||
| reportVectorizationFailure("Early exit loop contains operations that " | ||||||
| "cannot be speculatively executed", | ||||||
| "UnsafeOperationsEarlyExitLoop", ORE, | ||||||
|
|
@@ -1777,15 +1797,22 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { | |||||
| assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock && | ||||||
| "Expected latch predecessor to be the early exiting block"); | ||||||
|
|
||||||
| Predicates.clear(); | ||||||
| SmallVector<LoadInst *, 4> NonDerefLoads; | ||||||
| if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads, | ||||||
| &Predicates)) { | ||||||
| reportVectorizationFailure("Loop may fault", | ||||||
| "Cannot vectorize non-read-only early exit loop", | ||||||
| "NonReadOnlyEarlyExitLoop", ORE, TheLoop); | ||||||
| // TODO: Handle loops that may fault. | ||||||
| if (!HasSideEffects) { | ||||||
| // Read-only loop. | ||||||
| Predicates.clear(); | ||||||
| if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads, | ||||||
| &Predicates)) { | ||||||
| reportVectorizationFailure( | ||||||
| "Loop may fault", "Cannot vectorize non-read-only early exit loop", | ||||||
| "NonReadOnlyEarlyExitLoop", ORE, TheLoop); | ||||||
| return false; | ||||||
| } | ||||||
| } else if (!canUncountableExitConditionLoadBeMoved( | ||||||
| SingleUncountableExitingBlock)) | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| // Check non-dereferenceable loads if any. | ||||||
| for (LoadInst *LI : NonDerefLoads) { | ||||||
| // Only support unit-stride access for now. | ||||||
|
|
@@ -1813,6 +1840,113 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { | |||||
| "backedge taken count: " | ||||||
| << *SymbolicMaxBTC << '\n'); | ||||||
| UncountableExitingBB = SingleUncountableExitingBlock; | ||||||
| UncountableExitWithSideEffects = HasSideEffects; | ||||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved( | ||||||
| BasicBlock *ExitingBlock) { | ||||||
| LoadInst *CriticalUncountableExitConditionLoad = nullptr; | ||||||
|
|
||||||
| // Try to find a load in the critical path for the uncountable exit condition. | ||||||
| // This is currently matching about the simplest form we can, expecting | ||||||
| // only one in-loop load, the result of which is directly compared against | ||||||
| // a loop-invariant value. | ||||||
| // FIXME: We're insisting on a single use for now, because otherwise we will | ||||||
| // need to make PHI nodes for other users. That can be done once the initial | ||||||
| // transform code lands. | ||||||
| auto *Br = cast<BranchInst>(ExitingBlock->getTerminator()); | ||||||
|
|
||||||
| using namespace llvm::PatternMatch; | ||||||
| Value *L = nullptr; | ||||||
| Value *R = nullptr; | ||||||
| if (!match(Br->getCondition(), | ||||||
| m_OneUse(m_ICmp(m_OneUse(m_Value(L)), (m_Value(R)))))) { | ||||||
|
||||||
| m_OneUse(m_ICmp(m_OneUse(m_Value(L)), (m_Value(R)))))) { | |
| m_OneUse(m_ICmp(m_Instruction(L, m_OneUse(m_Load(L))), (m_Value(R)))))) { |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would probably be safer to check for !AddRec here instead of bailing out on isLoopInvariant, which may not be able to identify all invariant cases.
This way we also don't rely on an implementation detail of isDereferenceableAndAlignedInLoop (the check for the pointer being an AddRec, i.e. there's nothing fundamental preventing it from looking through things like pointer selects).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?