Skip to content

Commit 011a792

Browse files
arcbbb authored and github-actions[bot] committed
Automerge: [LV] Add initial legality checks for loops with unbound loads. (#152422)
This patch splits out the legality checks from PR #151300, following the landing of PR #128593. It is a step toward supporting vectorization of early-exit loops that contain potentially faulting loads. In this commit, an early-exit loop is considered legal for vectorization if it satisfies the following criteria: (1) it is a read-only loop; (2) all potentially faulting loads are unit-stride, which is the only access pattern currently supported by vp.load.ff. Note that the vectorizer itself still bails out on loops that contain potentially faulting loads; this commit only adds the legality analysis.
2 parents d7bcc78 + 9876b06 commit 011a792

File tree

7 files changed

+90
-23
lines changed

7 files changed

+90
-23
lines changed

llvm/include/llvm/Analysis/Loads.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,13 @@ LLVM_ABI bool isDereferenceableAndAlignedInLoop(
8585
AssumptionCache *AC = nullptr,
8686
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
8787

88-
/// Return true if the loop \p L cannot fault on any iteration and only
89-
/// contains read-only memory accesses.
90-
LLVM_ABI bool isDereferenceableReadOnlyLoop(
91-
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
92-
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
88+
/// Returns true if the loop contains read-only memory accesses and doesn't
89+
/// throw. Puts loads that may fault into \p NonDereferenceableAndAlignedLoads.
90+
LLVM_ABI bool
91+
isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
92+
AssumptionCache *AC,
93+
SmallVectorImpl<LoadInst *> &NonDereferenceableAndAlignedLoads,
94+
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
9395

9496
/// Return true if we know that executing a load from this value cannot trap.
9597
///

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,12 @@ class LoopVectorizationLegality {
445445
/// Returns a list of all known histogram operations in the loop.
446446
bool hasHistograms() const { return !Histograms.empty(); }
447447

448+
/// Returns potentially faulting loads.
449+
const SmallPtrSetImpl<const Instruction *> &
450+
getPotentiallyFaultingLoads() const {
451+
return PotentiallyFaultingLoads;
452+
}
453+
448454
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
449455
return &PSE;
450456
}
@@ -633,6 +639,9 @@ class LoopVectorizationLegality {
633639
/// may work on the same memory location.
634640
SmallVector<HistogramInfo, 1> Histograms;
635641

642+
/// Hold potentially faulting loads.
643+
SmallPtrSet<const Instruction *, 4> PotentiallyFaultingLoads;
644+
636645
/// BFI and PSI are used to check for profile guided size optimizations.
637646
BlockFrequencyInfo *BFI;
638647
ProfileSummaryInfo *PSI;

llvm/lib/Analysis/Loads.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -859,16 +859,19 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
859859
return isPointerAlwaysReplaceable(From, To, DL);
860860
}
861861

862-
bool llvm::isDereferenceableReadOnlyLoop(
862+
bool llvm::isReadOnlyLoop(
863863
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
864+
SmallVectorImpl<LoadInst *> &NonDereferenceableAndAlignedLoads,
864865
SmallVectorImpl<const SCEVPredicate *> *Predicates) {
865866
for (BasicBlock *BB : L->blocks()) {
866867
for (Instruction &I : *BB) {
867868
if (auto *LI = dyn_cast<LoadInst>(&I)) {
868869
if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
869-
return false;
870-
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
870+
NonDereferenceableAndAlignedLoads.push_back(LI);
871+
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
872+
I.mayThrow()) {
871873
return false;
874+
}
872875
}
873876
}
874877
return true;

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,16 +1776,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17761776
assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
17771777
"Expected latch predecessor to be the early exiting block");
17781778

1779-
// TODO: Handle loops that may fault.
17801779
Predicates.clear();
1781-
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1782-
&Predicates)) {
1783-
reportVectorizationFailure(
1784-
"Loop may fault",
1785-
"Cannot vectorize potentially faulting early exit loop",
1786-
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1780+
SmallVector<LoadInst *, 4> NonDerefLoads;
1781+
if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1782+
&Predicates)) {
1783+
reportVectorizationFailure("Loop may fault",
1784+
"Cannot vectorize non-read-only early exit loop",
1785+
"NonReadOnlyEarlyExitLoop", ORE, TheLoop);
17871786
return false;
17881787
}
1788+
// Check non-dereferenceable loads if any.
1789+
for (LoadInst *LI : NonDerefLoads) {
1790+
// Only support unit-stride access for now.
1791+
int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1792+
if (Stride != 1) {
1793+
reportVectorizationFailure(
1794+
"Loop contains potentially faulting strided load",
1795+
"Cannot vectorize early exit loop with "
1796+
"strided fault-only-first load",
1797+
"EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1798+
return false;
1799+
}
1800+
PotentiallyFaultingLoads.insert(LI);
1801+
LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
1802+
<< "\n");
1803+
}
17891804

17901805
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
17911806
PSE.getSymbolicMaxBackedgeTakenCount();

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9828,6 +9828,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98289828
return false;
98299829
}
98309830

9831+
if (!LVL.getPotentiallyFaultingLoads().empty()) {
9832+
reportVectorizationFailure("Auto-vectorization of loops with potentially "
9833+
"faulting load is not supported",
9834+
"PotentiallyFaultingLoadsNotSupported", ORE, L);
9835+
return false;
9836+
}
9837+
98319838
// Entrance to the VPlan-native vectorization path. Outer loops are processed
98329839
// here. They may require CFG and instruction level transformations before
98339840
// even evaluating whether vectorization is profitable. Since we cannot modify

llvm/test/Transforms/LoopVectorize/early_exit_legality.ll

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ loop.end:
208208

209209
define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
210210
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas'
211-
; CHECK: LV: Not vectorizing: Loop may fault.
211+
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
212212
entry:
213213
%p1 = alloca [42 x i8]
214214
%p2 = alloca [42 x i8]
@@ -238,7 +238,7 @@ loop.end:
238238

239239
define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) {
240240
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_deref_ptrs'
241-
; CHECK: LV: Not vectorizing: Loop may fault.
241+
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
242242
entry:
243243
br label %loop
244244

@@ -264,7 +264,7 @@ loop.end:
264264

265265
define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) {
266266
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_unknown_ptrs'
267-
; CHECK: LV: Not vectorizing: Loop may fault.
267+
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
268268
entry:
269269
br label %loop
270270

@@ -287,6 +287,32 @@ loop.end:
287287
ret i64 %retval
288288
}
289289

290+
define ptr @same_exit_block_strided_unknown_ptr(ptr %first, ptr %last, i32 %value) {
291+
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_strided_unknown_ptr'
292+
; CHECK: LV: Not vectorizing: Loop contains potentially faulting strided load.
293+
entry:
294+
%cond = icmp eq ptr %first, %last
295+
br i1 %cond, label %return, label %for.body
296+
297+
for.body:
298+
%first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
299+
%1 = load i32, ptr %first.addr, align 4
300+
%cond2 = icmp eq i32 %1, %value
301+
br i1 %cond2, label %for.end, label %for.inc
302+
303+
for.inc:
304+
%first.next = getelementptr inbounds i32, ptr %first.addr, i64 2
305+
%cond3 = icmp eq ptr %first.next, %last
306+
br i1 %cond3, label %for.end, label %for.body
307+
308+
for.end:
309+
%retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
310+
br label %return
311+
312+
return:
313+
%retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
314+
ret ptr %retval
315+
}
290316

291317
; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't
292318
; support this yet.

llvm/unittests/Analysis/LoadsTest.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ define void @f(i32* %p1, i32* %p2, i64 %i) {
120120
EXPECT_TRUE(canReplacePointersInUseIfEqual(IcmpUse, P2, DL));
121121
}
122122

123-
TEST(LoadsTest, IsDerefReadOnlyLoop) {
123+
TEST(LoadsTest, IsReadOnlyLoop) {
124124
LLVMContext C;
125125
std::unique_ptr<Module> M = parseIR(C,
126126
R"IR(
@@ -183,7 +183,8 @@ loop.end:
183183
TargetLibraryInfoImpl TLII(M->getTargetTriple());
184184
TargetLibraryInfo TLI(TLII);
185185

186-
auto IsDerefReadOnlyLoop = [&TLI](Function *F) -> bool {
186+
auto IsReadOnlyLoop =
187+
[&TLI](Function *F, SmallVector<LoadInst *, 4> &NonDerefLoads) -> bool {
187188
AssumptionCache AC(*F);
188189
DominatorTree DT(*F);
189190
LoopInfo LI(DT);
@@ -195,9 +196,13 @@ loop.end:
195196
assert(Header->getName() == "loop");
196197
Loop *L = LI.getLoopFor(Header);
197198

198-
return isDereferenceableReadOnlyLoop(L, &SE, &DT, &AC);
199+
return isReadOnlyLoop(L, &SE, &DT, &AC, NonDerefLoads);
199200
};
200201

201-
ASSERT_TRUE(IsDerefReadOnlyLoop(F1));
202-
ASSERT_FALSE(IsDerefReadOnlyLoop(F2));
202+
SmallVector<LoadInst *, 4> NonDerefLoads;
203+
ASSERT_TRUE(IsReadOnlyLoop(F1, NonDerefLoads));
204+
ASSERT_TRUE(NonDerefLoads.empty());
205+
ASSERT_TRUE(IsReadOnlyLoop(F2, NonDerefLoads));
206+
ASSERT_TRUE((NonDerefLoads.size() == 1) &&
207+
(NonDerefLoads[0]->getName() == "ld1"));
203208
}

0 commit comments

Comments
 (0)