Skip to content

Commit d4f3c25

Browse files
committed
Address review comments
1 parent fb3f73b commit d4f3c25

File tree

5 files changed

+89
-81
lines changed

5 files changed

+89
-81
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,15 @@ void reportVectorizationFailure(const StringRef DebugMsg,
170170
const StringRef OREMsg, const StringRef ORETag,
171171
OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);
172172

173+
/// Same as above, but \p DebugMsg is used as both the debug message and the
/// optimization remark message.
174+
inline void reportVectorizationFailure(const StringRef DebugMsg,
175+
const StringRef ORETag,
176+
OptimizationRemarkEmitter *ORE,
177+
Loop *TheLoop,
178+
Instruction *I = nullptr) {
179+
reportVectorizationFailure(DebugMsg, DebugMsg, ORETag, ORE, TheLoop, I);
180+
}
181+
173182
/// A marker analysis to determine if extra passes should be run after loop
174183
/// vectorization.
175184
struct ShouldRunExtraVectorPasses

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4138,8 +4138,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
41384138
// a bottom-test and a single exiting block. We'd have to handle the fact
41394139
// that not every instruction executes on the last iteration. This will
41404140
// require a lane mask which varies through the vector loop body. (TODO)
4141-
if (Legal->hasUncountableEarlyExit() ||
4142-
TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
4141+
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
41434142
// If there was a tail-folding hint/switch, but we can't fold the tail by
41444143
// masking, fallback to a vectorization with a scalar epilogue.
41454144
if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
@@ -4770,8 +4769,7 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
47704769
// non-latch exits properly. It may be fine, but it needs to be audited and
47714770
// tested.
47724771
// TODO: Add support for loops with an early exit.
4773-
if (Legal->hasUncountableEarlyExit() ||
4774-
OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch())
4772+
if (OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch())
47754773
return false;
47764774

47774775
return true;
@@ -5019,7 +5017,9 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
50195017
if (!Legal->isSafeForAnyVectorWidth())
50205018
return 1;
50215019

5022-
// We don't attempt to perform interleaving for early exit loops.
5020+
// We don't attempt to perform interleaving for loops with uncountable early
5021+
// exits because the VPInstruction::AnyOf code cannot currently handle
5022+
// multiple parts.
50235023
if (Legal->hasUncountableEarlyExit())
50245024
return 1;
50255025

@@ -7842,7 +7842,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78427842
fixReductionScalarResumeWhenVectorizingEpilog(
78437843
&R, State, State.CFG.VPBB2IRBB[ExitVPBB], BypassBlock);
78447844
}
7845-
78467845
BasicBlock *PH = OrigLoop->getLoopPreheader();
78477846
for (const auto &[IVPhi, _] : Legal->getInductionVars()) {
78487847
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
@@ -10205,24 +10204,23 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1020510204
if (LVL.hasUncountableEarlyExit()) {
1020610205
if (!EnableEarlyExitVectorization) {
1020710206
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
10208-
"early exit is not enabled",
10209-
"Auto-vectorization of loops with uncountable "
1021010207
"early exit is not enabled",
1021110208
"UncountableEarlyExitLoopsDisabled", ORE, L);
1021210209
return false;
1021310210
}
1021410211

10215-
// Needed to prevent InnerLoopVectorizer::fixupIVUsers from crashing.
10212+
// In addUsersInExitBlocks we already bail out if there is an outside use
10213+
// of a loop-defined variable, but it ignores induction variables which are
10214+
// handled by InnerLoopVectorizer::fixupIVUsers. We need to bail out if we
10215+
// encounter induction variables too, otherwise fixupIVUsers will crash.
1021610216
for (BasicBlock *BB : L->blocks()) {
1021710217
for (Instruction &I : *BB) {
1021810218
for (User *U : I.users()) {
1021910219
Instruction *UI = cast<Instruction>(U);
1022010220
if (!L->contains(UI)) {
1022110221
reportVectorizationFailure(
1022210222
"Auto-vectorization of loops with uncountable "
10223-
"early exit and live-outs is not yet supported",
10224-
"Auto-vectorization of loop with uncountable "
10225-
"early exit and live-outs is not yet supported",
10223+
"early exit and live-outs is not supported",
1022610224
"UncountableEarlyExitLoopLiveOutsUnsupported", ORE, L);
1022710225
return false;
1022810226
}
@@ -10256,11 +10254,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1025610254
if (LVL.hasUncountableEarlyExit()) {
1025710255
BasicBlock *LoopLatch = L->getLoopLatch();
1025810256
if (IAI.requiresScalarEpilogue() ||
10259-
llvm::any_of(LVL.getCountableExitingBlocks(),
10260-
[LoopLatch](BasicBlock *BB) { return BB != LoopLatch; })) {
10257+
any_of(LVL.getCountableExitingBlocks(),
10258+
[LoopLatch](BasicBlock *BB) { return BB != LoopLatch; })) {
1026110259
reportVectorizationFailure("Auto-vectorization of early exit loops "
10262-
"requiring a scalar epilogue is unsupported",
10263-
"Auto-vectorization of early exit loops "
1026410260
"requiring a scalar epilogue is unsupported",
1026510261
"UncountableEarlyExitUnsupported", ORE, L);
1026610262
return false;

llvm/test/Transforms/LoopVectorize/early_exit_legality.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() {
4949
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
5050
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
5151
; CHECK-NEXT: LV: We can vectorize this loop!
52-
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit and live-outs is not yet supported.
52+
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit and live-outs is not supported.
5353
entry:
5454
%p1 = alloca [1024 x i8]
5555
%p2 = alloca [1024 x i8]
@@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
141141
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
142142
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
143143
; CHECK-NEXT: LV: We can vectorize this loop!
144-
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit and live-outs is not yet supported.
144+
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit and live-outs is not supported.
145145
entry:
146146
%p1 = alloca [1024 x i8]
147147
call void @init_mem(ptr %p1, i64 1024)

llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll

Lines changed: 61 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,32 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -enable-early-exit-vectorization \
3-
; RUN: | FileCheck %s --check-prefix=MAY_FAULT
2+
; RUN: opt -S < %s -p loop-vectorize | FileCheck %s
43

54
declare void @init_mem(ptr, i64);
65

76

87
define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
9-
; MAY_FAULT-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
10-
; MAY_FAULT-NEXT: entry:
11-
; MAY_FAULT-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1
12-
; MAY_FAULT-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1
13-
; MAY_FAULT-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
14-
; MAY_FAULT-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
15-
; MAY_FAULT-NEXT: br label [[LOOP:%.*]]
16-
; MAY_FAULT: loop:
17-
; MAY_FAULT-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
18-
; MAY_FAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
19-
; MAY_FAULT-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
20-
; MAY_FAULT-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
21-
; MAY_FAULT-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
22-
; MAY_FAULT-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
23-
; MAY_FAULT-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
24-
; MAY_FAULT: loop.inc:
25-
; MAY_FAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
26-
; MAY_FAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
27-
; MAY_FAULT-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
28-
; MAY_FAULT: loop.end:
29-
; MAY_FAULT-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
30-
; MAY_FAULT-NEXT: ret i64 [[RETVAL]]
8+
; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
9+
; CHECK-NEXT: entry:
10+
; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1
11+
; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1
12+
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
13+
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
14+
; CHECK-NEXT: br label [[LOOP:%.*]]
15+
; CHECK: loop:
16+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
17+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
18+
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
19+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
20+
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
21+
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
22+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
23+
; CHECK: loop.inc:
24+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
25+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
26+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
27+
; CHECK: loop.end:
28+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
29+
; CHECK-NEXT: ret i64 [[RETVAL]]
3130
;
3231
entry:
3332
%p1 = alloca [42 x i8]
@@ -57,25 +56,25 @@ loop.end:
5756

5857

5958
define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) {
60-
; MAY_FAULT-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
61-
; MAY_FAULT-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) {
62-
; MAY_FAULT-NEXT: entry:
63-
; MAY_FAULT-NEXT: br label [[LOOP:%.*]]
64-
; MAY_FAULT: loop:
65-
; MAY_FAULT-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
66-
; MAY_FAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
67-
; MAY_FAULT-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
68-
; MAY_FAULT-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
69-
; MAY_FAULT-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
70-
; MAY_FAULT-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
71-
; MAY_FAULT-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
72-
; MAY_FAULT: loop.inc:
73-
; MAY_FAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
74-
; MAY_FAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
75-
; MAY_FAULT-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
76-
; MAY_FAULT: loop.end:
77-
; MAY_FAULT-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
78-
; MAY_FAULT-NEXT: ret i64 [[RETVAL]]
59+
; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
60+
; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) {
61+
; CHECK-NEXT: entry:
62+
; CHECK-NEXT: br label [[LOOP:%.*]]
63+
; CHECK: loop:
64+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
65+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
66+
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
67+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
68+
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
69+
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
70+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
71+
; CHECK: loop.inc:
72+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
73+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
74+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
75+
; CHECK: loop.end:
76+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
77+
; CHECK-NEXT: ret i64 [[RETVAL]]
7978
;
8079
entry:
8180
br label %loop
@@ -101,25 +100,25 @@ loop.end:
101100

102101

103102
define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) {
104-
; MAY_FAULT-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
105-
; MAY_FAULT-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) {
106-
; MAY_FAULT-NEXT: entry:
107-
; MAY_FAULT-NEXT: br label [[LOOP:%.*]]
108-
; MAY_FAULT: loop:
109-
; MAY_FAULT-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
110-
; MAY_FAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
111-
; MAY_FAULT-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
112-
; MAY_FAULT-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
113-
; MAY_FAULT-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
114-
; MAY_FAULT-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
115-
; MAY_FAULT-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
116-
; MAY_FAULT: loop.inc:
117-
; MAY_FAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
118-
; MAY_FAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
119-
; MAY_FAULT-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
120-
; MAY_FAULT: loop.end:
121-
; MAY_FAULT-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
122-
; MAY_FAULT-NEXT: ret i64 [[RETVAL]]
103+
; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
104+
; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) {
105+
; CHECK-NEXT: entry:
106+
; CHECK-NEXT: br label [[LOOP:%.*]]
107+
; CHECK: loop:
108+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
109+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
110+
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
111+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
112+
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
113+
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
114+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
115+
; CHECK: loop.inc:
116+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
117+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
118+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
119+
; CHECK: loop.end:
120+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
121+
; CHECK-NEXT: ret i64 [[RETVAL]]
123122
;
124123
entry:
125124
br label %loop

llvm/test/Transforms/LoopVectorize/single_early_exit_with_outer_loop.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
declare void @init_mem(ptr, i64);
44

5-
5+
; Tests that the additional middle.split created for handling loops with
6+
; uncountable early exits is correctly added to the outer loop at depth 1.
67
define void @early_exit_in_outer_loop1() {
78
; CHECK-LABEL: Loop info for function 'early_exit_in_outer_loop1':
89
; CHECK: Loop at depth 1 containing: {{.*}}%middle.block,%scalar.ph,%vector.ph,%vector.body,%middle.split
@@ -39,6 +40,9 @@ loop.inner.end:
3940
br label %loop.outer
4041
}
4142

43+
; Tests that the additional middle.split created for handling loops with
44+
; uncountable early exits is correctly added to both the outer and middle
45+
; loops at depths 1 and 2, respectively.
4246
define void @early_exit_in_outer_loop2() {
4347
; CHECK-LABEL: Loop info for function 'early_exit_in_outer_loop2':
4448
; CHECK: Loop at depth 1 containing: {{.*}}%middle.block,%scalar.ph,%vector.ph,%vector.body,%middle.split

0 commit comments

Comments
 (0)