Skip to content

Commit 9a20153

Browse files
committed
[LV] Bail out early if runtime checks are known to fail.
There are a number of cases where SCEV cannot prove that a predicate is always true or always false, but the predicate is nevertheless simplified to a constant during expansion (see the discussion in #131538). Bail out early if the runtime checks are known to always fail, because the vector loop generated later would never execute.
1 parent 1ea085b commit 9a20153

File tree

3 files changed

+26
-88
lines changed

3 files changed

+26
-88
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10095,9 +10095,20 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1009510095
unsigned SelectedIC = std::max(IC, UserIC);
1009610096
// Optimistically generate runtime checks if they are needed. Drop them if
1009710097
// they turn out to not be profitable.
10098-
if (VF.Width.isVector() || SelectedIC > 1)
10098+
if (VF.Width.isVector() || SelectedIC > 1) {
1009910099
Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
1010010100

10101+
// Bail out early if either the SCEV or memory runtime checks are known to
10102+
// fail. In that case, the vector loop would never execute.
10103+
using namespace llvm::PatternMatch;
10104+
if (Checks.getSCEVChecks().first &&
10105+
match(Checks.getSCEVChecks().first, m_One()))
10106+
return false;
10107+
if (Checks.getMemRuntimeChecks().first &&
10108+
match(Checks.getMemRuntimeChecks().first, m_One()))
10109+
return false;
10110+
}
10111+
1010110112
// Check if it is profitable to vectorize with runtime checks.
1010210113
bool ForceVectorization =
1010310114
Hints.getForce() == LoopVectorizeHints::FK_Enabled;

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 4 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -374,66 +374,17 @@ exit:
374374

375375
define void @test_widen_extended_induction(ptr %dst) {
376376
; CHECK-LABEL: @test_widen_extended_induction(
377-
; CHECK-NEXT: iter.check:
378-
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
379-
; CHECK: vector.scevcheck:
380-
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
381-
; CHECK: vector.main.loop.iter.check:
382-
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
383-
; CHECK: vector.ph:
384-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
385-
; CHECK: vector.body:
386-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
387-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
388-
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
389-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
390-
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[OFFSET_IDX]] to i64
391-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST:%.*]], i64 0, i64 [[TMP1]]
392-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
393-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 2
394-
; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP3]], align 1
395-
; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP4]], align 1
396-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
397-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2)
398-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000
399-
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
400-
; CHECK: middle.block:
401-
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
402-
; CHECK: vec.epilog.iter.check:
403-
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
404-
; CHECK: vec.epilog.ph:
405-
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
406-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
407-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
408-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
409-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1>
410-
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
411-
; CHECK: vec.epilog.vector.body:
412-
; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
413-
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
414-
; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = trunc i32 [[INDEX2]] to i8
415-
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[OFFSET_IDX5]] to i64
416-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST]], i64 0, i64 [[TMP7]]
417-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
418-
; CHECK-NEXT: store <2 x i8> [[VEC_IND3]], ptr [[TMP9]], align 1
419-
; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX2]], 2
420-
; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2)
421-
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 10000
422-
; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
423-
; CHECK: vec.epilog.middle.block:
424-
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
425-
; CHECK: vec.epilog.scalar.ph:
426-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
377+
; CHECK-NEXT: entry:
427378
; CHECK-NEXT: br label [[LOOP:%.*]]
428379
; CHECK: loop:
429-
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
380+
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
430381
; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64
431-
; CHECK-NEXT: [[ARRAYIDX1449:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST]], i64 0, i64 [[IV_EXT]]
382+
; CHECK-NEXT: [[ARRAYIDX1449:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST:%.*]], i64 0, i64 [[IV_EXT]]
432383
; CHECK-NEXT: store i8 [[IV]], ptr [[ARRAYIDX1449]], align 1
433384
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
434385
; CHECK-NEXT: [[IV_NEXT_EXT:%.*]] = zext i8 [[IV_NEXT]] to i32
435386
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT_EXT]], 10000
436-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], {{!llvm.loop ![0-9]+}}
387+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
437388
; CHECK: exit:
438389
; CHECK-NEXT: ret void
439390
;

llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,42 +6,18 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
66
; CHECK-LABEL: define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(
77
; CHECK-SAME: i8 [[FOR_START:%.*]], ptr [[DST:%.*]]) {
88
; CHECK-NEXT: [[ENTRY:.*]]:
9-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
10-
; CHECK: [[VECTOR_SCEVCHECK]]:
11-
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
12-
; CHECK: [[VECTOR_PH]]:
13-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[FOR_START]], i64 0
14-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
15-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLAT]], <4 x i8> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
16-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
17-
; CHECK: [[VECTOR_BODY]]:
18-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
19-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
20-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[OFFSET_IDX]]
21-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
22-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
23-
; CHECK-NEXT: store <4 x i8> [[TMP0]], ptr [[TMP2]], align 1
24-
; CHECK-NEXT: store <4 x i8> [[TMP0]], ptr [[TMP3]], align 1
25-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
26-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], -8
27-
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
28-
; CHECK: [[MIDDLE_BLOCK]]:
29-
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
30-
; CHECK: [[SCALAR_PH]]:
31-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -7, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ], [ 1, %[[VECTOR_SCEVCHECK]] ]
32-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[FOR_START]], %[[MIDDLE_BLOCK]] ], [ [[FOR_START]], %[[ENTRY]] ], [ [[FOR_START]], %[[VECTOR_SCEVCHECK]] ]
339
; CHECK-NEXT: br label %[[LOOP:.*]]
3410
; CHECK: [[LOOP]]:
35-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
36-
; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
11+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
12+
; CHECK-NEXT: [[FOR:%.*]] = phi i8 [ [[FOR_START]], %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
3713
; CHECK-NEXT: [[FOR_NEXT]] = and i8 [[FOR_START]], -1
3814
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
3915
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
4016
; CHECK-NEXT: store i8 [[FOR]], ptr [[GEP_DST]], align 1
4117
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
42-
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
18+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
4319
; CHECK: [[EXIT]]:
44-
; CHECK-NEXT: [[FOR_NEXT_LCSSA:%.*]] = phi i8 [ [[FOR_NEXT]], %[[LOOP]] ], [ [[FOR_START]], %[[MIDDLE_BLOCK]] ]
20+
; CHECK-NEXT: [[FOR_NEXT_LCSSA:%.*]] = phi i8 [ [[FOR_NEXT]], %[[LOOP]] ]
4521
; CHECK-NEXT: ret i8 [[FOR_NEXT_LCSSA]]
4622
;
4723
entry:
@@ -86,7 +62,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
8662
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
8763
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
8864
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
89-
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
65+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
9066
; CHECK: [[MIDDLE_BLOCK]]:
9167
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
9268
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
@@ -108,7 +84,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
10884
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
10985
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
11086
; CHECK-NEXT: store i32 0, ptr [[A_GEP]], align 4
111-
; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
87+
; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
11288
; CHECK: [[FOR_END]]:
11389
; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
11490
; CHECK-NEXT: ret i32 [[FOR_LCSSA]]
@@ -169,7 +145,7 @@ define void @sink_dead_inst(ptr %a) {
169145
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
170146
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
171147
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
172-
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
148+
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
173149
; CHECK: [[MIDDLE_BLOCK]]:
174150
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
175151
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT1:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
@@ -190,7 +166,7 @@ define void @sink_dead_inst(ptr %a) {
190166
; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
191167
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]]
192168
; CHECK-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2
193-
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
169+
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
194170
; CHECK: [[FOR_END]]:
195171
; CHECK-NEXT: ret void
196172
;
@@ -232,7 +208,7 @@ define void @unused_recurrence(ptr %a) {
232208
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
233209
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
234210
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
235-
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
211+
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
236212
; CHECK: [[MIDDLE_BLOCK]]:
237213
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
238214
; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
@@ -247,7 +223,7 @@ define void @unused_recurrence(ptr %a) {
247223
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
248224
; CHECK-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
249225
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
250-
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
226+
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
251227
; CHECK: [[FOR_END]]:
252228
; CHECK-NEXT: ret void
253229
;

0 commit comments

Comments
 (0)