Skip to content

Commit 4947e0f

Browse files
committed
Restrict FindLastIV idiom to single-use reduction phi.
FindLastIV vectorization does not yet support the more complex case in which the reduction phi is used by more than one select instruction, as in the following example:
```
for.header:
  %indvars = phi 0, %indvars.next
  %rdx.phi = phi 0, %rdx.phi.next
  br %pred, label %bb0, label %bb1
bb0:
  ...
  %select.bb0 = select %cmp0, %rdx.phi, %indvars
  br label %for.inc
bb1:
  ...
  %select.bb1 = select %cmp1, %rdx.phi, %indvars
  br label %for.inc
for.inc:
  ...
  %rdx.phi.next = phi %select.bb0, %select.bb1
  %indvars.next = add nuw nsw i64 %indvars, 1
  br %exitcond, label %for.end, label %for.header
for.end:
  ; external use of %rdx.phi.next
```
This patch rejects the unsupported idiom during the reduction identification phase, which prevents an internal compiler error later in vectorization.
1 parent a852eb6 commit 4947e0f

File tree

2 files changed

+96
-0
lines changed

2 files changed

+96
-0
lines changed

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,13 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
690690
RecurrenceDescriptor::InstDesc
691691
RecurrenceDescriptor::isFindLastIVPattern(PHINode *OrigPhi, Instruction *I,
692692
ScalarEvolution &SE) {
693+
// TODO: Support the vectorization of FindLastIV when the reduction phi is
694+
// used by more than one select instruction. This vectorization is only
695+
// performed when the SCEV of each increasing induction variable used by the
696+
// select instructions is identical.
697+
if (!OrigPhi->hasOneUse())
698+
return InstDesc(false, I);
699+
693700
// TODO: Match selects with multi-use cmp conditions.
694701
CmpInst::Predicate Pred;
695702
Value *TrueVal, *FalseVal;
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1 --check-prefix=CHECK
3+
4+
define i32 @select_icmp_switch(i32 %n, i32 %case, ptr %a, ptr %b) {
5+
; CHECK-VF4IC1-LABEL: define i32 @select_icmp_switch(
6+
; CHECK-VF4IC1-SAME: i32 [[N:%.*]], i32 [[CASE:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
7+
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
8+
; CHECK-VF4IC1-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
9+
; CHECK-VF4IC1-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
10+
; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]:
11+
; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
12+
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
13+
; CHECK-VF4IC1: [[FOR_BODY]]:
14+
; CHECK-VF4IC1-NEXT: [[INDVARS:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_INC:.*]] ]
15+
; CHECK-VF4IC1-NEXT: [[RDX_PHI:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[RDX_PHI_NEXT:%.*]], %[[FOR_INC]] ]
16+
; CHECK-VF4IC1-NEXT: switch i32 [[CASE]], label %[[SW_BB0:.*]] [
17+
; CHECK-VF4IC1-NEXT: i32 0, label %[[SW_BB0]]
18+
; CHECK-VF4IC1-NEXT: i32 1, label %[[SW_BB1:.*]]
19+
; CHECK-VF4IC1-NEXT: ]
20+
; CHECK-VF4IC1: [[SW_BB0]]:
21+
; CHECK-VF4IC1-NEXT: [[A_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS]]
22+
; CHECK-VF4IC1-NEXT: [[A_VALUE:%.*]] = load i8, ptr [[A_ARRAYIDX]], align 1
23+
; CHECK-VF4IC1-NEXT: [[CMP_A:%.*]] = icmp eq i8 [[A_VALUE]], -1
24+
; CHECK-VF4IC1-NEXT: [[TRUNC_BB0:%.*]] = trunc i64 [[INDVARS]] to i32
25+
; CHECK-VF4IC1-NEXT: [[SELECT_BB0:%.*]] = select i1 [[CMP_A]], i32 [[RDX_PHI]], i32 [[TRUNC_BB0]]
26+
; CHECK-VF4IC1-NEXT: br label %[[FOR_INC]]
27+
; CHECK-VF4IC1: [[SW_BB1]]:
28+
; CHECK-VF4IC1-NEXT: [[B_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDVARS]]
29+
; CHECK-VF4IC1-NEXT: [[B_VALUE:%.*]] = load i8, ptr [[B_ARRAYIDX]], align 1
30+
; CHECK-VF4IC1-NEXT: [[CMP_B:%.*]] = icmp eq i8 [[B_VALUE]], -1
31+
; CHECK-VF4IC1-NEXT: [[TRUNC_BB1:%.*]] = trunc i64 [[INDVARS]] to i32
32+
; CHECK-VF4IC1-NEXT: [[SELECT_BB1:%.*]] = select i1 [[CMP_B]], i32 [[RDX_PHI]], i32 [[TRUNC_BB1]]
33+
; CHECK-VF4IC1-NEXT: br label %[[FOR_INC]]
34+
; CHECK-VF4IC1: [[FOR_INC]]:
35+
; CHECK-VF4IC1-NEXT: [[RDX_PHI_NEXT]] = phi i32 [ [[SELECT_BB0]], %[[SW_BB0]] ], [ [[SELECT_BB1]], %[[SW_BB1]] ]
36+
; CHECK-VF4IC1-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
37+
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[WIDE_TRIP_COUNT]]
38+
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
39+
; CHECK-VF4IC1: [[FOR_END_LOOPEXIT]]:
40+
; CHECK-VF4IC1-NEXT: [[RDX_PHI_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_PHI_NEXT]], %[[FOR_INC]] ]
41+
; CHECK-VF4IC1-NEXT: br label %[[FOR_END]]
42+
; CHECK-VF4IC1: [[FOR_END]]:
43+
; CHECK-VF4IC1-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_PHI_NEXT_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
44+
; CHECK-VF4IC1-NEXT: ret i32 [[SELECT_LCSSA]]
45+
;
46+
entry:
47+
%cmp.sgt = icmp sgt i32 %n, 0
48+
br i1 %cmp.sgt, label %for.body.preheader, label %for.end
49+
50+
for.body.preheader:
51+
%wide.trip.count = zext i32 %n to i64
52+
br label %for.body
53+
54+
for.body:
55+
%indvars = phi i64 [ 0, %for.body.preheader ], [ %indvars.next, %for.inc ]
56+
%rdx.phi = phi i32 [ 0, %for.body.preheader ], [ %rdx.phi.next, %for.inc ]
57+
switch i32 %case, label %sw.bb0 [
58+
i32 0, label %sw.bb0
59+
i32 1, label %sw.bb1
60+
]
61+
62+
sw.bb0:
63+
%a.arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars
64+
%a.value = load i8, ptr %a.arrayidx, align 1
65+
%cmp.a = icmp eq i8 %a.value, -1
66+
%trunc.bb0 = trunc i64 %indvars to i32
67+
%select.bb0 = select i1 %cmp.a, i32 %rdx.phi, i32 %trunc.bb0
68+
br label %for.inc
69+
70+
sw.bb1:
71+
%b.arrayidx = getelementptr inbounds i8, ptr %b, i64 %indvars
72+
%b.value = load i8, ptr %b.arrayidx, align 1
73+
%cmp.b = icmp eq i8 %b.value, -1
74+
%trunc.bb1 = trunc i64 %indvars to i32
75+
%select.bb1 = select i1 %cmp.b, i32 %rdx.phi, i32 %trunc.bb1
76+
br label %for.inc
77+
78+
for.inc:
79+
%rdx.phi.next = phi i32 [ %select.bb0, %sw.bb0 ], [ %select.bb1, %sw.bb1 ]
80+
%indvars.next = add nuw nsw i64 %indvars, 1
81+
%exitcond.not = icmp eq i64 %indvars.next, %wide.trip.count
82+
br i1 %exitcond.not, label %for.end, label %for.body
83+
84+
for.end:
85+
%select.lcssa = phi i32 [ %rdx.phi.next, %for.inc ], [ 0, %entry ]
86+
ret i32 %select.lcssa
87+
}
88+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
89+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)