; REQUIRES: asserts

; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s

; vp_smax: a[i] = llvm.smax.i32(b[i], c[i]) over i32 elements, i = 0 .. N-1.
; The loop is bottom-tested and exits once i + 1 == N; all three pointers are
; noalias. The checks pin the initial VPlan printed for this loop: the two loads
; and the store are expected as vp.load / vp.store recipes that take the
; explicit vector length, and the min/max call is expected as a
; WIDEN-INTRINSIC recipe calling llvm.smax.
define void @vp_smax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT:   vector.body:
; IF-EVL-NEXT:     EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT:     EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT:     EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT:     vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT:     CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.smax(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT:     CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT:     WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT:     EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT:   No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.smax.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

; vp_smin: a[i] = llvm.smin.i32(b[i], c[i]) over i32 elements, i = 0 .. N-1.
; The loop is bottom-tested and exits once i + 1 == N; all three pointers are
; noalias. The checks pin the initial VPlan printed for this loop: the two loads
; and the store are expected as vp.load / vp.store recipes that take the
; explicit vector length, and the min/max call is expected as a
; WIDEN-INTRINSIC recipe calling llvm.smin.
define void @vp_smin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT:   vector.body:
; IF-EVL-NEXT:     EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT:     EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT:     EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT:     vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT:     CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.smin(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT:     CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT:     WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT:     EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT:   No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.smin.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

; vp_umax: a[i] = llvm.umax.i32(b[i], c[i]) over i32 elements, i = 0 .. N-1.
; The loop is bottom-tested and exits once i + 1 == N; all three pointers are
; noalias. The checks pin the initial VPlan printed for this loop: the two loads
; and the store are expected as vp.load / vp.store recipes that take the
; explicit vector length, and the min/max call is expected as a
; WIDEN-INTRINSIC recipe calling llvm.umax.
define void @vp_umax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT:   vector.body:
; IF-EVL-NEXT:     EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT:     EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT:     EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT:     vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT:     CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.umax(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT:     CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT:     WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT:     EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT:   No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.umax.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

; vp_umin: a[i] = llvm.umin.i32(b[i], c[i]) over i32 elements, i = 0 .. N-1.
; The loop is bottom-tested and exits once i + 1 == N; all three pointers are
; noalias. The checks pin the initial VPlan printed for this loop: the two loads
; and the store are expected as vp.load / vp.store recipes that take the
; explicit vector length, and the min/max call is expected as a
; WIDEN-INTRINSIC recipe calling llvm.umin.
define void @vp_umin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT:   vector.body:
; IF-EVL-NEXT:     EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT:     EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT:     EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT:     vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT:     CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT:     WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.umin(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT:     CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT:     vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT:     WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT:     SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT:     EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT:     EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT:   No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.umin.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

; Declarations of the i32 integer min/max intrinsics called by the test
; functions in this file.
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)