@@ -124,6 +124,72 @@ exit:
124124 ret i32 %add
125125}
126126
127+ ; Test that we also get VPExpressions when there is predication.
; The scalar loop in this function accumulates zext(a[i]) * zext(b[i]) over i8
; elements into an i32 sum. Its backedge carries !llvm.loop !1, whose operands
; request interleave count 1 and predicate.enable = true. In the expected VPlan
; dump below, both wide loads, the select feeding the reduction, and the
; PARTIAL-REDUCE recipe all take the active-lane-mask phi vp<%7> as an operand.
128+ define i32 @print_partial_reduction_predication (ptr %a , ptr %b , i64 %N ) "target-features" ="+sve" {
129+ ; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' {
130+ ; CHECK-NEXT: Live-in vp<%0> = VF
131+ ; CHECK-NEXT: Live-in vp<%1> = VF * UF
132+ ; CHECK-NEXT: Live-in ir<%N> = original trip-count
133+ ; CHECK-EMPTY:
134+ ; CHECK-NEXT: ir-bb<entry>:
135+ ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
136+ ; CHECK-EMPTY:
137+ ; CHECK-NEXT: vector.ph:
138+ ; CHECK-NEXT: EMIT vp<%4> = reduction-start-vector ir<0>, ir<0>, ir<4>
139+ ; CHECK-NEXT: EMIT vp<%5> = TC > VF ? TC - VF : 0 ir<%N>
140+ ; CHECK-NEXT: EMIT vp<%index.part.next> = VF * Part + ir<0>
141+ ; CHECK-NEXT: EMIT vp<%active.lane.mask.entry> = active lane mask vp<%index.part.next>, ir<%N>, ir<1>
142+ ; CHECK-NEXT: Successor(s): vector loop
143+ ; CHECK-EMPTY:
144+ ; CHECK-NEXT: <x1> vector loop: {
145+ ; CHECK-NEXT: vector.body:
146+ ; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
147+ ; CHECK-NEXT: ACTIVE-LANE-MASK-PHI vp<%7> = phi vp<%active.lane.mask.entry>, vp<%active.lane.mask.next>
148+ ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%4>, ir<%add> (VF scaled by 1/4)
149+ ; CHECK-NEXT: vp<%8> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
150+ ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%8>
151+ ; CHECK-NEXT: vp<%9> = vector-pointer ir<%gep.a>
152+ ; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%9>, vp<%7>
153+ ; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
154+ ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%8>
155+ ; CHECK-NEXT: vp<%10> = vector-pointer ir<%gep.b>
156+ ; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%10>, vp<%7>
157+ ; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
158+ ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
159+ ; CHECK-NEXT: EMIT vp<%11> = select vp<%7>, ir<%mul>, ir<0>
160+ ; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, vp<%11>, vp<%7>
161+ ; CHECK-NEXT: EMIT vp<%index.next> = add vp<%6>, vp<%1>
162+ ; CHECK-NEXT: EMIT vp<%12> = VF * Part + vp<%6>
163+ ; CHECK-NEXT: EMIT vp<%active.lane.mask.next> = active lane mask vp<%12>, vp<%5>, ir<1>
164+ ; CHECK-NEXT: EMIT vp<%13> = not vp<%active.lane.mask.next>
165+ ; CHECK-NEXT: EMIT branch-on-cond vp<%13>
166+ ; CHECK-NEXT: No successors
167+ ; CHECK-NEXT: }
168+ entry:
169+ br label %for.body
170+
171+ for.body: ; preds = %for.body, %entry
172+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ] ; element index, starts at 0
173+ %accum = phi i32 [ 0 , %entry ], [ %add , %for.body ] ; running i32 sum, starts at 0
174+ %gep.a = getelementptr i8 , ptr %a , i64 %iv ; byte address %a + %iv
175+ %load.a = load i8 , ptr %gep.a , align 1
176+ %ext.a = zext i8 %load.a to i32 ; widen a[iv] to i32 (zero-extended)
177+ %gep.b = getelementptr i8 , ptr %b , i64 %iv ; byte address %b + %iv
178+ %load.b = load i8 , ptr %gep.b , align 1
179+ %ext.b = zext i8 %load.b to i32 ; widen b[iv] to i32 (zero-extended)
180+ %mul = mul i32 %ext.b , %ext.a ; product of the two widened bytes
181+ %add = add i32 %mul , %accum ; fold the product into the running sum
182+ %iv.next = add i64 %iv , 1
183+ %exitcond.not = icmp eq i64 %iv.next , %N ; true once the next index equals %N
184+ br i1 %exitcond.not , label %exit , label %for.body , !llvm.loop !1 ; !1 = interleave count 1 + predicate.enable true
185+
186+ exit:
187+ ret i32 %add ; result is the sum after the final iteration
188+ }
189+
190+
127191!0 = distinct !{!0 , !2 , !3 } ; loop ID combining !2 and !3; its user is not visible in this part of the file
192+ !1 = distinct !{!1 , !2 , !4 } ; loop ID combining !2 and !4; attached to the for.body branch of @print_partial_reduction_predication
128193!2 = !{!"llvm.loop.interleave.count" , i32 1 } ; interleave count hint of 1
129194!3 = !{!"llvm.loop.vectorize.predicate.enable" , i1 false } ; predication hint off
195+ !4 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true } ; predication hint on
0 commit comments