@@ -135,28 +135,28 @@ for.inc: ; preds = %for.body, %if.then
135135; CHECK: Cost of 0 for VF 2: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
136136; CHECK: Cost of 0 for VF 2: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
137137; CHECK: Cost of 1 for VF 2: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
138- ; CHECK: Cost of 0 for VF 2: EMIT vp<%2 > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
139- ; CHECK: Cost of 0 for VF 2: vp<%3 > = SCALAR-STEPS vp<%2 >, ir<1>
140- ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<%3 >
141- ; CHECK: Cost of 0 for VF 2: vp<%4 > = SCALAR-STEPS vp<%2 >, ir<1>
142- ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<%4 >
143- ; CHECK: Cost of 0 for VF 2: vp<%5 > = SCALAR-STEPS vp<%2 >, ir<1>
144- ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<%5 >
145- ; CHECK: Cost of 0 for VF 2: vp<%6 > = vector-pointer vp<%next.gep>
146- ; CHECK: Cost of 18 for VF 2: WIDEN ir<%0> = load vp<%6 >
138+ ; CHECK: Cost of 0 for VF 2: EMIT vp<[[CAN_IV:%.+]] > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
139+ ; CHECK: Cost of 0 for VF 2: vp<[[STEPS1:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
140+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]] >
141+ ; CHECK: Cost of 0 for VF 2: vp<[[STEPS2:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
142+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]] >
143+ ; CHECK: Cost of 0 for VF 2: vp<[[STEPS3:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
144+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]] >
145+ ; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR:%.+]] > = vector-pointer vp<%next.gep>
146+ ; CHECK: Cost of 18 for VF 2: WIDEN ir<%0> = load vp<[[VEC_PTR]] >
147147; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
148- ; CHECK: Cost of 0 for VF 2: vp<%7 > = vector-pointer vp<%next.gep>.2
149- ; CHECK: Cost of 18 for VF 2: WIDEN ir<%1> = load vp<%7 >
148+ ; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR2:%.+]] > = vector-pointer vp<%next.gep>.2
149+ ; CHECK: Cost of 18 for VF 2: WIDEN ir<%1> = load vp<[[VEC_PTR2]] >
150150; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
151151; CHECK: Cost of 26 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
152152; CHECK: Cost of 18 for VF 2: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
153153; CHECK: Cost of 0 for VF 2: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
154154; CHECK: Cost of 22 for VF 2: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
155155; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
156- ; CHECK: Cost of 0 for VF 2: vp<%8 > = vector-pointer vp<%next.gep>.1
157- ; CHECK: Cost of 18 for VF 2: WIDEN store vp<%8 >, ir<%conv4>
158- ; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%2 >, vp<%0>
159- ; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%1 >
156+ ; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR3:%.+]] > = vector-pointer vp<%next.gep>.1
157+ ; CHECK: Cost of 18 for VF 2: WIDEN store vp<[[VEC_PTR3]] >, ir<%conv4>
158+ ; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]] >, vp<%0>
159+ ; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<{{.+}} >
160160; CHECK: Cost for VF 2: 130 (Estimated cost per lane: 65.
161161; CHECK: Cost of 1 for VF 4: induction instruction %dec = add i32 %blkCnt.012, -1
162162; CHECK: Cost of 0 for VF 4: induction instruction %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
@@ -167,28 +167,28 @@ for.inc: ; preds = %for.body, %if.then
167167; CHECK: Cost of 0 for VF 4: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
168168; CHECK: Cost of 0 for VF 4: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
169169; CHECK: Cost of 1 for VF 4: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
170- ; CHECK: Cost of 0 for VF 4: EMIT vp<%2 > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
171- ; CHECK: Cost of 0 for VF 4: vp<%3 > = SCALAR-STEPS vp<%2 >, ir<1>
172- ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<%3 >
173- ; CHECK: Cost of 0 for VF 4: vp<%4 > = SCALAR-STEPS vp<%2 >, ir<1>
174- ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<%4 >
175- ; CHECK: Cost of 0 for VF 4: vp<%5 > = SCALAR-STEPS vp<%2 >, ir<1>
176- ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<%5 >
177- ; CHECK: Cost of 0 for VF 4: vp<%6 > = vector-pointer vp<%next.gep>
178- ; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load vp<%6 >
170+ ; CHECK: Cost of 0 for VF 4: EMIT vp<[[CAN_IV:%.+]] > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
171+ ; CHECK: Cost of 0 for VF 4: vp<[[STEPS1:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
172+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]] >
173+ ; CHECK: Cost of 0 for VF 4: vp<[[STEPS2:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
174+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]] >
175+ ; CHECK: Cost of 0 for VF 4: vp<[[STEPS3:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
176+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]] >
177+ ; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR1:%.+]] > = vector-pointer vp<%next.gep>
178+ ; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load vp<[[VEC_PTR1]] >
179179; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
180- ; CHECK: Cost of 0 for VF 4: vp<%7 > = vector-pointer vp<%next.gep>.2
181- ; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load vp<%7 >
180+ ; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR2:%.+]] > = vector-pointer vp<%next.gep>.2
181+ ; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load vp<[[VEC_PTR2]] >
182182; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
183183; CHECK: Cost of 2 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
184184; CHECK: Cost of 2 for VF 4: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
185185; CHECK: Cost of 0 for VF 4: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
186186; CHECK: Cost of 2 for VF 4: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
187187; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
188- ; CHECK: Cost of 0 for VF 4: vp<%8 > = vector-pointer vp<%next.gep>.1
189- ; CHECK: Cost of 2 for VF 4: WIDEN store vp<%8 >, ir<%conv4>
190- ; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%2 >, vp<%0>
191- ; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%1 >
188+ ; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR3:%.+]] > = vector-pointer vp<%next.gep>.1
189+ ; CHECK: Cost of 2 for VF 4: WIDEN store vp<[[VEC_PTR3]] >, ir<%conv4>
190+ ; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]] >, vp<%0>
191+ ; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<{{.+}} >
192192; CHECK: Cost for VF 4: 14 (Estimated cost per lane: 3.
193193; CHECK: Cost of 1 for VF 8: induction instruction %dec = add i32 %blkCnt.012, -1
194194; CHECK: Cost of 0 for VF 8: induction instruction %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
@@ -199,28 +199,28 @@ for.inc: ; preds = %for.body, %if.then
199199; CHECK: Cost of 0 for VF 8: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
200200; CHECK: Cost of 0 for VF 8: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
201201; CHECK: Cost of 1 for VF 8: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
202- ; CHECK: Cost of 0 for VF 8: EMIT vp<%2 > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
203- ; CHECK: Cost of 0 for VF 8: vp<%3 > = SCALAR-STEPS vp<%2 >, ir<1>
204- ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<%3 >
205- ; CHECK: Cost of 0 for VF 8: vp<%4 > = SCALAR-STEPS vp<%2 >, ir<1>
206- ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<%4 >
207- ; CHECK: Cost of 0 for VF 8: vp<%5 > = SCALAR-STEPS vp<%2 >, ir<1>
208- ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<%5 >
209- ; CHECK: Cost of 0 for VF 8: vp<%6 > = vector-pointer vp<%next.gep>
210- ; CHECK: Cost of 2 for VF 8: WIDEN ir<%0> = load vp<%6 >
202+ ; CHECK: Cost of 0 for VF 8: EMIT vp<[[CAN_IV:%.+]] > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
203+ ; CHECK: Cost of 0 for VF 8: vp<[[STEPS1:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
204+ ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]] >
205+ ; CHECK: Cost of 0 for VF 8: vp<[[STEPS2:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
206+ ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]] >
207+ ; CHECK: Cost of 0 for VF 8: vp<[[STEPS3:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
208+ ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]] >
209+ ; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR1:%.+]] > = vector-pointer vp<%next.gep>
210+ ; CHECK: Cost of 2 for VF 8: WIDEN ir<%0> = load vp<[[VEC_PTR1]] >
211211; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
212- ; CHECK: Cost of 0 for VF 8: vp<%7 > = vector-pointer vp<%next.gep>.2
213- ; CHECK: Cost of 2 for VF 8: WIDEN ir<%1> = load vp<%7 >
212+ ; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR2:%.+]] > = vector-pointer vp<%next.gep>.2
213+ ; CHECK: Cost of 2 for VF 8: WIDEN ir<%1> = load vp<[[VEC_PTR2]] >
214214; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
215215; CHECK: Cost of 4 for VF 8: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
216216; CHECK: Cost of 4 for VF 8: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
217217; CHECK: Cost of 0 for VF 8: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
218218; CHECK: Cost of 4 for VF 8: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
219219; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
220- ; CHECK: Cost of 0 for VF 8: vp<%8 > = vector-pointer vp<%next.gep>.1
221- ; CHECK: Cost of 2 for VF 8: WIDEN store vp<%8 >, ir<%conv4>
222- ; CHECK: Cost of 0 for VF 8: EMIT vp<%index.next> = add nuw vp<%2 >, vp<%0>
223- ; CHECK: Cost of 0 for VF 8: EMIT branch-on-count vp<%index.next>, vp<%1 >
220+ ; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR3:%.+]] > = vector-pointer vp<%next.gep>.1
221+ ; CHECK: Cost of 2 for VF 8: WIDEN store vp<[[VEC_PTR3]] >, ir<%conv4>
222+ ; CHECK: Cost of 0 for VF 8: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]] >, vp<{{.+}} >
223+ ; CHECK: Cost of 0 for VF 8: EMIT branch-on-count vp<%index.next>, vp<{{.+}} >
224224; CHECK: Cost for VF 8: 26 (Estimated cost per lane: 3.
225225; CHECK: Cost of 1 for VF 16: induction instruction %dec = add i32 %blkCnt.012, -1
226226; CHECK: Cost of 0 for VF 16: induction instruction %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
@@ -231,28 +231,28 @@ for.inc: ; preds = %for.body, %if.then
231231; CHECK: Cost of 0 for VF 16: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
232232; CHECK: Cost of 0 for VF 16: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
233233; CHECK: Cost of 1 for VF 16: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
234- ; CHECK: Cost of 0 for VF 16: EMIT vp<%2 > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
235- ; CHECK: Cost of 0 for VF 16: vp<%3 > = SCALAR-STEPS vp<%2 >, ir<1>
236- ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<%3 >
237- ; CHECK: Cost of 0 for VF 16: vp<%4 > = SCALAR-STEPS vp<%2 >, ir<1>
238- ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<%4 >
239- ; CHECK: Cost of 0 for VF 16: vp<%5 > = SCALAR-STEPS vp<%2 >, ir<1>
240- ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<%5 >
241- ; CHECK: Cost of 0 for VF 16: vp<%6 > = vector-pointer vp<%next.gep>
242- ; CHECK: Cost of 2 for VF 16: WIDEN ir<%0> = load vp<%6 >
234+ ; CHECK: Cost of 0 for VF 16: EMIT vp<[[CAN_IV:%.+]] > = CANONICAL-INDUCTION ir<0>, vp<%index.next>
235+ ; CHECK: Cost of 0 for VF 16: vp<[[STEPS1:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
236+ ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]] >
237+ ; CHECK: Cost of 0 for VF 16: vp<[[STEPS2:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
238+ ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]] >
239+ ; CHECK: Cost of 0 for VF 16: vp<[[STEPS3:%.+]] > = SCALAR-STEPS vp<[[CAN_IV]] >, ir<1>
240+ ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]] >
241+ ; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR:%.+]] > = vector-pointer vp<%next.gep>
242+ ; CHECK: Cost of 2 for VF 16: WIDEN ir<%0> = load vp<[[VEC_PTR]] >
243243; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
244- ; CHECK: Cost of 0 for VF 16: vp<%7 > = vector-pointer vp<%next.gep>.2
245- ; CHECK: Cost of 2 for VF 16: WIDEN ir<%1> = load vp<%7 >
244+ ; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR1:%.+]] > = vector-pointer vp<%next.gep>.2
245+ ; CHECK: Cost of 2 for VF 16: WIDEN ir<%1> = load vp<[[VEC_PTR1]] >
246246; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
247247; CHECK: Cost of 8 for VF 16: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
248248; CHECK: Cost of 8 for VF 16: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
249249; CHECK: Cost of 0 for VF 16: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
250250; CHECK: Cost of 8 for VF 16: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
251251; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
252- ; CHECK: Cost of 0 for VF 16: vp<%8 > = vector-pointer vp<%next.gep>.1
253- ; CHECK: Cost of 2 for VF 16: WIDEN store vp<%8 >, ir<%conv4>
254- ; CHECK: Cost of 0 for VF 16: EMIT vp<%index.next> = add nuw vp<%2 >, vp<%0 >
255- ; CHECK: Cost of 0 for VF 16: EMIT branch-on-count vp<%index.next>, vp<%1 >
252+ ; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR2:%.+]] > = vector-pointer vp<%next.gep>.1
253+ ; CHECK: Cost of 2 for VF 16: WIDEN store vp<[[VEC_PTR2]] >, ir<%conv4>
254+ ; CHECK: Cost of 0 for VF 16: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]] >, vp<{{.+}} >
255+ ; CHECK: Cost of 0 for VF 16: EMIT branch-on-count vp<%index.next>, vp<{{.+}} >
256256; CHECK: Cost for VF 16: 50
257257; CHECK: LV: Selecting VF: 16.
258258define void @cheap_icmp (ptr nocapture readonly %pSrcA , ptr nocapture readonly %pSrcB , ptr nocapture %pDst , i32 %blockSize ) #0 {
0 commit comments