@@ -192,6 +192,97 @@ exit:
192192 ret void
193193}
194194
; Loop that truncates %iv to i32 twice: %trunc.iv in %loop.header and an
; identical %trunc.iv.2 in %then. The latch phi %p takes (%trunc.iv.2 << 16)
; when the value loaded from %src[%iv] is zero and %trunc.iv otherwise; %p is
; then truncated to i8 and stored to %dst[%iv].
; In the expected output below, the two truncates show up as two separate
; widened i32 induction phis (VEC_IND and VEC_IND1) that share the same start
; value and the same step.
; NOTE(review): judging by the function name, this presumably exists to
; exercise CSE of those two redundant inductions - confirm against the
; accompanying change.
195+ define void @redundant_iv_trunc_for_cse (ptr noalias %src , ptr noalias %dst , i64 %n ) #0 {
196+ ; CHECK-LABEL: define void @redundant_iv_trunc_for_cse(
197+ ; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
198+ ; CHECK-NEXT: [[ENTRY:.*:]]
199+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
200+ ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
201+ ; CHECK: [[VECTOR_PH]]:
202+ ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
203+ ; CHECK-NEXT: [[TMP2:%.*]] = mul <vscale x 4 x i32> [[TMP1]], splat (i32 1)
204+ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP2]]
205+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
206+ ; CHECK: [[VECTOR_BODY]]:
207+ ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
208+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
209+ ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[VECTOR_BODY]] ]
210+ ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
211+ ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
212+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP3]], i64 0
213+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
214+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[EVL_BASED_IV]]
215+ ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP3]])
216+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer
217+ ; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i32> [[VEC_IND1]], splat (i32 16)
218+ ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[VEC_IND]]
219+ ; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 4 x i32> [[PREDPHI]] to <vscale x 4 x i8>
220+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[EVL_BASED_IV]]
221+ ; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP3]])
222+ ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
223+ ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
224+ ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
225+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
226+ ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <vscale x 4 x i32> [[VEC_IND1]], [[BROADCAST_SPLAT]]
227+ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
228+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
229+ ; CHECK: [[MIDDLE_BLOCK]]:
230+ ; CHECK-NEXT: br label %[[EXIT:.*]]
231+ ; CHECK: [[SCALAR_PH:.*]]:
232+ ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
233+ ; CHECK: [[LOOP_HEADER]]:
234+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
235+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
236+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
237+ ; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[L]], 0
238+ ; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32
239+ ; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
240+ ; CHECK: [[THEN]]:
241+ ; CHECK-NEXT: [[TRUNC_IV_2:%.*]] = trunc i64 [[IV]] to i32
242+ ; CHECK-NEXT: [[SHL_IV:%.*]] = shl i32 [[TRUNC_IV_2]], 16
243+ ; CHECK-NEXT: br label %[[LOOP_LATCH]]
244+ ; CHECK: [[LOOP_LATCH]]:
245+ ; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[SHL_IV]], %[[THEN]] ], [ [[TRUNC_IV]], %[[LOOP_HEADER]] ]
246+ ; CHECK-NEXT: [[TRUNC_P:%.*]] = trunc i32 [[P]] to i8
247+ ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
248+ ; CHECK-NEXT: store i8 [[TRUNC_P]], ptr [[GEP_DST]], align 1
249+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
250+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
251+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
252+ ; CHECK: [[EXIT]]:
253+ ; CHECK-NEXT: ret void
254+ ;
255+ entry:
256+ br label %loop.header
257+
258+ loop.header:
259+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop.latch ]
260+ %gep.src = getelementptr inbounds i32 , ptr %src , i64 %iv
261+ %l = load i32 , ptr %gep.src
262+ %c.0 = icmp eq i32 %l , 0
; First truncate of %iv; it reaches %p on the edge that bypasses %then.
263+ %trunc.iv = trunc i64 %iv to i32
264+ br i1 %c.0 , label %then , label %loop.latch
265+
266+ then:
; Second truncate with the same operand and type as %trunc.iv.
267+ %trunc.iv.2 = trunc i64 %iv to i32
268+ %shl.iv = shl i32 %trunc.iv.2 , 16
269+ br label %loop.latch
270+
271+ loop.latch:
272+ %p = phi i32 [ %shl.iv , %then ], [ %trunc.iv , %loop.header ]
273+ %trunc.p = trunc i32 %p to i8
274+ %gep.dst = getelementptr inbounds i8 , ptr %dst , i64 %iv
275+ store i8 %trunc.p , ptr %gep.dst , align 1
276+ %iv.next = add i64 %iv , 1
; The exit test compares the pre-increment %iv with %n, so the body also runs
; for %iv == %n.
277+ %ec = icmp eq i64 %iv , %n
278+ br i1 %ec , label %exit , label %loop.header
279+
280+ exit:
281+ ret void
282+ }
283+
284+
285+
; Attribute groups: #0 sets the "target-features" string and #1 sets a
; "target-cpu"; functions opt in by naming the group after their signature.
195286attributes #0 = { "target-features" ="+64bit,+v,+zvl256b" }
196287attributes #1 = { "target-cpu" ="sifive-p670" }
197288;.
@@ -206,4 +297,5 @@ attributes #1 = { "target-cpu"="sifive-p670" }
206297; CHECK: [[META8]] = !{!"llvm.loop.unroll.runtime.disable"}
207298; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]]}
208299; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META8]]}
300+ ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META8]]}
209301;.
0 commit comments