@@ -192,6 +192,97 @@ exit:
192
192
ret void
193
193
}
194
194
195
; Test input: a loop over %iv that loads an i32 from %src[%iv] and stores an
; i8 to %dst[%iv]. The stored value is the low byte of either the truncated
; induction variable (load != 0) or that truncated value shifted left by 16
; (load == 0).
;
; %iv is truncated to i32 twice with the same instruction: %trunc.iv in
; loop.header and %trunc.iv.2 in then. This duplicate is presumably the
; "redundant iv trunc" the function name refers to.
;
; The expected output below uses llvm.experimental.get.vector.length together
; with vp.load / vp.store, and contains two widened i32 induction phis
; (VEC_IND and VEC_IND1) that share the same start value and the same step.
; NOTE(review): whether keeping both phis is the intended result or a known
; missed simplification is not visible from this hunk -- confirm against the
; accompanying change.
+ define void @redundant_iv_trunc_for_cse (ptr noalias %src , ptr noalias %dst , i64 %n ) #0 {
196
+ ; CHECK-LABEL: define void @redundant_iv_trunc_for_cse(
197
+ ; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
198
+ ; CHECK-NEXT: [[ENTRY:.*:]]
199
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
200
+ ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
201
+ ; CHECK: [[VECTOR_PH]]:
202
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
203
+ ; CHECK-NEXT: [[TMP2:%.*]] = mul <vscale x 4 x i32> [[TMP1]], splat (i32 1)
204
+ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP2]]
205
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
206
+ ; CHECK: [[VECTOR_BODY]]:
207
+ ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
208
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
209
+ ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[VECTOR_BODY]] ]
210
+ ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
211
+ ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
212
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP3]], i64 0
213
+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
214
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[EVL_BASED_IV]]
215
+ ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP3]])
216
+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer
217
+ ; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i32> [[VEC_IND1]], splat (i32 16)
218
+ ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[VEC_IND]]
219
+ ; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 4 x i32> [[PREDPHI]] to <vscale x 4 x i8>
220
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[EVL_BASED_IV]]
221
+ ; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP3]])
222
+ ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
223
+ ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
224
+ ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
225
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
226
+ ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <vscale x 4 x i32> [[VEC_IND1]], [[BROADCAST_SPLAT]]
227
+ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
228
+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
229
+ ; CHECK: [[MIDDLE_BLOCK]]:
230
+ ; CHECK-NEXT: br label %[[EXIT:.*]]
231
+ ; CHECK: [[SCALAR_PH:.*]]:
232
+ ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
233
+ ; CHECK: [[LOOP_HEADER]]:
234
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
235
+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
236
+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
237
+ ; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[L]], 0
238
+ ; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32
239
+ ; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
240
+ ; CHECK: [[THEN]]:
241
+ ; CHECK-NEXT: [[TRUNC_IV_2:%.*]] = trunc i64 [[IV]] to i32
242
+ ; CHECK-NEXT: [[SHL_IV:%.*]] = shl i32 [[TRUNC_IV_2]], 16
243
+ ; CHECK-NEXT: br label %[[LOOP_LATCH]]
244
+ ; CHECK: [[LOOP_LATCH]]:
245
+ ; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[SHL_IV]], %[[THEN]] ], [ [[TRUNC_IV]], %[[LOOP_HEADER]] ]
246
+ ; CHECK-NEXT: [[TRUNC_P:%.*]] = trunc i32 [[P]] to i8
247
+ ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
248
+ ; CHECK-NEXT: store i8 [[TRUNC_P]], ptr [[GEP_DST]], align 1
249
+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
250
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
251
+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
252
+ ; CHECK: [[EXIT]]:
253
+ ; CHECK-NEXT: ret void
254
+ ;
255
+ entry:
256
+ br label %loop.header
257
+
258
+ loop.header:
259
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop.latch ]
260
+ %gep.src = getelementptr inbounds i32 , ptr %src , i64 %iv
261
+ %l = load i32 , ptr %gep.src
262
+ %c.0 = icmp eq i32 %l , 0
263
+ %trunc.iv = trunc i64 %iv to i32 ; first truncation of %iv; feeds %p on the edge from loop.header
264
+ br i1 %c.0 , label %then , label %loop.latch
265
+
266
+ then:
267
+ %trunc.iv.2 = trunc i64 %iv to i32 ; same operation and operand as %trunc.iv above
268
+ %shl.iv = shl i32 %trunc.iv.2 , 16
269
+ br label %loop.latch
270
+
271
+ loop.latch:
272
+ %p = phi i32 [ %shl.iv , %then ], [ %trunc.iv , %loop.header ]
273
+ %trunc.p = trunc i32 %p to i8
274
+ %gep.dst = getelementptr inbounds i8 , ptr %dst , i64 %iv
275
+ store i8 %trunc.p , ptr %gep.dst , align 1
276
+ %iv.next = add i64 %iv , 1
277
+ %ec = icmp eq i64 %iv , %n ; exit test uses the pre-increment %iv, not %iv.next
278
+ br i1 %ec , label %exit , label %loop.header
279
+
280
+ exit:
281
+ ret void
282
+ }
283
+
284
+
285
+
195
286
attributes #0 = { "target-features" ="+64bit,+v,+zvl256b" } ; group #0: target features; referenced by @redundant_iv_trunc_for_cse
196
287
attributes #1 = { "target-cpu" ="sifive-p670" } ; group #1: selects the sifive-p670 target CPU
197
288
;.
@@ -206,4 +297,5 @@ attributes #1 = { "target-cpu"="sifive-p670" }
206
297
; CHECK: [[META8]] = !{!"llvm.loop.unroll.runtime.disable"}
207
298
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]]}
208
299
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META8]]}
300
+ ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META8]]}
209
301
;.
0 commit comments