@@ -300,6 +300,115 @@ loopexit:
300
300
ret void
301
301
}
302
302
303
; Test input: one loop that copies two independent streams using the same
; index, i32 elements from %src.1 to %dst.1 and i64 elements from %src.2 to
; %dst.2. None of the four pointer arguments carries a noalias attribute.
; The expectations tagged CHECK show a vector.memcheck block holding five
; pairwise overlap tests (the two store pointers against each other, and each
; store pointer against both load pointers) in front of a vector body that
; uses <4 x i32> and <4 x i64> accesses, followed by the scalar remainder loop.
; The expectations tagged FORCED_OPTSIZE show only the scalar loop, with no
; vector blocks.
+ define void @different_load_store_pairs (ptr %src.1 , ptr %src.2 , ptr %dst.1 , ptr %dst.2 , i64 %n ) {
304
+ ; CHECK-LABEL: @different_load_store_pairs(
305
+ ; CHECK-NEXT: entry:
306
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX19:%.*]], 4
307
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
308
+ ; CHECK: vector.memcheck:
309
+ ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[UMAX19]], 2
310
+ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1:%.*]], i64 [[TMP0]]
311
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[UMAX19]], 3
312
+ ; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST_2:%.*]], i64 [[TMP1]]
313
+ ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 [[TMP0]]
314
+ ; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 [[TMP1]]
315
+ ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP1]]
316
+ ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP]]
317
+ ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
318
+ ; CHECK-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP2]]
319
+ ; CHECK-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SRC_1]], [[SCEVGEP]]
320
+ ; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
321
+ ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
322
+ ; CHECK-NEXT: [[BOUND07:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP3]]
323
+ ; CHECK-NEXT: [[BOUND18:%.*]] = icmp ult ptr [[SRC_2]], [[SCEVGEP]]
324
+ ; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = and i1 [[BOUND07]], [[BOUND18]]
325
+ ; CHECK-NEXT: [[CONFLICT_RDX10:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT9]]
326
+ ; CHECK-NEXT: [[BOUND011:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP2]]
327
+ ; CHECK-NEXT: [[BOUND112:%.*]] = icmp ult ptr [[SRC_1]], [[SCEVGEP1]]
328
+ ; CHECK-NEXT: [[FOUND_CONFLICT13:%.*]] = and i1 [[BOUND011]], [[BOUND112]]
329
+ ; CHECK-NEXT: [[CONFLICT_RDX14:%.*]] = or i1 [[CONFLICT_RDX10]], [[FOUND_CONFLICT13]]
330
+ ; CHECK-NEXT: [[BOUND015:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP3]]
331
+ ; CHECK-NEXT: [[BOUND116:%.*]] = icmp ult ptr [[SRC_2]], [[SCEVGEP1]]
332
+ ; CHECK-NEXT: [[FOUND_CONFLICT17:%.*]] = and i1 [[BOUND015]], [[BOUND116]]
333
+ ; CHECK-NEXT: [[CONFLICT_RDX18:%.*]] = or i1 [[CONFLICT_RDX14]], [[FOUND_CONFLICT17]]
334
+ ; CHECK-NEXT: br i1 [[CONFLICT_RDX18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
335
+ ; CHECK: vector.ph:
336
+ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[UMAX19]], -4
337
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
338
+ ; CHECK: vector.body:
339
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
340
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[INDEX]]
341
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22:![0-9]+]]
342
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[INDEX]]
343
+ ; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8, !alias.scope [[META25:![0-9]+]]
344
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr nusw i32, ptr [[DST_1]], i64 [[INDEX]]
345
+ ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP4]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]]
346
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw i64, ptr [[DST_2]], i64 [[INDEX]]
347
+ ; CHECK-NEXT: store <4 x i64> [[WIDE_LOAD20]], ptr [[TMP5]], align 8, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]]
348
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
349
+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
350
+ ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
351
+ ; CHECK: middle.block:
352
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX19]], [[N_VEC]]
353
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
354
+ ; CHECK: scalar.ph:
355
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
356
+ ; CHECK-NEXT: br label [[LOOP:%.*]]
357
+ ; CHECK: loop:
358
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
359
+ ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[IV]]
360
+ ; CHECK-NEXT: [[LD_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
361
+ ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[IV]]
362
+ ; CHECK-NEXT: [[LD_SRC_2:%.*]] = load i64, ptr [[GEP_SRC_2]], align 8
363
+ ; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr nusw i32, ptr [[DST_1]], i64 [[IV]]
364
+ ; CHECK-NEXT: store i32 [[LD_SRC_1]], ptr [[GEP_DST_1]], align 4
365
+ ; CHECK-NEXT: [[GEP_DST_2:%.*]] = getelementptr nusw i64, ptr [[DST_2]], i64 [[IV]]
366
+ ; CHECK-NEXT: store i64 [[LD_SRC_2]], ptr [[GEP_DST_2]], align 8
367
+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
368
+ ; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[IV_NEXT]], [[UMAX19]]
369
+ ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP34:![0-9]+]]
370
+ ; CHECK: exit:
371
+ ; CHECK-NEXT: ret void
372
+ ;
373
+ ; FORCED_OPTSIZE-LABEL: @different_load_store_pairs(
374
+ ; FORCED_OPTSIZE-NEXT: entry:
375
+ ; FORCED_OPTSIZE-NEXT: br label [[LOOP:%.*]]
376
+ ; FORCED_OPTSIZE: loop:
377
+ ; FORCED_OPTSIZE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
378
+ ; FORCED_OPTSIZE-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC_1:%.*]], i64 [[IV]]
379
+ ; FORCED_OPTSIZE-NEXT: [[LD_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
380
+ ; FORCED_OPTSIZE-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i64, ptr [[SRC_2:%.*]], i64 [[IV]]
381
+ ; FORCED_OPTSIZE-NEXT: [[LD_SRC_2:%.*]] = load i64, ptr [[GEP_SRC_2]], align 8
382
+ ; FORCED_OPTSIZE-NEXT: [[GEP_DST_1:%.*]] = getelementptr nusw i32, ptr [[DST_1:%.*]], i64 [[IV]]
383
+ ; FORCED_OPTSIZE-NEXT: store i32 [[LD_SRC_1]], ptr [[GEP_DST_1]], align 4
384
+ ; FORCED_OPTSIZE-NEXT: [[GEP_DST_2:%.*]] = getelementptr nusw i64, ptr [[DST_2:%.*]], i64 [[IV]]
385
+ ; FORCED_OPTSIZE-NEXT: store i64 [[LD_SRC_2]], ptr [[GEP_DST_2]], align 8
386
+ ; FORCED_OPTSIZE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
387
+ ; FORCED_OPTSIZE-NEXT: [[COND:%.*]] = icmp ult i64 [[IV_NEXT]], [[N:%.*]]
388
+ ; FORCED_OPTSIZE-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
389
+ ; FORCED_OPTSIZE: exit:
390
+ ; FORCED_OPTSIZE-NEXT: ret void
391
+ ;
392
+ entry:
393
+ br label %loop ; unconditional entry: the body runs at least once, whatever %n is
394
+
395
+ loop:
396
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ] ; shared element index for all four accesses, starts at 0
397
+ %gep.src.1 = getelementptr i32 , ptr %src.1 , i64 %iv ; address of i32 element %iv in the first source
398
+ %ld.src.1 = load i32 , ptr %gep.src.1
399
+ %gep.src.2 = getelementptr i64 , ptr %src.2 , i64 %iv ; address of i64 element %iv in the second source
400
+ %ld.src.2 = load i64 , ptr %gep.src.2
401
+ %gep.dst.1 = getelementptr nusw i32 , ptr %dst.1 , i64 %iv ; unlike the source GEPs, the destination GEPs carry nusw
402
+ store i32 %ld.src.1 , ptr %gep.dst.1 ; first pair: i32 value copied to %dst.1
403
+ %gep.dst.2 = getelementptr nusw i64 , ptr %dst.2 , i64 %iv
404
+ store i64 %ld.src.2 , ptr %gep.dst.2 ; second pair: i64 value copied to %dst.2
405
+ %iv.next = add nuw nsw i64 %iv , 1
406
+ %cond = icmp ult i64 %iv.next , %n ; unsigned bottom-of-loop test: continue while %iv.next < %n
407
+ br i1 %cond , label %loop , label %exit
408
+
409
+ exit:
410
+ ret void
411
+ }
303
412
304
413
define dso_local void @forced_optsize (ptr noalias nocapture readonly %x_p , ptr noalias nocapture readonly %y_p , ptr noalias nocapture %z_p ) minsize optsize {
305
414
; CHECK-LABEL: @forced_optsize(
@@ -318,15 +427,15 @@ define dso_local void @forced_optsize(ptr noalias nocapture readonly %x_p, ptr n
318
427
; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 8
319
428
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
320
429
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
321
- ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22 :![0-9]+]]
430
+ ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35 :![0-9]+]]
322
431
; CHECK: middle.block:
323
432
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
324
433
; CHECK: scalar.ph:
325
434
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
326
435
; CHECK: for.cond.cleanup:
327
436
; CHECK-NEXT: ret void
328
437
; CHECK: for.body:
329
- ; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP23 :![0-9]+]]
438
+ ; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP36 :![0-9]+]]
330
439
;
331
440
; FORCED_OPTSIZE-LABEL: @forced_optsize(
332
441
; FORCED_OPTSIZE-NEXT: entry:
0 commit comments