@@ -387,7 +387,7 @@ exit:
387387 ret i64 %res
388388}
389389
390- ; TODO: The existing assumptions should be strong enough to vectorize this.
390+ ; The existing assumptions are strong enough to vectorize this.
391391define ptr @find_deref_pointer_distance_align_attribute_argument (ptr align 2 %first , ptr align 2 %last ) nofree nosync {
392392; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_attribute_argument(
393393; CHECK-SAME: ptr align 2 [[FIRST:%.*]], ptr align 2 [[LAST:%.*]]) #[[ATTR0]] {
@@ -401,18 +401,55 @@ define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %fi
401401; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
402402; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
403403; CHECK: [[LOOP_HEADER_PREHEADER]]:
404+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2
405+ ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]]
406+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1
407+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
408+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
409+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
410+ ; CHECK: [[VECTOR_PH]]:
411+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
412+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
413+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2
414+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]]
415+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
416+ ; CHECK: [[VECTOR_BODY]]:
417+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
418+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
419+ ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
420+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2
421+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1)
422+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
423+ ; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
424+ ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
425+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
426+ ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
427+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
428+ ; CHECK: [[MIDDLE_SPLIT]]:
429+ ; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
430+ ; CHECK: [[MIDDLE_BLOCK]]:
431+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
432+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
433+ ; CHECK: [[VECTOR_EARLY_EXIT]]:
434+ ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
435+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
436+ ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
437+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
438+ ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]]
439+ ; CHECK: [[SCALAR_PH]]:
440+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ]
404441; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
405442; CHECK: [[LOOP_HEADER]]:
406- ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST ]], %[[LOOP_HEADER_PREHEADER ]] ]
443+ ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ]
407444; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2
408445; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[L]], 1
409- ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT:.* ]], label %[[LOOP_LATCH]]
446+ ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_LATCH]]
410447; CHECK: [[LOOP_LATCH]]:
411448; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2
412449; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]]
413- ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]]
450+ ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]]
414451; CHECK: [[EXIT_LOOPEXIT]]:
415- ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ]
452+ ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ]
416453; CHECK-NEXT: br label %[[EXIT]]
417454; CHECK: [[EXIT]]:
418455; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], %[[EXIT_LOOPEXIT]] ]
@@ -444,7 +481,7 @@ exit:
444481 ret ptr %first.addr.0.lcssa.i
445482}
446483
447- ; TODO: The existing assumptions should be strong enough to vectorize this.
484+ ; The existing assumptions are strong enough to vectorize this.
448485define ptr @find_deref_pointer_distance_align_assumption (ptr %first , ptr %last ) nofree nosync {
449486; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_assumption(
450487; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]]) #[[ATTR0]] {
@@ -458,18 +495,55 @@ define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last)
458495; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
459496; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
460497; CHECK: [[LOOP_HEADER_PREHEADER]]:
498+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2
499+ ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]]
500+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1
501+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
502+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
503+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
504+ ; CHECK: [[VECTOR_PH]]:
505+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
506+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
507+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2
508+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]]
509+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
510+ ; CHECK: [[VECTOR_BODY]]:
511+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
512+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
513+ ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
514+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2
515+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1)
516+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
517+ ; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
518+ ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
519+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
520+ ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
521+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
522+ ; CHECK: [[MIDDLE_SPLIT]]:
523+ ; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
524+ ; CHECK: [[MIDDLE_BLOCK]]:
525+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
526+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
527+ ; CHECK: [[VECTOR_EARLY_EXIT]]:
528+ ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
529+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
530+ ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
531+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
532+ ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]]
533+ ; CHECK: [[SCALAR_PH]]:
534+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ]
461535; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
462536; CHECK: [[LOOP_HEADER]]:
463- ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST ]], %[[LOOP_HEADER_PREHEADER ]] ]
537+ ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ]
464538; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2
465539; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[L]], 1
466- ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT:.* ]], label %[[LOOP_LATCH]]
540+ ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_LATCH]]
467541; CHECK: [[LOOP_LATCH]]:
468542; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2
469543; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]]
470- ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]]
544+ ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP10:![0-9]+]]
471545; CHECK: [[EXIT_LOOPEXIT]]:
472- ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ]
546+ ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ]
473547; CHECK-NEXT: br label %[[EXIT]]
474548; CHECK: [[EXIT]]:
475549; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], %[[EXIT_LOOPEXIT]] ]
@@ -522,7 +596,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
522596; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
523597; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
524598; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
525- ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
599+ ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
526600; CHECK: [[MIDDLE_SPLIT]]:
527601; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
528602; CHECK: [[MIDDLE_BLOCK]]:
0 commit comments