@@ -18,7 +18,7 @@ target triple = "aarch64-unknown-linux-gnu"
1818
1919; DEBUG-LABEL: LV: Checking a loop in 'trip_count_too_small'
2020; DEBUG: LV: Found a loop with a very small trip count. This loop is worth vectorizing only if no scalar iteration overheads are incurred.
21- ; DEBUG: LV: Not vectorizing: The trip count is below the minial threshold value. .
21+ ; DEBUG: LV: Not vectorizing: Runtime SCEV check is required with -Os/-Oz .
2222
2323; DEBUG-LABEL: LV: Checking a loop in 'too_many_runtime_checks'
2424; DEBUG: LV: Found trip count: 0
@@ -490,9 +490,103 @@ while.end:
490490 ret void
491491}
492492
493+ ; The loop below exits once the incremented index equals the constant 4, so it has a known trip-count of 4 and should vectorize with vf==4 (the expected output uses <4 x i32>).
494+ define i32 @tc4 (ptr noundef readonly captures(none) %tmp ) vscale_range(1 ,16 ) {
495+ ; CHECK-LABEL: define i32 @tc4(
496+ ; CHECK-SAME: ptr noundef readonly captures(none) [[TMP:%.*]]) #[[ATTR1]] {
497+ ; CHECK-NEXT: [[ENTRY:.*]]:
498+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
499+ ; CHECK: [[VECTOR_PH]]:
500+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
501+ ; CHECK: [[VECTOR_BODY]]:
502+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
503+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
504+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDEX]]
505+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX1]], i32 0
506+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
507+ ; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
508+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
509+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
510+ ; CHECK: [[MIDDLE_BLOCK]]:
511+ ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
512+ ; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
513+ ; CHECK: [[SCALAR_PH]]:
514+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
515+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
516+ ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
517+ ; CHECK: [[FOR_COND_CLEANUP]]:
518+ ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
519+ ; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
520+ ; CHECK: [[FOR_BODY]]:
521+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
522+ ; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD]], %[[FOR_BODY]] ]
523+ ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDVARS_IV]]
524+ ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
525+ ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP5]]
526+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
527+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
528+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
529+ ;
530+ entry:
531+ br label %for.body
532+ 
533+ for.cond.cleanup: ; preds = %for.body
534+ %add.lcssa = phi i32 [ %add , %for.body ] ; sum as it leaves the loop; this is the return value
535+ ret i32 %add.lcssa
536+ 
537+ for.body: ; preds = %entry, %for.body
538+ %indvars.iv = phi i64 [ 0 , %entry ], [ %indvars.iv.next , %for.body ] ; element index, starts at 0
539+ %sum.0179 = phi i32 [ 0 , %entry ], [ %add , %for.body ] ; running sum, starts at 0
540+ %arrayidx1 = getelementptr inbounds nuw [4 x i32 ], ptr %tmp , i64 0 , i64 %indvars.iv ; address of element %indvars.iv of the [4 x i32] at %tmp
541+ %0 = load i32 , ptr %arrayidx1 , align 4
542+ %add = add i32 %sum.0179 , %0 ; accumulate the loaded element (plain add, no nsw/nuw flags)
543+ %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
544+ %exitcond.not = icmp eq i64 %indvars.iv.next , 4 ; constant bound: the body runs for indices 0..3
545+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
546+ }
547+
548+ ; This has a trip-count of 4 from a profile: the bound %N is a function argument, so the only trip-count hint is the !prof branch weights on the loop branch. The expected output below keeps the loop scalar.
549+ define i32 @tc4_from_profile (ptr noundef readonly captures(none) %tmp , i64 %N ) vscale_range(1 ,16 ) {
550+ ; CHECK-LABEL: define i32 @tc4_from_profile(
551+ ; CHECK-SAME: ptr noundef readonly captures(none) [[TMP:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
552+ ; CHECK-NEXT: [[ENTRY:.*]]:
553+ ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
554+ ; CHECK: [[FOR_COND_CLEANUP:.*]]:
555+ ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ]
556+ ; CHECK-NEXT: ret i32 [[TMP4]]
557+ ; CHECK: [[FOR_BODY]]:
558+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
559+ ; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD]], %[[FOR_BODY]] ]
560+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[INDVARS_IV]]
561+ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
562+ ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP0]]
563+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
564+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
565+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]]
566+ ;
567+ entry:
568+ br label %for.body
569+ 
570+ for.cond.cleanup: ; preds = %for.body
571+ %add.lcssa = phi i32 [ %add , %for.body ] ; sum as it leaves the loop; this is the return value
572+ ret i32 %add.lcssa
573+ 
574+ for.body: ; preds = %entry, %for.body
575+ %indvars.iv = phi i64 [ 0 , %entry ], [ %indvars.iv.next , %for.body ] ; element index, starts at 0
576+ %sum.0179 = phi i32 [ 0 , %entry ], [ %add , %for.body ] ; running sum, starts at 0
577+ %arrayidx1 = getelementptr inbounds nuw [4 x i32 ], ptr %tmp , i64 0 , i64 %indvars.iv ; address of element %indvars.iv of the [4 x i32] at %tmp
578+ %0 = load i32 , ptr %arrayidx1 , align 4
579+ %add = add i32 %sum.0179 , %0 ; accumulate the loaded element (plain add, no nsw/nuw flags)
580+ %indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
581+ %exitcond.not = icmp eq i64 %indvars.iv.next , %N ; bound is the runtime argument %N, not a constant
582+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body , !prof !2 ; !2 weights: 10 for the exit edge, 30 for the back-edge (presumably estimated as 30/10 + 1 = 4 iterations; confirm against the estimator)
583+ }
584+
493585
494586!0 = distinct !{!0 , !1 } ; self-referential distinct node carrying !1; its user is outside this excerpt
495587!1 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true } ; sets the llvm.loop.vectorize.predicate.enable hint to true
588+ !2 = !{!"branch_weights" , i32 10 , i32 30 } ; attached via !prof to the loop branch in @tc4_from_profile
589+
496590;.
497591; CHECK-VS1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
498592; CHECK-VS1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -501,6 +595,9 @@ while.end:
501595; CHECK-VS1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
502596; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
503597; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
598+ ; CHECK-VS1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
599+ ; CHECK-VS1: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
600+ ; CHECK-VS1: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
504601;.
505602; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
506603; CHECK-VS2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -509,4 +606,7 @@ while.end:
509606; CHECK-VS2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
510607; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
511608; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
609+ ; CHECK-VS2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
610+ ; CHECK-VS2: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
611+ ; CHECK-VS2: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
512612;.
0 commit comments