Skip to content

Commit b43a589

Browse files
committed
resolve review comments
1 parent 19d9607 commit b43a589

File tree

2 files changed

+35
-39
lines changed

2 files changed

+35
-39
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4417,6 +4417,9 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44174417
KnownMinRemIter,
44184418
SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1)));
44194419

4420+
auto SkipVF = [&](const SCEV *VF, const SCEV *RemIter) -> bool {
4421+
return SE.isKnownPredicate(CmpInst::ICMP_UGT, VF, RemIter);
4422+
};
44204423
for (auto &NextVF : ProfitableVFs) {
44214424
// Skip candidate VFs without a corresponding VPlan.
44224425
if (!hasPlanWithVF(NextVF.Width))
@@ -4435,27 +4438,20 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44354438
// If NextVF is greater than the number of remaining iterations, the
44364439
// epilogue loop would be dead. Skip such factors.
44374440
if (ScalableRemIter == NextVF.Width.isScalable()) {
4438-
if (SE.isKnownPredicate(CmpInst::ICMP_UGT,
4439-
SE.getElementCount(TCType, NextVF.Width),
4440-
RemainingIterations))
4441+
if (SkipVF(SE.getElementCount(TCType, NextVF.Width), RemainingIterations))
44414442
continue;
44424443
}
44434444
// Handle the case where NextVF and RemainingIterations are in different
44444445
// numerical spaces.
44454446
else if (NextVF.Width.isScalable()) {
44464447
ElementCount EstimatedRuntimeNextVF = ElementCount::getFixed(
44474448
estimateElementCount(NextVF.Width, CM.getVScaleForTuning()));
4448-
if (SE.isKnownPredicate(
4449-
CmpInst::ICMP_UGT,
4450-
SE.getElementCount(TCType, EstimatedRuntimeNextVF),
4451-
RemainingIterations))
4452-
continue;
4453-
} else {
4454-
if (SE.isKnownPredicate(CmpInst::ICMP_UGT,
4455-
SE.getElementCount(TCType, NextVF.Width),
4456-
EstimatedRemIter))
4449+
if (SkipVF(SE.getElementCount(TCType, EstimatedRuntimeNextVF),
4450+
RemainingIterations))
44574451
continue;
4458-
}
4452+
} else if (SkipVF(SE.getElementCount(TCType, NextVF.Width),
4453+
EstimatedRemIter))
4454+
continue;
44594455

44604456
if (Result.Width.isScalar() ||
44614457
isMoreProfitable(NextVF, Result, MaxTripCount, !CM.foldTailByMasking(),

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -479,25 +479,25 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
479479
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
480480
; CHECK: vector.body:
481481
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
482-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
483-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
484-
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
485-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
486-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
487-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
488-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
489-
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
490-
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2
491-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[TMP12]]
492-
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4
493-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
494-
; CHECK-NEXT: [[TMP14:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
495-
; CHECK-NEXT: [[TMP15:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
496-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP14]], ptr [[TMP10]], align 4
497-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP15]], ptr [[TMP13]], align 4
482+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
483+
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
484+
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
485+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP6]]
486+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
487+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
488+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
489+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
490+
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
491+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP10]]
492+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
493+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
494+
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
495+
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
496+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
497+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
498498
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
499-
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
500-
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
499+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
500+
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
501501
; CHECK: middle.block:
502502
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
503503
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -508,24 +508,24 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
508508
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
509509
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
510510
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
511-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
511+
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
512512
; CHECK: vec.epilog.vector.body:
513-
; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[FOR_BODY]] ]
514-
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
515-
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP20]], align 4
513+
; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
514+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
515+
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP15]], align 4
516516
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
517517
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP16]], align 4
518518
; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
519519
; CHECK-NEXT: store <2 x float> [[TMP17]], ptr [[TMP16]], align 4
520520
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
521521
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
522-
; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
522+
; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
523523
; CHECK: vec.epilog.middle.block:
524524
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
525525
; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
526526
; CHECK: vec.epilog.scalar.ph:
527527
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
528-
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
528+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
529529
; CHECK: for.body:
530530
;
531531
; CHECK-VF8-LABEL: @trip_count_vscale(
@@ -620,8 +620,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
620620
; CHECK-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
621621
; CHECK-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
622622
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
623-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
624-
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
623+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
624+
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
625625
; CHECK: middle.block:
626626
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
627627
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]

0 commit comments

Comments
 (0)