Skip to content

Commit e51c6fb

Browse files
committed
fixup address latest comments, thanks!
1 parent 4265a86 commit e51c6fb

File tree

9 files changed

+73
-79
lines changed

9 files changed

+73
-79
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,8 @@ class VPBuilder {
232232
}
233233

234234
/// Convert the input value \p Current to the corresponding value of an
235-
/// induction with different start and step values, using Start + Current *
236-
/// Step.
235+
/// induction with \p Start and \p Step values, using \p Start + \p Current *
236+
/// \p Step.
237237
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
238238
FPMathOperator *FPBinOp, VPValue *Start,
239239
VPValue *Current, VPValue *Step,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 48 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2660,6 +2660,8 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues(
26602660
assert(MainVectorTripCount && "Must have bypass information");
26612661

26622662
Instruction *OldInduction = Legal->getPrimaryInduction();
2663+
IRBuilder<> BypassBuilder(getAdditionalBypassBlock(),
2664+
getAdditionalBypassBlock()->getFirstInsertionPt());
26632665
for (const auto &InductionEntry : Legal->getInductionVars()) {
26642666
PHINode *OrigPhi = InductionEntry.first;
26652667
const InductionDescriptor &II = InductionEntry.second;
@@ -2668,18 +2670,15 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues(
26682670
// Otherwise it is computed.
26692671
Value *EndValueFromAdditionalBypass = MainVectorTripCount;
26702672
if (OrigPhi != OldInduction) {
2671-
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
2672-
26732673
// Fast-math-flags propagate from the original induction instruction.
26742674
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2675-
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2675+
BypassBuilder.setFastMathFlags(
2676+
II.getInductionBinOp()->getFastMathFlags());
26762677

26772678
// Compute the end value for the additional bypass.
2678-
B.SetInsertPoint(getAdditionalBypassBlock(),
2679-
getAdditionalBypassBlock()->getFirstInsertionPt());
2680-
EndValueFromAdditionalBypass =
2681-
emitTransformedIndex(B, MainVectorTripCount, II.getStartValue(), Step,
2682-
II.getKind(), II.getInductionBinOp());
2679+
EndValueFromAdditionalBypass = emitTransformedIndex(
2680+
BypassBuilder, MainVectorTripCount, II.getStartValue(), Step,
2681+
II.getKind(), II.getInductionBinOp());
26832682
EndValueFromAdditionalBypass->setName("ind.end");
26842683
}
26852684

@@ -8867,28 +8866,25 @@ static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR,
88678866
if (!WideIV)
88688867
return nullptr;
88698868

8869+
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8870+
// Truncated wide inductions resume from the last lane of their vector value
8871+
// in the last vector iteration which is handled elsewhere.
8872+
if (WideIntOrFp && WideIntOrFp->getTruncInst())
8873+
return nullptr;
8874+
88708875
VPValue *Start = WideIV->getStartValue();
88718876
VPValue *Step = WideIV->getStepValue();
88728877
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
8873-
Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
8874-
bool IsCanonical = false;
8875-
if (auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(PhiR)) {
8876-
// Truncated wide inductions resume from the last lane of their vector value
8877-
// in the last vector iteration which is handled elsewhere.
8878-
if (WideIntOrFp->getTruncInst())
8879-
return nullptr;
8880-
IsCanonical = WideIntOrFp->isCanonical();
8881-
}
8882-
88838878
VPValue *EndValue = VectorTC;
8884-
if (!IsCanonical) {
8879+
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
88858880
EndValue = VectorPHBuilder.createDerivedIV(
88868881
ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
88878882
Start, VectorTC, Step);
88888883
}
88898884

88908885
// EndValue is derived from the vector trip count (which has the same type as
88918886
// the widest induction) and thus may be wider than the induction here.
8887+
Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
88928888
if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
88938889
EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
88948890
ScalarTypeOfWideIV);
@@ -8903,9 +8899,7 @@ static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR,
89038899
/// Create resume phis in the scalar preheader for first-order recurrences,
89048900
/// reductions and inductions, and update the VPIRInstructions wrapping the
89058901
/// original phis in the scalar header.
8906-
static void addScalarResumePhis(
8907-
VPlan &Plan,
8908-
function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
8902+
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
89098903
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
89108904
auto *ScalarPH = Plan.getScalarPreheader();
89118905
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8921,7 +8915,7 @@ static void addScalarResumePhis(
89218915
if (!ScalarPhiI)
89228916
break;
89238917

8924-
auto *VectorPhiR = GetHeaderPhiRecipe(ScalarPhiI);
8918+
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
89258919
if (isa<VPWidenInductionRecipe>(VectorPhiR)) {
89268920
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
89278921
VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -9049,9 +9043,9 @@ addUsersInExitBlocks(VPlan &Plan,
90499043
static void addExitUsersForFirstOrderRecurrences(
90509044
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
90519045
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
9052-
auto *MainScalarPH = Plan.getScalarPreheader();
9046+
auto *ScalarPHVPBB = Plan.getScalarPreheader();
90539047
auto *MiddleVPBB = Plan.getMiddleBlock();
9054-
VPBuilder ScalarPHBuilder(MainScalarPH);
9048+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
90559049
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
90569050
VPValue *TwoVPV = Plan.getOrAddLiveIn(
90579051
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
@@ -9317,9 +9311,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93179311
VPlanTransforms::handleUncountableEarlyExit(
93189312
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93199313
}
9320-
addScalarResumePhis(*Plan, [&RecipeBuilder](PHINode *P) {
9321-
return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe(P));
9322-
});
9314+
addScalarResumePhis(RecipeBuilder, *Plan);
93239315
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
93249316
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
93259317
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9441,18 +9433,16 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
94419433
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
94429434
DebugLoc());
94439435

9444-
addScalarResumePhis(
9445-
*Plan,
9446-
[&Plan](PHINode *P) {
9447-
return find_singleton<VPHeaderPHIRecipe>(
9448-
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis(),
9449-
[P](VPRecipeBase &R, bool) -> VPHeaderPHIRecipe * {
9450-
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9451-
return HeaderR->getUnderlyingValue() == P ? HeaderR : nullptr;
9452-
});
9453-
}
9454-
9455-
);
9436+
// Collect mapping of IR header phis to header phi recipes, to be used in
9437+
// addScalarResumePhis.
9438+
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
9439+
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9440+
if (isa<VPCanonicalIVPHIRecipe>(&R))
9441+
continue;
9442+
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9443+
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
9444+
}
9445+
addScalarResumePhis(RecipeBuilder, *Plan);
94569446

94579447
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
94589448
return Plan;
@@ -9747,8 +9737,12 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
97479737
State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, Kind,
97489738
cast_if_present<BinaryOperator>(FPBinOp));
97499739
DerivedIV->setName(Name);
9750-
// Index may only be set to constant 0 in prepareToExecute.
9751-
assert((DerivedIV != Index || cast<ConstantInt>(Index)->isNullValue()) &&
9740+
// If index is the vector trip count, the concrete value will only be set in
9741+
// prepareToExecute, leading to missed simplifications, e.g. if it is 0.
9742+
// TODO: Remove the special case for the vector trip count once it is computed
9743+
// in VPlan and can be used during VPlan simplification.
9744+
assert((DerivedIV != Index ||
9745+
getOperand(1) == &getParent()->getPlan()->getVectorTripCount()) &&
97529746
"IV didn't need transforming?");
97539747
State.set(this, DerivedIV, VPLane(0));
97549748
}
@@ -10074,8 +10068,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
1007410068
EpiWidenedPhis.insert(
1007510069
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
1007610070
}
10077-
for (VPRecipeBase &R : make_early_inc_range(
10078-
*cast<VPIRBasicBlock>(MainPlan.getScalarHeader()))) {
10071+
for (VPRecipeBase &R : *cast<VPIRBasicBlock>(MainPlan.getScalarHeader())) {
1007910072
auto *VPIRInst = cast<VPIRInstruction>(&R);
1008010073
auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction());
1008110074
if (!IRI)
@@ -10095,19 +10088,19 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
1009510088
using namespace VPlanPatternMatch;
1009610089
VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader();
1009710090
VPValue *VectorTC = &MainPlan.getVectorTripCount();
10098-
// If there is no suitable resume value for the canonical induction in the
10099-
// scalar (which will become vector) epilogue loop, create it.
10100-
if (none_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) {
10091+
// If there is a suitable resume value for the canonical induction in the
10092+
// scalar (which will become vector) epilogue loop we are done. Otherwise
10093+
// create it below.
10094+
if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) {
1010110095
return match(&R, m_VPInstruction<VPInstruction::ResumePhi>(
1010210096
m_Specific(VectorTC), m_SpecificInt(0)));
10103-
})) {
10104-
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
10105-
ScalarPHBuilder.createNaryOp(
10106-
VPInstruction::ResumePhi,
10107-
{VectorTC, MainPlan.getOrAddLiveIn(ConstantInt::get(
10108-
MainPlan.getCanonicalIV()->getScalarType(), 0))},
10109-
{}, "vec.epilog.resume.val");
10110-
}
10097+
}))
10098+
return;
10099+
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
10100+
ScalarPHBuilder.createNaryOp(
10101+
VPInstruction::ResumePhi,
10102+
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
10103+
"vec.epilog.resume.val");
1011110104
}
1011210105

1011310106
/// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
9191
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]]
9292
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
9393
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
94-
; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]]
9594
; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]]
95+
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]]
9696
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[START]], [[N_VEC]]
9797
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
9898
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
@@ -117,11 +117,11 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
117117
; CHECK-NEXT: br i1 [[CMP_N11]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
118118
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
119119
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], %[[ITER_CHECK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ]
120-
; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ]
120+
; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ], [ [[IND_END2]], %[[VEC_EPILOG_ITER_CHECK]] ]
121121
; CHECK-NEXT: br label %[[LOOP:.*]]
122122
; CHECK: [[LOOP]]:
123123
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ]
124-
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ]
124+
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL9]], %[[VEC_EPILOG_SCALAR_PH]] ]
125125
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
126126
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1
127127
; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1

llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca
5050
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
5151
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
5252
; CHECK: vec.epilog.iter.check:
53+
; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc nuw i64 [[N_VEC]] to i32
54+
; CHECK-NEXT: [[IND_END8:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]]
5355
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
54-
; CHECK-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP6]]
56+
; CHECK-NEXT: [[IND_END10:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP6]]
5557
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
56-
; CHECK-NEXT: [[IND_END10:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP7]]
57-
; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc nuw i64 [[N_VEC]] to i32
58-
; CHECK-NEXT: [[IND_END8:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST7]]
58+
; CHECK-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP7]]
5959
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 12
6060
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
6161
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
@@ -171,10 +171,10 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
171171
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
172172
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
173173
; CHECK: vec.epilog.iter.check:
174-
; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
175-
; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
176174
; CHECK-NEXT: [[DOTCAST6:%.*]] = trunc nuw i64 [[N_VEC]] to i32
177175
; CHECK-NEXT: [[IND_END7:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST6]]
176+
; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
177+
; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
178178
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 24
179179
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
180180
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ target triple = "aarch64-unknown-linux-gnu"
3535
; DEBUG: LV: can fold tail by masking.
3636
; DEBUG: Executing best plan with VF=vscale x 16, UF=1
3737

38+
; TODO: Once VPlan's scope includes both vector loops, clean up the unused add
39+
; instruction computing the end value of the induction, which gets created
40+
; during execution of the main plan.
3841
define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef %val) {
3942
; CHECK-VS1-LABEL: define void @low_vf_ic_is_better(
4043
; CHECK-VS1-SAME: ptr nocapture noundef [[P:%.*]], i32 [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {

llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -p loop-vectorize -S %s | FileCheck %s
33

44
target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
@@ -192,8 +192,6 @@ exit:
192192
ret i1 %res
193193
}
194194
;.
195-
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
196-
;.
197195
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
198196
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
199197
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}

llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -667,10 +667,10 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
667667
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
668668
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
669669
; AVX512: vec.epilog.iter.check:
670-
; AVX512-NEXT: [[TMP22:%.*]] = mul i64 [[N_VEC]], 64
671-
; AVX512-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP22]]
672670
; AVX512-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 4
673671
; AVX512-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP23]]
672+
; AVX512-NEXT: [[TMP38:%.*]] = mul i64 [[N_VEC]], 64
673+
; AVX512-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP38]]
674674
; AVX512-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
675675
; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
676676
; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]

llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,12 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca
5656
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
5757
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
5858
; CHECK: vec.epilog.iter.check:
59+
; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc nuw i64 [[N_VEC]] to i32
60+
; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]]
5961
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
60-
; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP12]]
62+
; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP12]]
6163
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
62-
; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP13]]
63-
; CHECK-NEXT: [[DOTCAST9:%.*]] = trunc nuw i64 [[N_VEC]] to i32
64-
; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST9]]
64+
; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP13]]
6565
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 56
6666
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
6767
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
@@ -181,10 +181,10 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
181181
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
182182
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
183183
; CHECK: vec.epilog.iter.check:
184-
; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
185-
; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
186184
; CHECK-NEXT: [[DOTCAST8:%.*]] = trunc nuw i64 [[N_VEC]] to i32
187185
; CHECK-NEXT: [[IND_END9:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST8]]
186+
; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
187+
; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
188188
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 120
189189
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
190190
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]

0 commit comments

Comments
 (0)