Skip to content

Commit 555c93e

Browse files
committed
[VPlan] Update induction resume values in VPlan.
1 parent a5a1612 commit 555c93e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+735
-711
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -519,9 +519,9 @@ class InnerLoopVectorizer {
519519
/// and the resume values can come from an additional bypass block, the \p
520520
/// AdditionalBypass pair provides information about the bypass block and the
521521
/// end value on the edge from bypass to this loop.
522-
PHINode *createInductionResumeValue(
522+
void createInductionResumeValue(
523523
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
524-
ArrayRef<BasicBlock *> BypassBlocks,
524+
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
525525
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
526526

527527
/// Returns the original loop trip count.
@@ -532,6 +532,11 @@ class InnerLoopVectorizer {
532532
/// count of the original loop for both main loop and epilogue vectorization.
533533
void setTripCount(Value *TC) { TripCount = TC; }
534534

535+
std::pair<BasicBlock *, Value *>
536+
getInductionBypassValue(PHINode *OrigPhi) const {
537+
return InductionBypassValues.find(OrigPhi)->second;
538+
}
539+
535540
protected:
536541
friend class LoopVectorizationPlanner;
537542

@@ -667,6 +672,9 @@ class InnerLoopVectorizer {
667672
/// for cleaning the checks, if vectorization turns out unprofitable.
668673
GeneratedRTChecks &RTChecks;
669674

675+
/// Mapping of induction phis to their bypass values and bypass blocks.
676+
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
677+
670678
VPlan &Plan;
671679
};
672680

@@ -2591,9 +2599,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25912599
nullptr, Twine(Prefix) + "scalar.ph");
25922600
}
25932601

2594-
PHINode *InnerLoopVectorizer::createInductionResumeValue(
2602+
void InnerLoopVectorizer::createInductionResumeValue(
25952603
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2596-
ArrayRef<BasicBlock *> BypassBlocks,
2604+
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
25972605
std::pair<BasicBlock *, Value *> AdditionalBypass) {
25982606
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
25992607
assert(VectorTripCount && "Expected valid arguments");
@@ -2626,27 +2634,21 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26262634
}
26272635
}
26282636

2629-
// Create phi nodes to merge from the backedge-taken check block.
2630-
PHINode *BCResumeVal =
2631-
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
2632-
LoopScalarPreHeader->getFirstNonPHIIt());
2633-
// Copy original phi DL over to the new one.
2634-
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
2635-
2636-
// The new PHI merges the original incoming value, in case of a bypass,
2637-
// or the value at the end of the vectorized loop.
2638-
BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
2639-
2640-
// Fix the scalar body counter (PHI node).
2641-
// The old induction's phi node in the scalar body needs the truncated
2642-
// value.
2643-
for (BasicBlock *BB : BypassBlocks)
2644-
BCResumeVal->addIncoming(II.getStartValue(), BB);
2637+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2638+
VPInstruction::ResumePhi,
2639+
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2640+
OrigPhi->getDebugLoc(), "bc.resume.val");
2641+
auto *ScalarLoopHeader = Plan.getScalarHeader();
2642+
for (VPRecipeBase &R : *ScalarLoopHeader) {
2643+
auto *IRI = cast<VPIRInstruction>(&R);
2644+
if (&IRI->getInstruction() == OrigPhi) {
2645+
IRI->addOperand(ResumePhiRecipe);
2646+
break;
2647+
}
2648+
}
26452649

2646-
if (AdditionalBypass.first)
2647-
BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
2648-
EndValueFromAdditionalBypass);
2649-
return BCResumeVal;
2650+
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
2651+
EndValueFromAdditionalBypass};
26502652
}
26512653

26522654
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2676,13 +2678,14 @@ void InnerLoopVectorizer::createInductionResumeValues(
26762678
// iteration in the vectorized loop.
26772679
// If we come from a bypass edge then we need to start from the original
26782680
// start value.
2681+
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
2682+
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
26792683
for (const auto &InductionEntry : Legal->getInductionVars()) {
26802684
PHINode *OrigPhi = InductionEntry.first;
26812685
const InductionDescriptor &II = InductionEntry.second;
2682-
PHINode *BCResumeVal = createInductionResumeValue(
2683-
OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
2684-
AdditionalBypass);
2685-
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
2686+
createInductionResumeValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2687+
LoopBypassBlocks, ScalarPHBuilder,
2688+
AdditionalBypass);
26862689
}
26872690
}
26882691

@@ -7808,6 +7811,27 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78087811
// the second pass for the scalar loop. The induction resume values for the
78097812
// inductions in the epilogue loop are created before executing the plan for
78107813
// the epilogue loop.
7814+
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
7815+
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
7816+
for (VPRecipeBase &R :
7817+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
7818+
// Create induction resume values for both widened pointer and
7819+
// integer/fp inductions and update the start value of the induction
7820+
// recipes to use the resume value.
7821+
PHINode *IndPhi = nullptr;
7822+
const InductionDescriptor *ID;
7823+
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
7824+
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
7825+
ID = &Ind->getInductionDescriptor();
7826+
} else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7827+
IndPhi = WidenInd->getPHINode();
7828+
ID = &WidenInd->getInductionDescriptor();
7829+
} else
7830+
continue;
7831+
7832+
createInductionResumeValue(IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
7833+
LoopBypassBlocks, ScalarPHBuilder);
7834+
}
78117835

78127836
return {LoopVectorPreHeader, nullptr};
78137837
}
@@ -10296,23 +10320,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1029610320
RdxDesc.getRecurrenceStartValue());
1029710321
}
1029810322
} else {
10299-
// Create induction resume values for both widened pointer and
10300-
// integer/fp inductions and update the start value of the induction
10301-
// recipes to use the resume value.
10323+
// Retrieve the induction resume values for wide inductions from
10324+
// their original phi nodes in the scalar loop.
1030210325
PHINode *IndPhi = nullptr;
10303-
const InductionDescriptor *ID;
1030410326
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1030510327
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
10306-
ID = &Ind->getInductionDescriptor();
1030710328
} else {
1030810329
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1030910330
IndPhi = WidenInd->getPHINode();
10310-
ID = &WidenInd->getInductionDescriptor();
1031110331
}
10312-
10313-
ResumeV = MainILV.createInductionResumeValue(
10314-
IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
10315-
{EPI.MainLoopIterationCountCheck});
10332+
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
1031610333
}
1031710334
assert(ResumeV && "Must have a resume value");
1031810335
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn(ResumeV);
@@ -10324,7 +10341,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032410341
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1032510342
DT, true, &ExpandedSCEVs);
1032610343
++LoopsEpilogueVectorized;
10344+
BasicBlock *PH = L->getLoopPreheader();
1032710345

10346+
for (const auto &[IVPhi, _] : LVL.getInductionVars()) {
10347+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10348+
const auto &[BB, V] = EpilogILV.getInductionBypassValue(IVPhi);
10349+
Inc->setIncomingValueForBlock(BB, V);
10350+
}
1032810351
if (!MainILV.areSafetyChecksAdded())
1032910352
DisableRuntimeUnroll = true;
1033010353
} else {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
629629
State.CFG
630630
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
631631
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
632-
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
632+
for (auto *OtherPred :
633+
reverse(to_vector(predecessors(Builder.GetInsertBlock())))) {
633634
assert(OtherPred != VPlanPred &&
634635
"VPlan predecessors should not be connected yet");
635636
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,10 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
205205
; CHECK: vector.ph:
206206
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
207207
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
208+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
208209
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
209210
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
210211
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
211-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
212212
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
213213
; CHECK: vector.body:
214214
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -433,7 +433,7 @@ define void @test_widen_extended_induction(ptr %dst) {
433433
; CHECK: vec.epilog.middle.block:
434434
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
435435
; CHECK: vec.epilog.scalar.ph:
436-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
436+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
437437
; CHECK-NEXT: br label [[LOOP:%.*]]
438438
; CHECK: loop:
439439
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
114114
; DEFAULT-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
115115
; DEFAULT-NEXT: br i1 [[CMP_N7]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
116116
; DEFAULT: vec.epilog.scalar.ph:
117-
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
117+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
118118
; DEFAULT-NEXT: br label [[LOOP:%.*]]
119119
; DEFAULT: loop:
120120
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -522,31 +522,31 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
522522
; PRED: pred.store.continue:
523523
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
524524
; PRED-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
525-
; PRED: pred.store.if3:
525+
; PRED: pred.store.if2:
526526
; PRED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1
527527
; PRED-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP24]]
528528
; PRED-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 1
529529
; PRED-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4
530530
; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]]
531-
; PRED: pred.store.continue4:
531+
; PRED: pred.store.continue3:
532532
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
533533
; PRED-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
534-
; PRED: pred.store.if5:
534+
; PRED: pred.store.if4:
535535
; PRED-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2
536536
; PRED-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP28]]
537537
; PRED-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 2
538538
; PRED-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4
539539
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
540-
; PRED: pred.store.continue6:
540+
; PRED: pred.store.continue5:
541541
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
542542
; PRED-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
543-
; PRED: pred.store.if7:
543+
; PRED: pred.store.if6:
544544
; PRED-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3
545545
; PRED-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP32]]
546546
; PRED-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], 3
547547
; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4
548548
; PRED-NEXT: br label [[PRED_STORE_CONTINUE8]]
549-
; PRED: pred.store.continue8:
549+
; PRED: pred.store.continue7:
550550
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
551551
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]])
552552
; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
@@ -719,31 +719,31 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
719719
; PRED: pred.store.continue:
720720
; PRED-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
721721
; PRED-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
722-
; PRED: pred.store.if2:
722+
; PRED: pred.store.if1:
723723
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
724724
; PRED-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP23]]
725725
; PRED-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 1
726726
; PRED-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4
727727
; PRED-NEXT: br label [[PRED_STORE_CONTINUE3]]
728-
; PRED: pred.store.continue3:
728+
; PRED: pred.store.continue2:
729729
; PRED-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
730730
; PRED-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
731-
; PRED: pred.store.if4:
731+
; PRED: pred.store.if3:
732732
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2
733733
; PRED-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]]
734734
; PRED-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 2
735735
; PRED-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4
736736
; PRED-NEXT: br label [[PRED_STORE_CONTINUE5]]
737-
; PRED: pred.store.continue5:
737+
; PRED: pred.store.continue4:
738738
; PRED-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
739739
; PRED-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
740-
; PRED: pred.store.if6:
740+
; PRED: pred.store.if5:
741741
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
742742
; PRED-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]]
743743
; PRED-NEXT: [[TMP33:%.*]] = add i32 [[OFFSET_IDX]], 3
744744
; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4
745745
; PRED-NEXT: br label [[PRED_STORE_CONTINUE7]]
746-
; PRED: pred.store.continue7:
746+
; PRED: pred.store.continue6:
747747
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
748748
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
749749
; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
@@ -884,12 +884,12 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
884884
; PRED: pred.store.continue:
885885
; PRED-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
886886
; PRED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
887-
; PRED: pred.store.if5:
887+
; PRED: pred.store.if4:
888888
; PRED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
889889
; PRED-NEXT: [[TMP13:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP12]], i32 2
890890
; PRED-NEXT: store i32 0, ptr [[TMP13]], align 8
891891
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
892-
; PRED: pred.store.continue6:
892+
; PRED: pred.store.continue5:
893893
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
894894
; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
895895
; PRED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
1111
; CHECK-NEXT: entry:
1212
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1313
; CHECK: vector.memcheck:
14-
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 40004
15-
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 80007
16-
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
17-
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[UGLYGEP]]
14+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 40004
15+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 80007
16+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
17+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
1818
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1919
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
2020
; CHECK: vector.ph:
@@ -35,10 +35,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
3535
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
3636
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
3737
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
38-
; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope !0
39-
; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope !0
40-
; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope !0
41-
; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope !0
38+
; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
39+
; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
40+
; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]]
41+
; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
4242
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0
4343
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1
4444
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2
@@ -47,7 +47,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
4747
; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
4848
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
4949
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
50-
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope !3, !noalias !0
50+
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
5151
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5252
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
5353
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]

0 commit comments

Comments
 (0)