Skip to content

Commit 6c0f2fc

Browse files
committed
[VPlan] Replicate VPScalarIVStepsRecipe by VF outside replicate regions.
Extend replicateByVF to also handle VPScalarIVStepsRecipe. To do so, the patch adds a new lane operand to VPScalarIVStepsRecipe, which is only added when replicating. This enables removing a number of lane 0 computations. The lane operand will also be used to explicitly replicate the replicate regions in a follow-up. Depends on #169796 (included in PR).
1 parent 5b2339d commit 6c0f2fc

File tree

77 files changed

+2063
-2172
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+2063
-2172
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3788,7 +3788,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
37883788
/// A recipe for handling phi nodes of integer and floating-point inductions,
37893789
/// producing their scalar values.
37903790
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
3791-
public VPUnrollPartAccessor<3> {
3791+
public VPUnrollPartAccessor<4> {
37923792
Instruction::BinaryOps InductionOpcode;
37933793

37943794
public:
@@ -3812,10 +3812,13 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
38123812
~VPScalarIVStepsRecipe() override = default;
38133813

38143814
VPScalarIVStepsRecipe *clone() override {
3815-
return new VPScalarIVStepsRecipe(
3815+
auto *NewR = new VPScalarIVStepsRecipe(
38163816
getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
38173817
hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(),
38183818
getDebugLoc());
3819+
if (getNumOperands() == 4)
3820+
NewR->addOperand(getOperand(3));
3821+
return NewR;
38193822
}
38203823

38213824
/// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2373,7 +2373,16 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
23732373
if (State.Lane) {
23742374
StartLane = State.Lane->getKnownLane();
23752375
EndLane = StartLane + 1;
2376+
} else if (getNumOperands() == 5) {
2377+
// Operand 3 is the Lane operand (when present after replicating by VF).
2378+
VPValue *Op3 = getOperand(3);
2379+
assert(Op3->isLiveIn() && "lane operand must be a live-in");
2380+
auto *C = cast<ConstantInt>(Op3->getLiveInIRValue());
2381+
unsigned Val = C->getZExtValue();
2382+
StartLane = Val;
2383+
EndLane = Val + 1;
23762384
}
2385+
23772386
Value *StartIdx0;
23782387
if (getUnrollPart(*this) == 0)
23792388
StartIdx0 = ConstantInt::get(IntStepTy, 0);
@@ -2400,7 +2409,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
24002409
"scalable");
24012410
auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
24022411
auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
2403-
State.set(this, Add, VPLane(Lane));
2412+
if (State.Lane)
2413+
State.set(this, Add, VPLane(Lane));
2414+
else
2415+
State.set(this, Add, VPLane(0));
24042416
}
24052417
}
24062418

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,9 +1437,14 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
14371437
}
14381438

14391439
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
1440-
// the first lane is demanded.
1440+
// the first lane is demanded and both Lane and UnrollPart operands are 0.
14411441
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
1442-
if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
1442+
bool LaneIsZero = Steps->getNumOperands() >= 4 &&
1443+
match(Steps->getOperand(3), m_ZeroInt());
1444+
bool PartIsZero =
1445+
Steps->getNumOperands() < 5 || match(Steps->getOperand(4), m_ZeroInt());
1446+
if (Steps->isPart0() && LaneIsZero && PartIsZero &&
1447+
vputils::onlyFirstLaneUsed(Steps)) {
14431448
Steps->replaceAllUsesWith(Steps->getOperand(0));
14441449
return;
14451450
}
@@ -4311,9 +4316,9 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
43114316
for (VPBasicBlock *VPBB :
43124317
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
43134318
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
4314-
if (!isa<VPReplicateRecipe, VPInstruction>(&R))
4319+
if (!isa<VPScalarIVStepsRecipe, VPReplicateRecipe, VPInstruction>(&R))
43154320
continue;
4316-
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
4321+
auto *DefR = cast<VPSingleDefRecipe>(&R);
43174322
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
43184323
VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
43194324
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
137137
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
138138
remapOperands(&PartIR, Part);
139139
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
140+
ScalarIVSteps->addOperand(getConstantInt(0));
140141
ScalarIVSteps->addOperand(getConstantInt(Part));
141142
}
142143

@@ -526,9 +527,21 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
526527
/*IsSingleScalar=*/true, /*Mask=*/nullptr,
527528
*RepR, *RepR, RepR->getDebugLoc());
528529
} else {
529-
assert(isa<VPInstruction>(DefR) &&
530+
assert((isa<VPInstruction, VPScalarIVStepsRecipe>(DefR)) &&
530531
"DefR must be a VPReplicateRecipe or VPInstruction");
531532
New = DefR->clone();
533+
if (isa<VPScalarIVStepsRecipe>(New)) {
534+
// Add or update lane operand for VPScalarIVStepsRecipe.
535+
if (NewOps.size() == 3) {
536+
NewOps.push_back(Plan.getConstantInt(IdxTy, 0));
537+
New->addOperand(NewOps.back());
538+
}
539+
NewOps.push_back(Plan.getConstantInt(IdxTy, Lane.getKnownLane()));
540+
New->addOperand(NewOps.back());
541+
if (NewOps.size() == 5)
542+
std::swap(NewOps[3], NewOps[4]);
543+
}
544+
532545
for (const auto &[Idx, Op] : enumerate(NewOps)) {
533546
New->setOperand(Idx, Op);
534547
}
@@ -558,14 +571,27 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
558571
SmallVector<VPRecipeBase *> ToRemove;
559572
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
560573
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
561-
if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
574+
if (!isa<VPInstruction, VPReplicateRecipe, VPScalarIVStepsRecipe>(&R) ||
562575
(isa<VPReplicateRecipe>(&R) &&
563576
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
564577
(isa<VPInstruction>(&R) &&
565578
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
566579
cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
567580
continue;
568581

582+
if (isa<VPScalarIVStepsRecipe>(&R) && Plan.hasScalarVFOnly()) {
583+
// Add lane operand to VPScalarIVStepsRecipe only when the plan is
584+
// scalar.
585+
if (R.getNumOperands() == 4) {
586+
R.addOperand(R.getOperand(3));
587+
R.setOperand(3, Plan.getConstantInt(IdxTy, 0));
588+
} else {
589+
R.addOperand(Plan.getConstantInt(IdxTy, 0));
590+
R.addOperand(Plan.getConstantInt(IdxTy, 0));
591+
}
592+
continue;
593+
}
594+
569595
auto *DefR = cast<VPSingleDefRecipe>(&R);
570596
VPBuilder Builder(DefR);
571597
if (DefR->getNumUsers() == 0) {

llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,43 +16,42 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
1616
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1717
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1818
; CHECK: [[VECTOR_BODY]]:
19-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0
2019
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
2120
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
2221
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
23-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
22+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
2423
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1
2524
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2
2625
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3
2726
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[BROADCAST_SPLAT]]
2827
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
2928
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
3029
; CHECK: [[PRED_STORE_IF]]:
31-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 1
30+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 1
3231
; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1
3332
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
3433
; CHECK: [[PRED_STORE_CONTINUE]]:
3534
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
36-
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
37-
; CHECK: [[PRED_STORE_IF5]]:
35+
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
36+
; CHECK: [[PRED_STORE_IF4]]:
3837
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 1
3938
; CHECK-NEXT: store i8 0, ptr [[TMP9]], align 1
40-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
41-
; CHECK: [[PRED_STORE_CONTINUE6]]:
39+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
40+
; CHECK: [[PRED_STORE_CONTINUE5]]:
4241
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
43-
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
44-
; CHECK: [[PRED_STORE_IF7]]:
42+
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
43+
; CHECK: [[PRED_STORE_IF6]]:
4544
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 1
4645
; CHECK-NEXT: store i8 0, ptr [[TMP11]], align 1
47-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
48-
; CHECK: [[PRED_STORE_CONTINUE8]]:
46+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
47+
; CHECK: [[PRED_STORE_CONTINUE7]]:
4948
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
50-
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
51-
; CHECK: [[PRED_STORE_IF9]]:
49+
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
50+
; CHECK: [[PRED_STORE_IF8]]:
5251
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i64 1
5352
; CHECK-NEXT: store i8 0, ptr [[TMP13]], align 1
54-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
55-
; CHECK: [[PRED_STORE_CONTINUE10]]:
53+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
54+
; CHECK: [[PRED_STORE_CONTINUE9]]:
5655
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
5756
; CHECK: [[MIDDLE_BLOCK]]:
5857
; CHECK-NEXT: br label %[[EXIT:.*]]

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,9 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
2020
; CHECK: vector.body:
2121
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2222
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ <double 0.000000e+00, double -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
23-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
24-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
25-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP0]]
26-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP1]]
23+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
24+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[INDEX]]
25+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP0]]
2726
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
2827
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
2928
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP4]] to i64

llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,31 +21,30 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
2121
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2222
; CHECK: vector.body:
2323
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
24-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
25-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
26-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
27-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
24+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
25+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
26+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
27+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
2828
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
2929
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]]
3030
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]]
31-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]]
32-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
31+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META0:![0-9]+]]
3332
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
34-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
35-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
36-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
3733
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
38-
; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
39-
; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
40-
; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]]
41-
; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
34+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
35+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
36+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
37+
; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
38+
; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP8]], align 4, !alias.scope [[META0]]
39+
; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0]]
40+
; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
4241
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0
4342
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1
4443
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2
4544
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i24> [[TMP19]], i24 [[TMP16]], i32 3
4645
; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i24> [[TMP20]] to <4 x i32>
4746
; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
48-
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
47+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
4948
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
5049
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5150
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000

llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -174,26 +174,25 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
174174
; CHECK: [[VECTOR_BODY]]:
175175
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
176176
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
177-
; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
178-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
179-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
180-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
181-
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
177+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
178+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
179+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
180+
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[OFFSET_IDX]]
182181
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
183182
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
184183
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
185184
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
186185
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
187186
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
188187
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
189-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
190-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
191-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
192-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
188+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[OFFSET_IDX]]
189+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP0]]
190+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
191+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
193192
; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2
194-
; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2
195-
; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2
196-
; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
193+
; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP15]], align 2
194+
; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP16]], align 2
195+
; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP17]], align 2
197196
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
198197
; CHECK-NEXT: store i64 0, ptr [[B]], align 8
199198
; CHECK-NEXT: store i64 0, ptr [[C]], align 8

0 commit comments

Comments
 (0)