Skip to content

Commit 5ad4cc1

Browse files
committed
Allow splatting VPScalarCastRecipe directly, remove VPInstruction::splat
1 parent 8b77af4 commit 5ad4cc1

35 files changed

+327
-423
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,6 @@ class VPBuilder {
265265
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
266266
}
267267

268-
VPInstruction *createSplat(VPValue *Val) {
269-
return tryInsertInstruction(new VPInstruction(VPInstruction::Splat, {Val}));
270-
}
271-
272268
VPStepVectorRecipe *createStepVector(Type *Ty) {
273269
return tryInsertInstruction(new VPStepVectorRecipe(Ty));
274270
}

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -313,15 +313,17 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
313313
LastLane = 0;
314314
}
315315

316-
auto *LastInst = cast<Instruction>(get(Def, LastLane));
317-
// Set the insert point after the last scalarized instruction or after the
318-
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
319-
// will directly follow the scalar definitions.
320316
auto OldIP = Builder.saveIP();
321-
auto NewIP = isa<PHINode>(LastInst)
322-
? LastInst->getParent()->getFirstNonPHIIt()
323-
: std::next(BasicBlock::iterator(LastInst));
324-
Builder.SetInsertPoint(&*NewIP);
317+
auto *LastVal = get(Def, LastLane);
318+
if (auto *LastInst = dyn_cast<Instruction>(LastVal)) {
319+
// Set the insert point after the last scalarized instruction or after the
320+
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
321+
// will directly follow the scalar definitions.
322+
auto NewIP = isa<PHINode>(LastInst)
323+
? LastInst->getParent()->getFirstNonPHIIt()
324+
: std::next(BasicBlock::iterator(LastInst));
325+
Builder.SetInsertPoint(&*NewIP);
326+
}
325327

326328
// However, if we are vectorizing, we need to construct the vector values.
327329
// If the value is known to be uniform after vectorization, we can just
@@ -336,7 +338,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
336338
} else {
337339
// Initialize packing with insertelements to start from undef.
338340
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
339-
Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
341+
Value *Undef = PoisonValue::get(toVectorizedTy(LastVal->getType(), VF));
340342
set(Def, Undef);
341343
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
342344
packScalarIntoVectorizedValue(Def, Lane);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -880,8 +880,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
880880
// Extracts the first active lane of a vector, where the first operand is
881881
// the predicate, and the second operand is the vector to extract.
882882
ExtractFirstActive,
883-
// Splats a scalar value across all lanes.
884-
Splat,
885883
};
886884

887885
private:

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
9696
case VPInstruction::BranchOnCond:
9797
case VPInstruction::BranchOnCount:
9898
return Type::getVoidTy(Ctx);
99-
case VPInstruction::Splat:
100-
return inferScalarType(R->getOperand(0));
10199
default:
102100
break;
103101
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -712,9 +712,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
712712
Builder.getInt64Ty(), Mask, true, "first.active.lane");
713713
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
714714
}
715-
case VPInstruction::Splat:
716-
return State.Builder.CreateVectorSplat(
717-
State.VF, State.get(getOperand(0), true), Name);
718715

719716
default:
720717
llvm_unreachable("Unsupported opcode for instruction");
@@ -827,7 +824,6 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
827824
case VPInstruction::LogicalAnd:
828825
case VPInstruction::Not:
829826
case VPInstruction::PtrAdd:
830-
case VPInstruction::Splat:
831827
return false;
832828
default:
833829
return true;
@@ -855,7 +851,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
855851
case VPInstruction::BranchOnCount:
856852
case VPInstruction::BranchOnCond:
857853
case VPInstruction::ResumePhi:
858-
case VPInstruction::Splat:
859854
return true;
860855
};
861856
llvm_unreachable("switch should return");
@@ -947,9 +942,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
947942
case VPInstruction::ExtractFirstActive:
948943
O << "extract-first-active";
949944
break;
950-
case VPInstruction::Splat:
951-
O << "splat";
952-
break;
953945
default:
954946
O << Instruction::getOpcodeName(getOpcode());
955947
}
@@ -2294,8 +2286,6 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
22942286

22952287
Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
22962288
State.setDebugLocFrom(getDebugLoc());
2297-
assert(vputils::onlyFirstLaneUsed(this) &&
2298-
"Codegen only implemented for first lane.");
22992289
switch (Opcode) {
23002290
case Instruction::SExt:
23012291
case Instruction::ZExt:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2089,9 +2089,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
20892089

20902090
// FIXME: The newly created binary instructions should contain nsw/nuw
20912091
// flags, which can be found from the original scalar operations.
2092-
Init = Builder.createNaryOp(MulOp, {Init, Builder.createSplat(Step)}, FMFs);
2093-
Init = Builder.createNaryOp(AddOp, {Builder.createSplat(Start), Init}, FMFs,
2094-
{}, "induction");
2092+
Init = Builder.createNaryOp(MulOp, {Init, Step}, FMFs);
2093+
Init = Builder.createNaryOp(AddOp, {Start, Init}, FMFs, {}, "induction");
20952094

20962095
// Create the widened phi of the vector IV.
20972096
auto *WidePHI =
@@ -2115,7 +2114,7 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
21152114
else if (Ty != TypeInfo.inferScalarType(VF))
21162115
VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL);
21172116

2118-
Inc = Builder.createSplat(Builder.createNaryOp(MulOp, {Step, VF}, FMFs));
2117+
Inc = Builder.createNaryOp(MulOp, {Step, VF}, FMFs);
21192118
Prev = WidePHI;
21202119
}
21212120

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
2020
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2121
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2222
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
23-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
24-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
23+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP6]], i64 0
2524
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
25+
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 8 x i64> splat (i64 1), [[DOTSPLAT]]
2626
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
2727
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -39,7 +39,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
3939
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
4040
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
4141
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
42-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
42+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP9]]
4343
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4444
; CHECK: middle.block:
4545
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -106,9 +106,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
106106
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
107107
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
108108
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
109-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
110-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
109+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP6]], i64 0
111110
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
111+
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 8 x i64> splat (i64 1), [[DOTSPLAT]]
112112
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
113113
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -125,7 +125,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
125125
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
126126
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
127127
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
128-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
128+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP9]]
129129
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
130130
; CHECK: middle.block:
131131
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
127127
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
128128
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
129129
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
130-
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
131-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
130+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
132131
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
132+
; CHECK-NEXT: [[TMP18:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[DOTSPLAT]]
133133
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
134134
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
135135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -156,7 +156,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
156156
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
157157
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
158158
; CHECK-NEXT: [[TMP36:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
159-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
159+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP18]]
160160
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <vscale x 2 x i1> [[TMP36]], i32 0
161161
; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
162162
; CHECK: [[MIDDLE_BLOCK]]:
@@ -251,9 +251,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
251251
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252252
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253253
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254-
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 1, [[TMP9]]
255-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP18]], i64 0
254+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
256255
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
256+
; CHECK-NEXT: [[TMP18:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[BROADCAST_SPLAT]]
257257
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
258258
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
259259
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
283283
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
284284
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
285285
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
286-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
286+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP18]]
287287
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
288288
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
289289
; CHECK: [[MIDDLE_BLOCK]]:

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ define void @foo() {
2323
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
2424
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1)
2525
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]]
26-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
27-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
28-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
26+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
27+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
28+
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> splat (i64 1), [[BROADCAST_SPLAT]]
2929
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3030
; CHECK: vector.body:
3131
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ]
@@ -47,7 +47,7 @@ define void @foo() {
4747
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x float> [ [[TMP11]], [[INNER_LOOP1]] ]
4848
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI4]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true))
4949
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
50-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
50+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP8]]
5151
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5353
; CHECK: middle.block:

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
3131
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
3232
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
3333
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
34-
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
35-
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
34+
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP7]], i64 0
3635
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
36+
; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 2 x i32> splat (i32 1), [[DOTSPLAT2]]
3737
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3838
; CHECK: vector.body:
3939
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -48,7 +48,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
4848
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i32 0
4949
; CHECK-NEXT: store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP19]], align 8
5050
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]]
51-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
51+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[TMP11]]
5252
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5353
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5454
; CHECK: middle.block:

0 commit comments

Comments
 (0)