Skip to content

Commit 4cedafb

Browse files
committed
[VPlan] Introduce explicit broadcasts for live-ins.
Add a new VPInstruction::Broadcast opcode and use it to materialize explicit broadcasts of live-ins. The initial patch only materializes the broadcasts if the vector preheader dominates all uses that need it. Later patches will pick the best valid insert point, thus retiring implicit hoisting of broadcasts from VPTransformState::get().
1 parent 7f2f905 commit 4cedafb

35 files changed

+360
-271
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7666,7 +7666,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76667666
((VectorizingEpilogue && ExpandedSCEVs) ||
76677667
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
76687668
"expanded SCEVs to reuse can only be used during epilogue vectorization");
7669-
7669+
VPlanTransforms::materializeBroadcasts(BestVPlan);
76707670
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
76717671
// cost model is complete for better cost estimates.
76727672
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
866866
CanonicalIVIncrementForPart,
867867
BranchOnCount,
868868
BranchOnCond,
869+
Broadcast,
869870
ComputeReductionResult,
870871
// Takes the VPValue to extract from as first operand and the lane or part
871872
// to extract as second operand, counting from the end starting with 1 for
@@ -1511,6 +1512,13 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
15111512
bool isInvariantCond() const {
15121513
return getCond()->isDefinedOutsideLoopRegions();
15131514
}
1515+
1516+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1517+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1518+
assert(is_contained(operands(), Op) &&
1519+
"Op must be an operand of the recipe");
1520+
return Op == getCond() && isInvariantCond();
1521+
}
15141522
};
15151523

15161524
/// A recipe for handling GEP instructions.
@@ -1558,6 +1566,13 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
15581566
void print(raw_ostream &O, const Twine &Indent,
15591567
VPSlotTracker &SlotTracker) const override;
15601568
#endif
1569+
1570+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1571+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1572+
assert(is_contained(operands(), Op) &&
1573+
"Op must be an operand of the recipe");
1574+
return Op == getOperand(0);
1575+
}
15611576
};
15621577

15631578
/// A recipe to compute the pointers for widened memory accesses of IndexTy
@@ -1873,6 +1888,13 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18731888
VPValue *getLastUnrolledPartOperand() {
18741889
return getNumOperands() == 5 ? getOperand(4) : this;
18751890
}
1891+
1892+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1893+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1894+
assert(is_contained(operands(), Op) &&
1895+
"Op must be an operand of the recipe");
1896+
return Op == getStartValue();
1897+
}
18761898
};
18771899

18781900
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
@@ -1905,6 +1927,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
19051927
/// Returns true if only scalar values will be generated.
19061928
bool onlyScalarsGenerated(bool IsScalable);
19071929

1930+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1931+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1932+
assert(is_contained(operands(), Op) &&
1933+
"Op must be an operand of the recipe");
1934+
return Op == getOperand(0);
1935+
}
1936+
19081937
/// Returns the VPValue representing the value of this induction at
19091938
/// the first unrolled part, if it exists. Returns itself if unrolling did not
19101939
/// take place.
@@ -2038,6 +2067,13 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
20382067
void print(raw_ostream &O, const Twine &Indent,
20392068
VPSlotTracker &SlotTracker) const override;
20402069
#endif
2070+
2071+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2072+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2073+
assert(is_contained(operands(), Op) &&
2074+
"Op must be an operand of the recipe");
2075+
return Op == getStartValue();
2076+
}
20412077
};
20422078

20432079
/// A recipe for handling reduction phis. The start value is the first operand
@@ -2104,6 +2140,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
21042140

21052141
/// Returns true, if the phi is part of an in-loop reduction.
21062142
bool isInLoop() const { return IsInLoop; }
2143+
2144+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2145+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2146+
assert(is_contained(operands(), Op) &&
2147+
"Op must be an operand of the recipe");
2148+
return Op == getStartValue();
2149+
}
21072150
};
21082151

21092152
/// A recipe for forming partial reductions. In the loop, an accumulator and
@@ -3726,6 +3769,8 @@ class VPlan {
37263769
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
37273770
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
37283771

3772+
ArrayRef<VPValue *> getLiveIns() const { return VPLiveInsToFree; }
3773+
37293774
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
37303775
/// Print the live-ins of this VPlan to \p O.
37313776
void printLiveIns(raw_ostream &O) const;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
9090
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
9191
"LogicalAnd operands should be bool");
9292
return IntegerType::get(Ctx, 1);
93+
case VPInstruction::Broadcast:
9394
case VPInstruction::PtrAdd:
9495
// Return the type based on the pointer argument (i.e. first operand).
9596
return inferScalarType(R->getOperand(0));

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
584584
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
585585
return CondBr;
586586
}
587+
case VPInstruction::Broadcast: {
588+
return Builder.CreateVectorSplat(
589+
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
590+
}
587591
case VPInstruction::ComputeReductionResult: {
588592
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
589593
// and will be removed by breaking up the recipe further.
@@ -841,7 +845,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
841845
case Instruction::ICmp:
842846
case Instruction::Select:
843847
case Instruction::Or:
844-
case VPInstruction::PtrAdd:
845848
// TODO: Cover additional opcodes.
846849
return vputils::onlyFirstLaneUsed(this);
847850
case VPInstruction::ActiveLaneMask:
@@ -852,6 +855,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
852855
case VPInstruction::BranchOnCond:
853856
case VPInstruction::ResumePhi:
854857
return true;
858+
case VPInstruction::PtrAdd:
859+
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
855860
};
856861
llvm_unreachable("switch should return");
857862
}
@@ -924,6 +929,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
924929
case VPInstruction::BranchOnCount:
925930
O << "branch-on-count";
926931
break;
932+
case VPInstruction::Broadcast:
933+
O << "broadcast";
934+
break;
935+
927936
case VPInstruction::ExtractFromEnd:
928937
O << "extract-from-end";
929938
break;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2151,3 +2151,34 @@ void VPlanTransforms::handleUncountableEarlyExit(
21512151
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
21522152
LatchExitingBranch->eraseFromParent();
21532153
}
2154+
2155+
void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
2156+
VPDominatorTree VPDT;
2157+
VPDT.recalculate(Plan);
2158+
auto *VectorPreheader = Plan.getVectorPreheader();
2159+
VPBuilder Builder(VectorPreheader);
2160+
for (VPValue *LiveIn : Plan.getLiveIns()) {
2161+
if (all_of(LiveIn->users(),
2162+
[LiveIn](VPUser *U) {
2163+
return cast<VPRecipeBase>(U)->usesScalars(LiveIn);
2164+
}) ||
2165+
!LiveIn->getLiveInIRValue() ||
2166+
isa<Constant>(LiveIn->getLiveInIRValue()))
2167+
continue;
2168+
2169+
// Add explicit broadcast if the vector preheader dominates all users.
2170+
// TODO: Find valid insert point for all users.
2171+
if (all_of(LiveIn->users(), [&VPDT, VectorPreheader](VPUser *U) {
2172+
return VectorPreheader != cast<VPRecipeBase>(U)->getParent() &&
2173+
VPDT.dominates(VectorPreheader,
2174+
cast<VPRecipeBase>(U)->getParent());
2175+
})) {
2176+
auto *Broadcast =
2177+
Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn});
2178+
LiveIn->replaceUsesWithIf(Broadcast, [LiveIn, Broadcast](VPUser &U,
2179+
unsigned Idx) {
2180+
return Broadcast != &U && !cast<VPRecipeBase>(&U)->usesScalars(LiveIn);
2181+
});
2182+
}
2183+
}
2184+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ struct VPlanTransforms {
173173
static void
174174
optimizeInductionExitUsers(VPlan &Plan,
175175
DenseMap<VPValue *, VPValue *> &EndValues);
176+
177+
/// Add explicit broadcasts for live-ins used as vectors.
178+
static void materializeBroadcasts(VPlan &Plan);
176179
};
177180

178181
} // namespace llvm

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
1818
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2022
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2123
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2224
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2325
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
2426
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
2527
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
26-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
27-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,14 +103,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
103103
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
104104
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
105105
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
106+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
107+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
106108
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
107109
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
108110
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
109111
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
110112
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
111113
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
112-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
113-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
115115
; CHECK: vector.body:
116116
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
124124
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
125125
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
126126
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
127+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
128+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
127129
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
128130
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
129131
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
130132
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
131133
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
132134
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
133-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
134-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
135135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
136136
; CHECK: [[VECTOR_BODY]]:
137137
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -248,21 +248,21 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
248248
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
249249
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
250250
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
251+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
252+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
251253
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252254
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253255
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254256
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
255-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
256-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
257-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
257+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
258258
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
259259
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
260260
; CHECK: [[VECTOR_BODY]]:
261261
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
262262
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
263263
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
264264
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
265-
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
265+
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
266266
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
267267
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
268268
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
283283
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
284284
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
285285
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
286-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
286+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
287287
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
288288
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
289289
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)