Skip to content

Commit 883a01b

Browse files
SamTebbs33 authored and Lukacma committed
[LV] Bundle partial reductions inside VPExpressionRecipe (llvm#147302)
This PR bundles partial reductions inside the VPExpressionRecipe class. Stacked PRs: 1. llvm#147026 2. llvm#147255 3. llvm#156976 4. llvm#160154 5. -> llvm#147302 6. llvm#162503 7. llvm#147513
1 parent cd06b6d commit 883a01b

21 files changed

+1042
-731
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,9 @@ class TargetTransformInfo {
227227
/// Get the kind of extension that an instruction represents.
228228
LLVM_ABI static PartialReductionExtendKind
229229
getPartialReductionExtendKind(Instruction *I);
230+
/// Get the kind of extension that a cast opcode represents.
231+
LLVM_ABI static PartialReductionExtendKind
232+
getPartialReductionExtendKind(Instruction::CastOps CastOpc);
230233

231234
/// Construct a TTI object using a type implementing the \c Concept
232235
/// API below.

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,13 +1001,25 @@ InstructionCost TargetTransformInfo::getShuffleCost(
10011001

10021002
TargetTransformInfo::PartialReductionExtendKind
10031003
TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) {
1004-
if (isa<SExtInst>(I))
1005-
return PR_SignExtend;
1006-
if (isa<ZExtInst>(I))
1007-
return PR_ZeroExtend;
1004+
if (auto *Cast = dyn_cast<CastInst>(I))
1005+
return getPartialReductionExtendKind(Cast->getOpcode());
10081006
return PR_None;
10091007
}
10101008

1009+
TargetTransformInfo::PartialReductionExtendKind
1010+
TargetTransformInfo::getPartialReductionExtendKind(
1011+
Instruction::CastOps CastOpc) {
1012+
switch (CastOpc) {
1013+
case Instruction::CastOps::ZExt:
1014+
return PR_ZeroExtend;
1015+
case Instruction::CastOps::SExt:
1016+
return PR_SignExtend;
1017+
default:
1018+
return PR_None;
1019+
}
1020+
llvm_unreachable("Unhandled cast opcode");
1021+
}
1022+
10111023
TTI::CastContextHint
10121024
TargetTransformInfo::getCastContextHint(const Instruction *I) {
10131025
if (!I)

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2712,7 +2712,8 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
27122712

27132713
static inline bool classof(const VPRecipeBase *R) {
27142714
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2715-
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2715+
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2716+
R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
27162717
}
27172718

27182719
static inline bool classof(const VPUser *U) {
@@ -2783,7 +2784,10 @@ class VPPartialReductionRecipe : public VPReductionRecipe {
27832784
Opcode(Opcode), VFScaleFactor(ScaleFactor) {
27842785
[[maybe_unused]] auto *AccumulatorRecipe =
27852786
getChainOp()->getDefiningRecipe();
2786-
assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2787+
// When cloning as part of a VPExpressionRecipe the chain op could have
2788+
// been replaced by a temporary VPValue, so it doesn't have a defining recipe.
2789+
assert((!AccumulatorRecipe ||
2790+
isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
27872791
isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
27882792
"Unexpected operand order for partial reduction recipe");
27892793
}
@@ -3093,6 +3097,11 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
30933097
/// removed before codegen.
30943098
void decompose();
30953099

3100+
unsigned getVFScaleFactor() const {
3101+
auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
3102+
return PR ? PR->getVFScaleFactor() : 1;
3103+
}
3104+
30963105
/// Method for generating code, must not be called as this recipe is abstract.
30973106
void execute(VPTransformState &State) override {
30983107
llvm_unreachable("recipe must be removed before execute");

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
168168
return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
169169
case VPBlendSC:
170170
case VPReductionEVLSC:
171+
case VPPartialReductionSC:
171172
case VPReductionSC:
172173
case VPScalarIVStepsSC:
173174
case VPVectorPointerSC:
@@ -300,14 +301,23 @@ InstructionCost
300301
VPPartialReductionRecipe::computeCost(ElementCount VF,
301302
VPCostContext &Ctx) const {
302303
std::optional<unsigned> Opcode;
303-
VPValue *Op = getOperand(0);
304-
VPRecipeBase *OpR = Op->getDefiningRecipe();
305-
306-
// If the partial reduction is predicated, a select will be operand 0
307-
if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
308-
OpR = Op->getDefiningRecipe();
304+
VPValue *Op = getVecOp();
305+
uint64_t MulConst;
306+
// If the partial reduction is predicated, a select will be operand 1.
307+
// If it isn't predicated and the mul isn't operating on a constant, then it
308+
// should have been turned into a VPExpressionRecipe.
309+
// FIXME: Replace the entire function with this once all partial reduction
310+
// variants are bundled into VPExpressionRecipe.
311+
if (!match(Op, m_Select(m_VPValue(), m_VPValue(Op), m_VPValue())) &&
312+
!match(Op, m_Mul(m_VPValue(), m_ConstantInt(MulConst)))) {
313+
auto *PhiType = Ctx.Types.inferScalarType(getChainOp());
314+
auto *InputType = Ctx.Types.inferScalarType(getVecOp());
315+
return Ctx.TTI.getPartialReductionCost(getOpcode(), InputType, InputType,
316+
PhiType, VF, TTI::PR_None,
317+
TTI::PR_None, {}, Ctx.CostKind);
309318
}
310319

320+
VPRecipeBase *OpR = Op->getDefiningRecipe();
311321
Type *InputTypeA = nullptr, *InputTypeB = nullptr;
312322
TTI::PartialReductionExtendKind ExtAType = TTI::PR_None,
313323
ExtBType = TTI::PR_None;
@@ -2856,11 +2866,19 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
28562866
cast<VPReductionRecipe>(ExpressionRecipes.back())->getRecurrenceKind());
28572867
switch (ExpressionType) {
28582868
case ExpressionTypes::ExtendedReduction: {
2859-
return Ctx.TTI.getExtendedReductionCost(
2860-
Opcode,
2861-
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
2862-
Instruction::ZExt,
2863-
RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
2869+
unsigned Opcode = RecurrenceDescriptor::getOpcode(
2870+
cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());
2871+
auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2872+
return isa<VPPartialReductionRecipe>(ExpressionRecipes.back())
2873+
? Ctx.TTI.getPartialReductionCost(
2874+
Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr,
2875+
RedTy, VF,
2876+
TargetTransformInfo::getPartialReductionExtendKind(
2877+
ExtR->getOpcode()),
2878+
TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind)
2879+
: Ctx.TTI.getExtendedReductionCost(
2880+
Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy,
2881+
SrcVecTy, std::nullopt, Ctx.CostKind);
28642882
}
28652883
case ExpressionTypes::MulAccReduction:
28662884
return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
@@ -2871,6 +2889,19 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
28712889
Opcode = Instruction::Sub;
28722890
[[fallthrough]];
28732891
case ExpressionTypes::ExtMulAccReduction: {
2892+
if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) {
2893+
auto *Ext0R = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2894+
auto *Ext1R = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
2895+
auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
2896+
return Ctx.TTI.getPartialReductionCost(
2897+
Opcode, Ctx.Types.inferScalarType(getOperand(0)),
2898+
Ctx.Types.inferScalarType(getOperand(1)), RedTy, VF,
2899+
TargetTransformInfo::getPartialReductionExtendKind(
2900+
Ext0R->getOpcode()),
2901+
TargetTransformInfo::getPartialReductionExtendKind(
2902+
Ext1R->getOpcode()),
2903+
Mul->getOpcode(), Ctx.CostKind);
2904+
}
28742905
return Ctx.TTI.getMulAccReductionCost(
28752906
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
28762907
Instruction::ZExt,
@@ -2910,12 +2941,13 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
29102941
O << " = ";
29112942
auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());
29122943
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
2944+
bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);
29132945

29142946
switch (ExpressionType) {
29152947
case ExpressionTypes::ExtendedReduction: {
29162948
getOperand(1)->printAsOperand(O, SlotTracker);
2917-
O << " +";
2918-
O << " reduce." << Instruction::getOpcodeName(Opcode) << " (";
2949+
O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
2950+
O << Instruction::getOpcodeName(Opcode) << " (";
29192951
getOperand(0)->printAsOperand(O, SlotTracker);
29202952
Red->printFlags(O);
29212953

@@ -2931,8 +2963,8 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
29312963
}
29322964
case ExpressionTypes::ExtNegatedMulAccReduction: {
29332965
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
2934-
O << " + reduce."
2935-
<< Instruction::getOpcodeName(
2966+
O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
2967+
O << Instruction::getOpcodeName(
29362968
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
29372969
<< " (sub (0, mul";
29382970
auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
@@ -2956,9 +2988,8 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
29562988
case ExpressionTypes::MulAccReduction:
29572989
case ExpressionTypes::ExtMulAccReduction: {
29582990
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
2959-
O << " + ";
2960-
O << "reduce."
2961-
<< Instruction::getOpcodeName(
2991+
O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
2992+
O << Instruction::getOpcodeName(
29622993
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
29632994
<< " (";
29642995
O << "mul";

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3519,18 +3519,31 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
35193519
VPValue *VecOp = Red->getVecOp();
35203520

35213521
// Clamp the range if using extended-reduction is profitable.
3522-
auto IsExtendedRedValidAndClampRange = [&](unsigned Opcode, bool isZExt,
3523-
Type *SrcTy) -> bool {
3522+
auto IsExtendedRedValidAndClampRange =
3523+
[&](unsigned Opcode, Instruction::CastOps ExtOpc, Type *SrcTy) -> bool {
35243524
return LoopVectorizationPlanner::getDecisionAndClampRange(
35253525
[&](ElementCount VF) {
35263526
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
35273527
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
3528-
InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3529-
Opcode, isZExt, RedTy, SrcVecTy, Red->getFastMathFlags(),
3530-
CostKind);
3528+
3529+
InstructionCost ExtRedCost;
35313530
InstructionCost ExtCost =
35323531
cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx);
35333532
InstructionCost RedCost = Red->computeCost(VF, Ctx);
3533+
3534+
if (isa<VPPartialReductionRecipe>(Red)) {
3535+
TargetTransformInfo::PartialReductionExtendKind ExtKind =
3536+
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
3537+
// FIXME: Move partial reduction creation, costing and clamping
3538+
// here from LoopVectorize.cpp.
3539+
ExtRedCost = Ctx.TTI.getPartialReductionCost(
3540+
Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
3541+
llvm::TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind);
3542+
} else {
3543+
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3544+
Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
3545+
Red->getFastMathFlags(), CostKind);
3546+
}
35343547
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
35353548
},
35363549
Range);
@@ -3541,8 +3554,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
35413554
if (match(VecOp, m_ZExtOrSExt(m_VPValue(A))) &&
35423555
IsExtendedRedValidAndClampRange(
35433556
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()),
3544-
cast<VPWidenCastRecipe>(VecOp)->getOpcode() ==
3545-
Instruction::CastOps::ZExt,
3557+
cast<VPWidenCastRecipe>(VecOp)->getOpcode(),
35463558
Ctx.Types.inferScalarType(A)))
35473559
return new VPExpressionRecipe(cast<VPWidenCastRecipe>(VecOp), Red);
35483560

@@ -3560,6 +3572,8 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
35603572
static VPExpressionRecipe *
35613573
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
35623574
VPCostContext &Ctx, VFRange &Range) {
3575+
bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);
3576+
35633577
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
35643578
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
35653579
return nullptr;
@@ -3568,16 +3582,41 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
35683582

35693583
// Clamp the range if using multiply-accumulate-reduction is profitable.
35703584
auto IsMulAccValidAndClampRange =
3571-
[&](bool isZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0,
3572-
VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt) -> bool {
3585+
[&](VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
3586+
VPWidenCastRecipe *OuterExt) -> bool {
35733587
return LoopVectorizationPlanner::getDecisionAndClampRange(
35743588
[&](ElementCount VF) {
35753589
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
35763590
Type *SrcTy =
35773591
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
3578-
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
3579-
InstructionCost MulAccCost = Ctx.TTI.getMulAccReductionCost(
3580-
isZExt, Opcode, RedTy, SrcVecTy, CostKind);
3592+
InstructionCost MulAccCost;
3593+
3594+
if (IsPartialReduction) {
3595+
Type *SrcTy2 =
3596+
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
3597+
// FIXME: Move partial reduction creation, costing and clamping
3598+
// here from LoopVectorize.cpp.
3599+
MulAccCost = Ctx.TTI.getPartialReductionCost(
3600+
Opcode, SrcTy, SrcTy2, RedTy, VF,
3601+
Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
3602+
Ext0->getOpcode())
3603+
: TargetTransformInfo::PR_None,
3604+
Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
3605+
Ext1->getOpcode())
3606+
: TargetTransformInfo::PR_None,
3607+
Mul->getOpcode(), CostKind);
3608+
} else {
3609+
// Only partial reductions support mixed extends at the moment.
3610+
if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
3611+
return false;
3612+
3613+
bool IsZExt =
3614+
!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
3615+
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
3616+
MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
3617+
SrcVecTy, CostKind);
3618+
}
3619+
35813620
InstructionCost MulCost = Mul->computeCost(VF, Ctx);
35823621
InstructionCost RedCost = Red->computeCost(VF, Ctx);
35833622
InstructionCost ExtCost = 0;
@@ -3611,23 +3650,18 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
36113650
dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
36123651
auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
36133652

3614-
// Match reduce.add(mul(ext, ext)).
3615-
if (RecipeA && RecipeB &&
3616-
(RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) &&
3617-
match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
3653+
// Match reduce.add/sub(mul(ext, ext)).
3654+
if (RecipeA && RecipeB && match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
36183655
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
3619-
IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
3620-
Instruction::CastOps::ZExt,
3621-
Mul, RecipeA, RecipeB, nullptr)) {
3656+
IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
36223657
if (Sub)
36233658
return new VPExpressionRecipe(RecipeA, RecipeB, Mul,
36243659
cast<VPWidenRecipe>(Sub), Red);
36253660
return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
36263661
}
36273662
// Match reduce.add(mul).
36283663
// TODO: Add an expression type for this variant with a negated mul
3629-
if (!Sub &&
3630-
IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
3664+
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
36313665
return new VPExpressionRecipe(Mul, Red);
36323666
}
36333667
// TODO: Add an expression type for negated versions of other expression
@@ -3647,9 +3681,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
36473681
cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
36483682
if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
36493683
Ext0->getOpcode() == Ext1->getOpcode() &&
3650-
IsMulAccValidAndClampRange(Ext0->getOpcode() ==
3651-
Instruction::CastOps::ZExt,
3652-
Mul, Ext0, Ext1, Ext)) {
3684+
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
36533685
auto *NewExt0 = new VPWidenCastRecipe(
36543686
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
36553687
*Ext0, Ext0->getDebugLoc());

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
151151
return RR->getVFScaleFactor();
152152
if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
153153
return RR->getVFScaleFactor();
154+
if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
155+
return ER->getVFScaleFactor();
154156
assert(
155157
(!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
156158
VPInstruction::ReductionStartVector) &&

llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
8686
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
8787
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
8888
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
89-
; CHECK: Cost for VF 16: 41
89+
; CHECK: Cost of 1 for VF 16: EXPRESSION vp<%11> = ir<%sum> + partial.reduce.add (mul nuw nsw (ir<%1> zext to i64), (ir<%0> zext to i64))
90+
; CHECK: Cost for VF 16: 3
9091
; CHECK: LV: Selecting VF: 16
9192
entry:
9293
br label %for.body

llvm/test/Transforms/LoopVectorize/AArch64/maxbandwidth-regpressure.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
66
target triple = "aarch64-none-unknown-elf"
77

88
define i32 @dotp(ptr %a, ptr %b) #0 {
9-
; CHECK-REGS-VP-NOT: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
10-
; CHECK-REGS-VP: LV: Selecting VF: vscale x 8.
9+
; CHECK-REGS-VP: LV: Selecting VF: vscale x 16.
1110
;
1211
; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers
1312
; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers

0 commit comments

Comments
 (0)