Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ class TargetTransformInfo {
/// Get the kind of extension that an instruction represents.
LLVM_ABI static PartialReductionExtendKind
getPartialReductionExtendKind(Instruction *I);
/// Get the kind of extension that a cast opcode represents.
LLVM_ABI static PartialReductionExtendKind
getPartialReductionExtendKind(Instruction::CastOps CastOpc);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be good to add a brief comment

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


/// Construct a TTI object using a type implementing the \c Concept
/// API below.
Expand Down
20 changes: 16 additions & 4 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1001,13 +1001,25 @@ InstructionCost TargetTransformInfo::getShuffleCost(

TargetTransformInfo::PartialReductionExtendKind
TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) {
if (isa<SExtInst>(I))
return PR_SignExtend;
if (isa<ZExtInst>(I))
return PR_ZeroExtend;
if (auto *Cast = dyn_cast<CastInst>(I))
return getPartialReductionExtendKind(Cast->getOpcode());
return PR_None;
}

TargetTransformInfo::PartialReductionExtendKind
TargetTransformInfo::getPartialReductionExtendKind(
Instruction::CastOps CastOpc) {
switch (CastOpc) {
case Instruction::CastOps::ZExt:
return PR_ZeroExtend;
case Instruction::CastOps::SExt:
return PR_SignExtend;
default:
return PR_None;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you might need llvm_unreachable() at the bottom, I think some bots will complain otherwise

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks.

llvm_unreachable("Unhandled cast opcode");
}

TTI::CastContextHint
TargetTransformInfo::getCastContextHint(const Instruction *I) {
if (!I)
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2712,7 +2712,8 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {

static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this was missed before and only now is tested?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's right.

}

static inline bool classof(const VPUser *U) {
Expand Down Expand Up @@ -2783,7 +2784,10 @@ class VPPartialReductionRecipe : public VPReductionRecipe {
Opcode(Opcode), VFScaleFactor(ScaleFactor) {
[[maybe_unused]] auto *AccumulatorRecipe =
getChainOp()->getDefiningRecipe();
assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
// When cloning as part of a VPExpressionRecipe the chain op could have
// replaced by a temporary VPValue, so it doesn't have a defining recipe.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for this case, can we assert that it is a live-in w/o underlying value?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to assert that?

assert((!AccumulatorRecipe ||
isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
"Unexpected operand order for partial reduction recipe");
}
Expand Down Expand Up @@ -3093,6 +3097,11 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
/// removed before codegen.
void decompose();

unsigned getVFScaleFactor() const {
auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
return PR ? PR->getVFScaleFactor() : 1;
}

/// Method for generating code, must not be called as this recipe is abstract.
void execute(VPTransformState &State) override {
llvm_unreachable("recipe must be removed before execute");
Expand Down
67 changes: 49 additions & 18 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
case VPBlendSC:
case VPReductionEVLSC:
case VPPartialReductionSC:
case VPReductionSC:
case VPScalarIVStepsSC:
case VPVectorPointerSC:
Expand Down Expand Up @@ -300,14 +301,23 @@ InstructionCost
VPPartialReductionRecipe::computeCost(ElementCount VF,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the change you made to this function, it made me realise that this entirely function can be simplified to:

auto *PhiType = Ctx.Types.inferScalarType(getChainOp());
auto *InputType = Ctx.Types.inferScalarType(getVecOp());
return Ctx.TTI.getPartialReductionCost(getOpcode(), InputType, InputType,
                                       PhiType, VF, TTI::PR_None, TTI::PR_None,
                                       {}, Ctx.CostKind);

Because there is no need to re-analyse all the expressions again, all this information should have already been expressed by a VPExpression's and its corresponding cost model. (Note that the cost-model for AArch64 will return Invalid cost for all the costs if there is no BinOp and no extend, but that's fine, because we wouldn't want to select a partial reduction anyway if there wasn't any type-extension going on)

That being said, using VPExpressions for partial reductions doesn't currently support predicated vector loops, because that introduces another select in the loop, which isn't recognised by any of the VPExpressions as of today, so you can put the above suggested code under a if (!match(Op, m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {} condition.

It would also be good to add a FIXME to say that that complicated cost-model code here should be removed after fully migrating to VPExpressions.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thank you.

VPCostContext &Ctx) const {
std::optional<unsigned> Opcode;
VPValue *Op = getOperand(0);
VPRecipeBase *OpR = Op->getDefiningRecipe();

// If the partial reduction is predicated, a select will be operand 0
if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
OpR = Op->getDefiningRecipe();
VPValue *Op = getVecOp();
uint64_t MulConst;
// If the partial reduction is predicated, a select will be operand 1.
// If it isn't predicated and the mul isn't operating on a constant, then it
// should have been turned into a VPExpressionRecipe.
// FIXME: Replace the entire function with this once all partial reduction
// variants are bundled into VPExpressionRecipe.
if (!match(Op, m_Select(m_VPValue(), m_VPValue(Op), m_VPValue())) &&
!match(Op, m_Mul(m_VPValue(), m_ConstantInt(MulConst)))) {
auto *PhiType = Ctx.Types.inferScalarType(getChainOp());
auto *InputType = Ctx.Types.inferScalarType(getVecOp());
return Ctx.TTI.getPartialReductionCost(getOpcode(), InputType, InputType,
PhiType, VF, TTI::PR_None,
TTI::PR_None, {}, Ctx.CostKind);
}

VPRecipeBase *OpR = Op->getDefiningRecipe();
Type *InputTypeA = nullptr, *InputTypeB = nullptr;
TTI::PartialReductionExtendKind ExtAType = TTI::PR_None,
ExtBType = TTI::PR_None;
Expand Down Expand Up @@ -2856,11 +2866,19 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
cast<VPReductionRecipe>(ExpressionRecipes.back())->getRecurrenceKind());
switch (ExpressionType) {
case ExpressionTypes::ExtendedReduction: {
return Ctx.TTI.getExtendedReductionCost(
Opcode,
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
Instruction::ZExt,
RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
unsigned Opcode = RecurrenceDescriptor::getOpcode(
cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());
auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
return isa<VPPartialReductionRecipe>(ExpressionRecipes.back())
? Ctx.TTI.getPartialReductionCost(
Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr,
RedTy, VF,
TargetTransformInfo::getPartialReductionExtendKind(
ExtR->getOpcode()),
TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind)
: Ctx.TTI.getExtendedReductionCost(
Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy,
SrcVecTy, std::nullopt, Ctx.CostKind);
}
case ExpressionTypes::MulAccReduction:
return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
Expand All @@ -2871,6 +2889,19 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
Opcode = Instruction::Sub;
[[fallthrough]];
case ExpressionTypes::ExtMulAccReduction: {
if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) {
auto *Ext0R = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
auto *Ext1R = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
Comment on lines +2893 to +2894
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work as expected for all test on current main? I think at least in some cases one of the operands may be a constant live-in.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah the matching function in VPlanTransforms explicitly checks for two extends, so the constant variant doesn't get bundled currently. I'm happy to get that working separately if necessary.

auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
return Ctx.TTI.getPartialReductionCost(
Opcode, Ctx.Types.inferScalarType(getOperand(0)),
Ctx.Types.inferScalarType(getOperand(1)), RedTy, VF,
TargetTransformInfo::getPartialReductionExtendKind(
Ext0R->getOpcode()),
TargetTransformInfo::getPartialReductionExtendKind(
Ext1R->getOpcode()),
Mul->getOpcode(), Ctx.CostKind);
}
return Ctx.TTI.getMulAccReductionCost(
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
Instruction::ZExt,
Expand Down Expand Up @@ -2910,12 +2941,13 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);

switch (ExpressionType) {
case ExpressionTypes::ExtendedReduction: {
getOperand(1)->printAsOperand(O, SlotTracker);
O << " +";
O << " reduce." << Instruction::getOpcodeName(Opcode) << " (";
O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(Opcode) << " (";
getOperand(0)->printAsOperand(O, SlotTracker);
Red->printFlags(O);

Expand All @@ -2931,8 +2963,8 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
}
case ExpressionTypes::ExtNegatedMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
O << " + reduce."
<< Instruction::getOpcodeName(
O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
<< " (sub (0, mul";
auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
Expand All @@ -2956,9 +2988,8 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
case ExpressionTypes::MulAccReduction:
case ExpressionTypes::ExtMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
O << " + ";
O << "reduce."
<< Instruction::getOpcodeName(
O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
<< " (";
O << "mul";
Expand Down
80 changes: 56 additions & 24 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3519,18 +3519,31 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
VPValue *VecOp = Red->getVecOp();

// Clamp the range if using extended-reduction is profitable.
auto IsExtendedRedValidAndClampRange = [&](unsigned Opcode, bool isZExt,
Type *SrcTy) -> bool {
auto IsExtendedRedValidAndClampRange =
[&](unsigned Opcode, Instruction::CastOps ExtOpc, Type *SrcTy) -> bool {
return LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) {
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
Opcode, isZExt, RedTy, SrcVecTy, Red->getFastMathFlags(),
CostKind);

InstructionCost ExtRedCost;
InstructionCost ExtCost =
cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx);
InstructionCost RedCost = Red->computeCost(VF, Ctx);

if (isa<VPPartialReductionRecipe>(Red)) {
TargetTransformInfo::PartialReductionExtendKind ExtKind =
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
ExtRedCost = Ctx.TTI.getPartialReductionCost(
Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
llvm::TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind);
} else {
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
Red->getFastMathFlags(), CostKind);
}
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
},
Range);
Expand All @@ -3541,8 +3554,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
if (match(VecOp, m_ZExtOrSExt(m_VPValue(A))) &&
IsExtendedRedValidAndClampRange(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()),
cast<VPWidenCastRecipe>(VecOp)->getOpcode() ==
Instruction::CastOps::ZExt,
cast<VPWidenCastRecipe>(VecOp)->getOpcode(),
Ctx.Types.inferScalarType(A)))
return new VPExpressionRecipe(cast<VPWidenCastRecipe>(VecOp), Red);

Expand All @@ -3560,6 +3572,8 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
static VPExpressionRecipe *
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
VPCostContext &Ctx, VFRange &Range) {
bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);

unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
return nullptr;
Expand All @@ -3568,16 +3582,41 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,

// Clamp the range if using multiply-accumulate-reduction is profitable.
auto IsMulAccValidAndClampRange =
[&](bool isZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0,
VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt) -> bool {
[&](VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
VPWidenCastRecipe *OuterExt) -> bool {
return LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *SrcTy =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
InstructionCost MulAccCost = Ctx.TTI.getMulAccReductionCost(
isZExt, Opcode, RedTy, SrcVecTy, CostKind);
InstructionCost MulAccCost;

if (IsPartialReduction) {
Type *SrcTy2 =
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
MulAccCost = Ctx.TTI.getPartialReductionCost(
Opcode, SrcTy, SrcTy2, RedTy, VF,
Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
Ext0->getOpcode())
: TargetTransformInfo::PR_None,
Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
Ext1->getOpcode())
: TargetTransformInfo::PR_None,
Mul->getOpcode(), CostKind);
} else {
// Only partial reductions support mixed extends at the moment.
if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
return false;

bool IsZExt =
!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
SrcVecTy, CostKind);
}

InstructionCost MulCost = Mul->computeCost(VF, Ctx);
InstructionCost RedCost = Red->computeCost(VF, Ctx);
InstructionCost ExtCost = 0;
Expand Down Expand Up @@ -3611,23 +3650,18 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());

// Match reduce.add(mul(ext, ext)).
if (RecipeA && RecipeB &&
(RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) &&
match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
// Match reduce.add/sub(mul(ext, ext)).
if (RecipeA && RecipeB && match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
Instruction::CastOps::ZExt,
Mul, RecipeA, RecipeB, nullptr)) {
IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
if (Sub)
return new VPExpressionRecipe(RecipeA, RecipeB, Mul,
cast<VPWidenRecipe>(Sub), Red);
return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
}
// Match reduce.add(mul).
// TODO: Add an expression type for this variant with a negated mul
if (!Sub &&
IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
return new VPExpressionRecipe(Mul, Red);
}
// TODO: Add an expression type for negated versions of other expression
Expand All @@ -3647,9 +3681,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Ext0->getOpcode() ==
Instruction::CastOps::ZExt,
Mul, Ext0, Ext1, Ext)) {
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
auto *NewExt0 = new VPWidenCastRecipe(
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
*Ext0, Ext0->getDebugLoc());
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
return RR->getVFScaleFactor();
if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
return RR->getVFScaleFactor();
if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
return ER->getVFScaleFactor();
assert(
(!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
VPInstruction::ReductionStartVector) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 16: 41
; CHECK: Cost of 1 for VF 16: EXPRESSION vp<%11> = ir<%sum> + partial.reduce.add (mul nuw nsw (ir<%1> zext to i64), (ir<%0> zext to i64))
; CHECK: Cost for VF 16: 3
; CHECK: LV: Selecting VF: 16
entry:
br label %for.body
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-none-unknown-elf"

define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-REGS-VP-NOT: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
; CHECK-REGS-VP: LV: Selecting VF: vscale x 8.
; CHECK-REGS-VP: LV: Selecting VF: vscale x 16.
;
; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers
; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
Expand Down
Loading
Loading