Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9810,8 +9810,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);

// Non-FP RdxDescs will have all fast math flags set, so clear them.
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
: FastMathFlags();
auto *RedRecipe = new VPReductionRecipe(
RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
Expand Down
38 changes: 17 additions & 21 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2239,22 +2239,19 @@ class VPInterleaveRecipe : public VPRecipeBase {
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeWithIRFlags {
/// The recurrence decriptor for the reduction in question.
const RecurrenceDescriptor &RdxDesc;
/// The recurrence kind for the reduction in question.
RecurKind RdxKind;
bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;

protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
VPValue *CondOp, bool IsOrdered, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands,
isa_and_nonnull<FPMathOperator>(I)
? R.getFastMathFlags()
: FastMathFlags(),
DL),
RdxDesc(R), IsOrdered(IsOrdered) {
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
FastMathFlags FMFs, Instruction *I,
ArrayRef<VPValue *> Operands, VPValue *CondOp,
bool IsOrdered, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
Expand All @@ -2263,19 +2260,19 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
}

public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}

~VPReductionRecipe() override = default;

VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
getVecOp(), getCondOp(), IsOrdered,
getDebugLoc());
return new VPReductionRecipe(RdxKind, getFastMathFlags(),
getUnderlyingInstr(), getChainOp(), getVecOp(),
getCondOp(), IsOrdered, getDebugLoc());
}

static inline bool classof(const VPRecipeBase *R) {
Expand All @@ -2301,10 +2298,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
VPSlotTracker &SlotTracker) const override;
#endif

/// Return the recurrence decriptor for the in-loop reduction.
const RecurrenceDescriptor &getRecurrenceDescriptor() const {
return RdxDesc;
}
/// Return the recurrence kind for the in-loop reduction.
RecurKind getRecurrenceKind() const { return RdxKind; }
/// Return true if the in-loop reduction is ordered.
bool isOrdered() const { return IsOrdered; };
/// Return true if the in-loop reduction is conditional.
Expand All @@ -2328,7 +2323,8 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
DebugLoc DL = {})
: VPReductionRecipe(
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
Expand Down
46 changes: 21 additions & 25 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2285,7 +2285,7 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
RecurKind Kind = RdxDesc.getRecurrenceKind();
RecurKind Kind = getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
"In-loop AnyOf reductions aren't currently supported");
// Propagate the fast-math flags carried by the underlying instruction.
Expand All @@ -2298,8 +2298,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();

Value *Start =
getRecurrenceIdentity(Kind, ElementTy, RdxDesc.getFastMathFlags());
Value *Start = getRecurrenceIdentity(Kind, ElementTy, getFastMathFlags());
if (State.VF.isVector())
Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);

Expand All @@ -2314,18 +2313,19 @@ void VPReductionRecipe::execute(VPTransformState &State) {
createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
else
NewRed = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
PrevInChain, NewVecOp);
PrevInChain = NewRed;
NextInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
NewRed, PrevInChain);
NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
else
NextInChain = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind), NewRed,
PrevInChain);
}
State.set(this, NextInChain, /*IsScalar*/ true);
}
Expand All @@ -2336,10 +2336,9 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
Builder.setFastMathFlags(getFastMathFlags());

RecurKind Kind = RdxDesc.getRecurrenceKind();
RecurKind Kind = getRecurrenceKind();
Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
Value *VecOp = State.get(getVecOp());
Value *EVL = State.get(getEVL(), VPLane(0));
Expand All @@ -2362,18 +2361,19 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
else
NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
NewRed, Prev);
NewRed = Builder.CreateBinOp(
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind), NewRed,
Prev);
}
State.set(this, NewRed, /*IsScalar*/ true);
}

InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
RecurKind RdxKind = RdxDesc.getRecurrenceKind();
RecurKind RdxKind = getRecurrenceKind();
Type *ElementTy = Ctx.Types.inferScalarType(this);
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
unsigned Opcode = RdxDesc.getOpcode();
unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
FastMathFlags FMFs = getFastMathFlags();

// TODO: Support any-of and in-loop reductions.
Expand All @@ -2386,9 +2386,6 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
ForceTargetInstructionCost.getNumOccurrences() > 0) &&
"In-loop reduction not implemented in VPlan-based cost model currently.");

assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
"Inferred type and recurrence type mismatch.");

// Cost = Reduction cost + BinOp cost
InstructionCost Cost =
Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
Expand All @@ -2411,28 +2408,30 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
printFlags(O);
O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
O << " reduce."
<< Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(getRecurrenceKind()))
<< " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
O << ", ";
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
if (RdxDesc.IntermediateStore)
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
Comment on lines -2436 to -2438
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK to drop this, as the store is sunk explicitly.

}

void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
O << Indent << "REDUCE ";
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
printFlags(O);
O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
O << " vp.reduce."
<< Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(getRecurrenceKind()))
<< " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
getEVL()->printAsOperand(O, SlotTracker);
Expand All @@ -2441,9 +2440,6 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
if (RdxDesc.IntermediateStore)
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
Expand Down
16 changes: 8 additions & 8 deletions llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1170,8 +1170,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
Expand All @@ -1185,8 +1185,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_FALSE(EVLRecipe.mayHaveSideEffects());
Expand Down Expand Up @@ -1540,8 +1540,8 @@ TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
Expand All @@ -1555,8 +1555,8 @@ TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_TRUE(isa<VPUser>(&EVLRecipe));
Expand Down