Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9799,9 +9799,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);

auto *RedRecipe = new VPReductionRecipe(
RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
auto *RedRecipe =
new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
CondOp, CM.useOrderedReductions(RdxDesc));
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
Expand Down
28 changes: 20 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,8 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
R->getVPDefID() == VPRecipeBase::VPReductionSC ||
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
Expand Down Expand Up @@ -788,6 +790,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
}
}

/// Set fast-math flags for this recipe.
void setFastMathFlags(FastMathFlags FMFs) {
assert(OpType == OperationType::FPMathOp);
this->FMFs = FMFs;
}

CmpInst::Predicate getPredicate() const {
assert(OpType == OperationType::Cmp &&
"recipe doesn't have a compare predicate");
Expand Down Expand Up @@ -2286,7 +2294,7 @@ class VPInterleaveRecipe : public VPRecipeBase {
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPSingleDefRecipe {
class VPReductionRecipe : public VPRecipeWithIRFlags {
/// The recurrence decriptor for the reduction in question.
const RecurrenceDescriptor &RdxDesc;
bool IsOrdered;
Expand All @@ -2296,29 +2304,33 @@ class VPReductionRecipe : public VPSingleDefRecipe {
protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
VPValue *CondOp, bool IsOrdered, DebugLoc DL)
: VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
VPValue *CondOp, bool IsOrdered)
: VPRecipeWithIRFlags(SC, Operands, *I), RdxDesc(R),
IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
}
// The inloop reduction may across multiple scalar instruction and the
// underlying instruction may not contains the corresponding flags. Set the
// flags explicit from the redurrence descriptor.
if (isa<FPMathOperator>(I))
setFastMathFlags(R.getFastMathFlags());
}

public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = {})
bool IsOrdered)
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}
IsOrdered) {}

~VPReductionRecipe() override = default;

VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
getVecOp(), getCondOp(), IsOrdered,
getDebugLoc());
getVecOp(), getCondOp(), IsOrdered);
}

static inline bool classof(const VPRecipeBase *R) {
Expand Down Expand Up @@ -2373,7 +2385,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), R.getDebugLoc()) {}
R.isOrdered()) {}

~VPReductionEVLRecipe() override = default;

Expand Down
22 changes: 12 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2224,7 +2224,8 @@ void VPReductionRecipe::execute(VPTransformState &State) {
RecurKind Kind = RdxDesc.getRecurrenceKind();
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
State.setDebugLocFrom(getDebugLoc());
Value *NewVecOp = State.get(getVecOp());
if (VPValue *Cond = getCondOp()) {
Expand Down Expand Up @@ -2275,7 +2276,8 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
if (hasFastMathFlags())
Builder.setFastMathFlags(getFastMathFlags());

RecurKind Kind = RdxDesc.getRecurrenceKind();
Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
Expand Down Expand Up @@ -2312,6 +2314,8 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
Type *ElementTy = Ctx.Types.inferScalarType(this);
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
unsigned Opcode = RdxDesc.getOpcode();
FastMathFlags FMFs =
hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();

// TODO: Support any-of and in-loop reductions.
assert(
Expand All @@ -2331,12 +2335,12 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
return Cost + Ctx.TTI.getMinMaxReductionCost(
Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
return Cost +
Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
}

return Cost + Ctx.TTI.getArithmeticReductionCost(
Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
return Cost + Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs,
Ctx.CostKind);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand All @@ -2347,8 +2351,7 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
if (isa<FPMathOperator>(getUnderlyingInstr()))
O << getUnderlyingInstr()->getFastMathFlags();
printFlags(O);
O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
Expand All @@ -2369,8 +2372,7 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
if (isa<FPMathOperator>(getUnderlyingInstr()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not major but if we add back the isa check then we remove the diff for the integer reduction test

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add it back the prevent integer reduction test changes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can we end up with integer reductions with FMFs set?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I understand an integer RecurrenceDescriptor has all fast math flags set:

// Start with all flags set because we will intersect this with the reduction
// flags from all the reduction operations.
FastMathFlags FMF = FastMathFlags::getFast();

So currently today an integer VPReductionRecipe will use a builder with all fast math flags set:

  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but we shouldn't further propagate incorrect flags to VPReductionRecipe. We should probably check if it is a FP reduction on construction, and if it isn't set empty (fast-math) flags.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense. I think @ElvisWang123 had already done something similar in a previous version of this PR like

     if (isa<FPMathOperator>(I))
       setFastMathFlags(R.getFastMathFlags());

in the constructor. Should that be done as a part of this PR or in a follow up?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be done straight away, I added a suggested edit

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed checks, thanks!

O << getUnderlyingInstr()->getFastMathFlags();
printFlags(O);
O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + nsw vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you rebase to latest main? This change shouldn't be needed any longer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rebase, fixed.

; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
Expand Down Expand Up @@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
Expand Down
24 changes: 18 additions & 6 deletions llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,29 +1165,35 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
}

{
auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
PoisonValue::get(Int32));
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
EXPECT_FALSE(Recipe.mayReadOrWriteMemory());
delete Add;
}

{
auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
PoisonValue::get(Int32));
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_FALSE(EVLRecipe.mayHaveSideEffects());
EXPECT_FALSE(EVLRecipe.mayReadFromMemory());
EXPECT_FALSE(EVLRecipe.mayWriteToMemory());
EXPECT_FALSE(EVLRecipe.mayReadOrWriteMemory());
delete Add;
}

{
Expand Down Expand Up @@ -1529,28 +1535,34 @@ TEST_F(VPRecipeTest, dumpRecipeUnnamedVPValuesNotInPlanOrBlock) {

TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
PoisonValue::get(Int32));
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
VecOp, false);
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
false);
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
delete Add;
}

TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
PoisonValue::get(Int32));
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
VecOp, false);
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
false);
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_TRUE(isa<VPUser>(&EVLRecipe));
VPRecipeBase *BaseR = &EVLRecipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
delete Add;
}
} // namespace

Expand Down
Loading