Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 20 additions & 103 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,11 @@ class LoopVectorizationCostModel {
return InLoopReductions.contains(Phi);
}

/// Returns the set of in-loop reduction PHIs.
/// The set is handed out by const reference, so no copy is made and callers
/// cannot modify it through this accessor.
const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
return InLoopReductions;
}

/// Returns true if the predicated reduction select should be used to set the
/// incoming value for the reduction phi.
bool usePredicatedReductionSelect() const {
Expand Down Expand Up @@ -7626,58 +7631,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
VPIRMetadata(*Store, LVer), VPI->getDebugLoc());
}

/// Builds the VPWidenIntOrFpInductionRecipe that corresponds to \p PhiR. The
/// step of \p IndDesc is obtained as a VPValue in \p Plan (which may require
/// creating a recipe to expand it), and an increment of the form `PhiR + x`
/// feeding the phi is rewired to use that very same step value.
static VPWidenIntOrFpInductionRecipe *
createWidenInductionRecipes(VPInstruction *PhiR,
                            const InductionDescriptor &IndDesc, VPlan &Plan,
                            ScalarEvolution &SE, Loop &OrigLoop) {
  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
         "step must be loop invariant");

  // Operand 0 of the phi is the start value; it has to be the live-in that
  // the induction descriptor names as its start.
  VPValue *StartV = PhiR->getOperand(0);
  assert(Plan.getLiveIn(IndDesc.getStartValue()) == StartV &&
         "Start VPValue must match IndDesc's start value");

  VPValue *StepV =
      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());

  {
    // Make the increment of the wide induction share the induction's step.
    // That way induction increments can be recognized directly in VPlan and
    // no redundant splat of the step is needed.
    using namespace llvm::VPlanPatternMatch;
    VPValue *IncV = PhiR->getOperand(1);
    if (match(IncV, m_Add(m_Specific(PhiR), m_VPValue())))
      IncV->getDefiningRecipe()->setOperand(1, StepV);
  }

  auto *UnderlyingPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
  return new VPWidenIntOrFpInductionRecipe(UnderlyingPhi, StartV, StepV,
                                           &Plan.getVF(), IndDesc,
                                           PhiR->getDebugLoc());
}

/// Tries to build an induction recipe for the phi underlying \p VPI. Returns
/// a widened int/fp induction recipe or a widened pointer induction recipe
/// when legality has a matching induction descriptor, and nullptr otherwise.
VPHeaderPHIRecipe *
VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI, VFRange &Range) {
  auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());

  // Integer and floating-point inductions: delegate to the helper that builds
  // the recipe producing the scalar and vector values.
  if (auto *IntOrFpDesc = Legal->getIntOrFpInductionDescriptor(Phi))
    return createWidenInductionRecipes(VPI, *IntOrFpDesc, Plan, *PSE.getSE(),
                                       *OrigLoop);

  // Anything that is not a pointer induction either is not handled here.
  auto *PtrDesc = Legal->getPointerInductionDescriptor(Phi);
  if (!PtrDesc)
    return nullptr;

  VPValue *Step =
      vputils::getOrCreateVPValueForSCEVExpr(Plan, PtrDesc->getStep());

  // Ask the cost model, per VF, whether the phi is scalar after
  // vectorization; the decision is taken via getDecisionAndClampRange on
  // \p Range.
  auto IsScalarAfterVec = [&](ElementCount VF) {
    return CM.isScalarAfterVectorization(Phi, VF);
  };
  bool ScalarAfterVec =
      LoopVectorizationPlanner::getDecisionAndClampRange(IsScalarAfterVec,
                                                         Range);

  return new VPWidenPointerInductionRecipe(Phi, VPI->getOperand(0), Step,
                                           &Plan.getVFxUF(), *PtrDesc,
                                           ScalarAfterVec, VPI->getDebugLoc());
}

VPWidenIntOrFpInductionRecipe *
VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
VFRange &Range) {
Expand Down Expand Up @@ -8154,45 +8107,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
if (auto *PhiR = dyn_cast<VPPhi>(R)) {
VPBasicBlock *Parent = PhiR->getParent();
[[maybe_unused]] VPRegionBlock *LoopRegionOf =
Parent->getEnclosingLoopRegion();
assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
"Non-header phis should have been handled during predication");
auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
if ((Recipe = tryToOptimizeInductionPHI(PhiR, Range)))
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
Legal->isFixedOrderRecurrence(Phi)) &&
"can only widen reductions and fixed-order recurrences here");
VPValue *StartV = R->getOperand(0);
if (Legal->isReductionVariable(Phi)) {
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
assert(RdxDesc.getRecurrenceStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));

// If the PHI is used by a partial reduction, set the scale factor.
unsigned ScaleFactor =
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
// recurrences in the chain, the fixed order recurrence should be modeled
// directly, enabling more efficient codegen.
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
}
// Add backedge value.
PhiRecipe->addOperand(R->getOperand(1));
return PhiRecipe;
}
assert(!R->isPhi() && "only VPPhi nodes expected at this point");
assert(!R->isPhi() && "phis must be handled earlier");

auto *VPI = cast<VPInstruction>(R);
Instruction *Instr = R->getUnderlyingInstr();
Expand Down Expand Up @@ -8249,6 +8164,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
std::swap(BinOp, Accumulator);

if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
RedPhiR->setVFScaleFactor(ScaleFactor);

assert(ScaleFactor ==
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
"all accumulators in chain must have same scale factor");
Expand Down Expand Up @@ -8295,6 +8213,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
OrigLoop, *LI, Legal->getWidestInductionType(),
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE);

// Create recipes for header phis.
VPlanTransforms::createHeaderPhiRecipes(
*VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
CM.getInLoopReductions(), Hints.allowReordering());

auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
Expand Down Expand Up @@ -8415,25 +8339,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;

// Now process all other blocks and instructions.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *SingleDef = cast<VPSingleDefRecipe>(&R);
auto *UnderlyingValue = SingleDef->getUnderlyingValue();
// Skip recipes that do not need transforming, including canonical IV,
// wide canonical IV and VPInstructions without underlying values. The
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
auto *SingleDef = dyn_cast<VPInstruction>(&R);
if (!SingleDef || !SingleDef->getUnderlyingValue())
continue;
assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");

// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
Instruction *Instr = cast<Instruction>(UnderlyingValue);
Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
Builder.setInsertPoint(SingleDef);

// The stores with invariant address inside the loop will be deleted, and
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,6 @@ class VPRecipeBuilder {
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);

/// Check if an induction recipe should be constructed for \p VPI. If so build
/// and return it. If not, return null.
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI,
VFRange &Range);

/// Optimize the special case where the operand of \p VPI is a constant
/// integer induction variable.
VPWidenIntOrFpInductionRecipe *
Expand Down
21 changes: 11 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1084,7 +1084,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
OpcodeTy Opcode;

/// An optional name that can be used for the generated IR instruction.
const std::string Name;
std::string Name;

/// Returns true if we can generate a scalar for the first lane only if
/// needed.
Expand Down Expand Up @@ -1183,6 +1183,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,

/// Returns the symbolic name assigned to the VPInstruction.
StringRef getName() const { return Name; }

/// Replaces the symbolic name assigned to the VPInstruction with a copy of
/// \p NewName.
void setName(StringRef NewName) { Name = NewName.str(); }
};

/// A specialization of VPInstruction augmenting it with a dedicated result
Expand Down Expand Up @@ -2211,19 +2213,15 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
};

class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
bool IsScalarAfterVectorization;

public:
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start and the number of elements unrolled \p NumUnrolledElems, typically
/// VF*UF.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
VPValue *NumUnrolledElems,
const InductionDescriptor &IndDesc,
bool IsScalarAfterVectorization, DebugLoc DL)
const InductionDescriptor &IndDesc, DebugLoc DL)
: VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
Step, IndDesc, DL),
IsScalarAfterVectorization(IsScalarAfterVectorization) {
Step, IndDesc, DL) {
addOperand(NumUnrolledElems);
}

Expand All @@ -2232,8 +2230,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
VPWidenPointerInductionRecipe *clone() override {
return new VPWidenPointerInductionRecipe(
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
getDebugLoc());
getOperand(2), getInductionDescriptor(), getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
Expand Down Expand Up @@ -2309,8 +2306,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

VPFirstOrderRecurrencePHIRecipe *clone() override {
return new VPFirstOrderRecurrencePHIRecipe(
auto *R = new VPFirstOrderRecurrencePHIRecipe(
cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
R->addOperand(getOperand(1));
return R;
}

void execute(VPTransformState &State) override;
Expand Down Expand Up @@ -2379,6 +2378,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }

/// Set the factor that the VF of this recipe's output should be scaled by to
/// \p ScaleFactor.
void setVFScaleFactor(unsigned ScaleFactor) { VFScaleFactor = ScaleFactor; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down
96 changes: 96 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,15 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
Plan.getEntry()->swapSuccessors();

createExtractsForLiveOuts(Plan, MiddleVPBB);

VPBuilder ScalarPHBuilder(ScalarPH);
for (const auto &[PhiR, ScalarPhiR] : zip_equal(
drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
auto *VectorPhiR = cast<VPPhi>(&PhiR);
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
}
}

std::unique_ptr<VPlan>
Expand All @@ -544,6 +553,93 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
return VPlan0;
}

/// Builds the widened induction header-phi recipe for \p Phi as described by
/// \p IndDesc: a VPWidenPointerInductionRecipe for pointer inductions and a
/// VPWidenIntOrFpInductionRecipe for every other induction kind. \p PhiR is
/// the VPPhi currently modelling \p Phi; its first operand supplies the start
/// value.
static VPHeaderPHIRecipe *
createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR,
                           const InductionDescriptor &IndDesc, VPlan &Plan,
                           ScalarEvolution &SE, Loop &OrigLoop) {
  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
         "step must be loop invariant");

  VPValue *StartV = PhiR->getOperand(0);
  assert(Plan.getLiveIn(IndDesc.getStartValue()) == StartV &&
         "Start VPValue must match IndDesc's start value");
  VPValue *StepV =
      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());

  const bool IsPointerInduction =
      IndDesc.getKind() == InductionDescriptor::IK_PtrInduction;
  if (!IsPointerInduction) {
    // Make the increment of the wide induction share the induction's step.
    // That way induction increments can be recognized directly in VPlan and
    // no redundant splat of the step is needed.
    using namespace llvm::VPlanPatternMatch;
    VPValue *IncV = PhiR->getOperand(1);
    if (match(IncV, m_Add(m_Specific(PhiR), m_VPValue())))
      IncV->getDefiningRecipe()->setOperand(1, StepV);

    return new VPWidenIntOrFpInductionRecipe(Phi, StartV, StepV, &Plan.getVF(),
                                             IndDesc, PhiR->getDebugLoc());
  }

  // Pointer inductions take VFxUF as their extra operand and leave the
  // increment untouched.
  return new VPWidenPointerInductionRecipe(Phi, StartV, StepV,
                                           &Plan.getVFxUF(), IndDesc,
                                           PhiR->getDebugLoc());
}

/// Replaces the VPPhi recipes at the start of the header block of \p Plan with
/// dedicated header-phi recipes.
///
/// Each VPPhi is classified through its underlying PHINode:
///  * a key of \p Inductions -> recipe from createWidenInductionRecipe;
///  * a key of \p Reductions -> VPReductionPHIRecipe, flagged as in-loop when
///    the phi is in \p InLoopReductions and as ordered when the descriptor is
///    ordered and \p AllowReordering is false;
///  * otherwise the phi is asserted to be in \p FixedOrderRecurrences and a
///    VPFirstOrderRecurrencePHIRecipe is created.
/// The new recipe is inserted before the VPPhi, takes over all of its uses,
/// and the VPPhi is erased.
///
/// \p SE and \p OrigLoop are forwarded to createWidenInductionRecipe;
/// \p OrigLoop is additionally used to check the reduction start value.
void VPlanTransforms::createHeaderPhiRecipes(
VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
const MapVector<PHINode *, InductionDescriptor> &Inductions,
const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {

// The header is reached as the single successor of the entry block's second
// successor.
// NOTE(review): presumably that second successor is the vector preheader --
// confirm against the skeleton built for the plan.
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());

// Early-inc iteration is used because the current recipe is erased at the end
// of each iteration.
for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
// The canonical IV phi is left as is.
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
// Stop at the first recipe that is not a VPPhi.
auto *PhiR = dyn_cast<VPPhi>(&R);
if (!PhiR)
break;

// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
assert(PhiR->getNumOperands() == 2 &&
"Must have 2 operands for header phis");

VPHeaderPHIRecipe *HeaderPhiR = nullptr;
auto InductionIt = Inductions.find(Phi);
if (InductionIt != Inductions.end()) {
// Note: on this path operand 1 of the VPPhi is not added to the new recipe
// here, unlike on the reduction / recurrence path below.
HeaderPhiR = createWidenInductionRecipe(Phi, PhiR, InductionIt->second,
Plan, SE, OrigLoop);
} else {
// Operand 0 of the VPPhi is used as the start value of the new recipe.
VPValue *Start = PhiR->getOperand(0);
auto ReductionIt = Reductions.find(Phi);
if (ReductionIt != Reductions.end()) {
const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
assert(RdxDesc.getRecurrenceStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));

// Ordered reductions are only used when reordering is not allowed and the
// descriptor itself is ordered.
bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
HeaderPhiR = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *Start,
InLoopReductions.contains(Phi), UseOrderedReductions);
} else {
assert(FixedOrderRecurrences.contains(Phi) &&
"can only widen reductions and fixed-order recurrences here");
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
// recurrences in the chain, the fixed order recurrence should be
// modeled directly, enabling more efficient codegen.
HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
}
// Carry over operand 1 of the VPPhi (presumably the backedge value) as the
// second operand of the new recipe.
HeaderPhiR->addOperand(PhiR->getOperand(1));
}
// Swap the new recipe in for the VPPhi: insert it, redirect all uses, then
// erase the old recipe.
HeaderPhiR->insertBefore(PhiR);
PhiR->replaceAllUsesWith(HeaderPhiR);
PhiR->eraseFromParent();
}
}

void VPlanTransforms::handleEarlyExits(VPlan &Plan,
bool HasUncountableEarlyExit) {
auto *MiddleVPBB = cast<VPBasicBlock>(
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4310,7 +4310,7 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
return IsScalarAfterVectorization &&
return vputils::onlyScalarValuesUsed(this) &&
(!IsScalable || vputils::onlyFirstLaneUsed(this));
}

Expand Down
Loading
Loading