Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ class LoopVectorizationLegality {
/// induction descriptor.
using InductionList = MapVector<PHINode *, InductionDescriptor>;

/// MonotonicPHIList saves monotonic phi variables and maps them to the
/// monotonic phi descriptor.
using MonotonicPHIList = MapVector<PHINode *, MonotonicDescriptor>;

/// RecurrenceSet contains the phi nodes that are recurrences other than
/// inductions and reductions.
using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
Expand Down Expand Up @@ -315,6 +319,11 @@ class LoopVectorizationLegality {
/// Returns the induction variables found in the loop.
const InductionList &getInductionVars() const { return Inductions; }

/// Returns the monotonic phi variables found in the loop.
const MonotonicPHIList &getMonotonicPHIs() const { return MonotonicPHIs; }

/// Returns true if at least one monotonic phi variable was found in the loop.
bool hasMonotonicPHIs() const { return !MonotonicPHIs.empty(); }

/// Return the fixed-order recurrences found in the loop.
RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }

Expand Down Expand Up @@ -372,6 +381,12 @@ class LoopVectorizationLegality {
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;

/// Returns true if \p Phi is a monotonic variable, i.e. it is one of the
/// monotonic phis recorded for the loop.
bool isMonotonicPHI(PHINode *Phi) const;

/// Check if the memory access through \p Ptr is compressed when vectorizing.
/// Returns true when \p Ptr is a monotonic value of the loop whose predicate
/// edge is the edge from \p BB to its unique successor and whose step is a
/// constant equal to the allocation size of \p AccessTy.
bool isCompressedPtr(Type *AccessTy, Value *Ptr, BasicBlock *BB) const;

/// Returns true if \p V is invariant across all loop iterations according to
/// SCEV.
bool isInvariant(Value *V) const;
Expand Down Expand Up @@ -677,6 +692,9 @@ class LoopVectorizationLegality {
/// variables can be pointers.
InductionList Inductions;

/// Holds all of the monotonic phi variables that we found in the loop.
MonotonicPHIList MonotonicPHIs;

/// Holds all the casts that participate in the update chain of the induction
/// variables, and that have been proven to be redundant (possibly under a
/// runtime guard). These casts can be ignored when creating the vectorized
Expand Down
35 changes: 35 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
cl::desc("Enable recognition of non-constant strided "
"pointer induction variables."));

// Hidden switch that gates the recognition of monotonic phis during legality
// analysis. It is on by default; when off, such phis are not recorded as
// monotonic and fall through to the remaining phi classification.
static cl::opt<bool> EnableMonotonicPatterns(
"lv-monotonic-patterns", cl::init(true), cl::Hidden,
cl::desc("Enable recognition of monotonic patterns."));

static cl::opt<bool>
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
cl::desc("Allow enabling loop hints to reorder "
Expand Down Expand Up @@ -470,6 +474,30 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
return 0;
}

// A phi is monotonic exactly when legality analysis stored a descriptor for it
// in the MonotonicPHIs map.
bool LoopVectorizationLegality::isMonotonicPHI(PHINode *Phi) const {
  return MonotonicPHIs.find(Phi) != MonotonicPHIs.end();
}

// Decide whether an access of type AccessTy through Ptr, located in block BB,
// is a compressed access. All three conditions below must hold; any failure
// yields false.
bool LoopVectorizationLegality::isCompressedPtr(Type *AccessTy, Value *Ptr,
                                                BasicBlock *BB) const {
  auto &SE = *PSE.getSE();

  // 1. The address must be a monotonic value of the loop.
  MonotonicDescriptor MD;
  const bool IsMonotonic =
      MonotonicDescriptor::isMonotonicVal(Ptr, TheLoop, MD, SE);
  if (!IsMonotonic)
    return false;

  // 2. The memory operation has to use the same mask as the monotonic phi.
  //    This is checked by requiring the descriptor's predicate edge to be the
  //    edge leaving BB towards its unique successor.
  // TODO: relax restrictions of current implementation.
  const MonotonicDescriptor::Edge AccessEdge(BB, BB->getUniqueSuccessor());
  if (MD.getPredicateEdge() != AccessEdge)
    return false;

  // 3. The pointer step must be a constant equal to the access size.
  if (const auto *ConstStep =
          dyn_cast<SCEVConstant>(MD.getExpr()->getStepRecurrence(SE)))
    return ConstStep->getAPInt() ==
           BB->getDataLayout().getTypeAllocSize(AccessTy);
  return false;
}

// Forwards the invariance query for V to LAI->isInvariant.
bool LoopVectorizationLegality::isInvariant(Value *V) const {
return LAI->isInvariant(V);
}
Expand Down Expand Up @@ -916,6 +944,13 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
return true;
}

MonotonicDescriptor MD;
if (EnableMonotonicPatterns &&
MonotonicDescriptor::isMonotonicPHI(Phi, TheLoop, MD, *PSE.getSE())) {
MonotonicPHIs[Phi] = MD;
return true;
}

if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
AllowedExit.insert(Phi);
FixedOrderRecurrences.insert(Phi);
Expand Down
119 changes: 112 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1241,9 +1241,9 @@ class LoopVectorizationCostModel {
getDivRemSpeculationCost(Instruction *I,
ElementCount VF) const;

/// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
/// memory instruction with consecutive access that can be widened, or
/// CM_Unknown otherwise.
/// Returns widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed) if
/// \p I is a memory instruction with consecutive access that can be widened,
/// or CM_Unknown otherwise.
InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);

/// Returns true if \p I is a memory instruction in an interleaved-group
Expand Down Expand Up @@ -3000,6 +3000,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
auto *Ptr = getLoadStorePointerOperand(I);
auto *ScalarTy = getLoadStoreType(I);

if (Legal->isCompressedPtr(ScalarTy, Ptr, I->getParent()))
return CM_Compressed;

// In order to be widened, the pointer should be consecutive, first of all.
auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
if (!Stride)
Expand Down Expand Up @@ -3257,6 +3260,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
AddToWorklistIfAllowed(IndUpdate);
}

// Handle monotonic phis (similarly to induction vars). A monotonic phi and its
// latch update are added to the uniform worklist only if every user of both
// values is acceptable: outside the loop, already in the worklist, a
// vectorized memory access using the value, or part of the phi's own update
// cycle.
for (const auto &MonotonicPHI : Legal->getMonotonicPHIs()) {
  auto *Phi = MonotonicPHI.first;
  auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  const auto &Desc = MonotonicPHI.second;

  // Determine if all users of the monotonic phi are uniform after
  // vectorization. The step instruction and the phis of the descriptor's
  // chain belong to the update cycle and are always accepted.
  auto UniformPhi = llvm::all_of(Phi->users(), [&](User *U) -> bool {
    auto *I = cast<Instruction>(U);
    if (I == Desc.getStepInst())
      return true;
    if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain().contains(PN))
      return true;
    return !TheLoop->contains(I) || Worklist.count(I) ||
           IsVectorizedMemAccessUse(I, Phi);
  });
  if (!UniformPhi)
    continue;

  // Determine if all users of the phi update are uniform after vectorization.
  // The memory-access query has to be asked about PhiUpdate, the value whose
  // users are being walked here; asking about Phi would test the wrong
  // operand of the user.
  auto UniformPhiUpdate =
      llvm::all_of(PhiUpdate->users(), [&](User *U) -> bool {
        auto *I = cast<Instruction>(U);
        if (I == Phi)
          return true;
        return !TheLoop->contains(I) || Worklist.count(I) ||
               IsVectorizedMemAccessUse(I, PhiUpdate);
      });
  if (!UniformPhiUpdate)
    continue;

  // Both the phi and its update are uniform after vectorization.
  AddToWorklistIfAllowed(Phi);
  AddToWorklistIfAllowed(PhiUpdate);
}

Uniforms[VF].insert_range(Worklist);
}

Expand Down Expand Up @@ -4561,6 +4597,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
if (Plan.hasEarlyExit())
return 1;

// Monotonic vars don't support interleaving.
if (Legal->hasMonotonicPHIs())
return 1;

const bool HasReductions =
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
IsaPred<VPReductionPHIRecipe>);
Expand Down Expand Up @@ -8074,12 +8114,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
VPSingleDefRecipe *PhiRecipe = nullptr;
assert((Legal->isMonotonicPHI(Phi) || Legal->isReductionVariable(Phi) ||
Legal->isFixedOrderRecurrence(Phi)) &&
"can only widen reductions and fixed-order recurrences here");
"can only widen monotonic phis, reductions and fixed-order "
"recurrences here");
VPValue *StartV = Operands[0];
if (Legal->isReductionVariable(Phi)) {
Value *IncomingVal =
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader());
if (Legal->isMonotonicPHI(Phi)) {
PhiRecipe = new VPPhi({StartV}, Phi->getDebugLoc(),
Phi->getName() + ".monotonic");
PhiRecipe->setUnderlyingValue(Phi);
} else if (Legal->isReductionVariable(Phi)) {
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
assert(RdxDesc.getRecurrenceStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
Expand Down Expand Up @@ -8430,6 +8477,55 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// bring the VPlan to its final state.
// ---------------------------------------------------------------------------

// Adjust the recipes for any monotonic phis: the backedge value of each
// monotonic header phi is replaced by a ComputeMonotonicResult instruction
// built from the phi, the mask of its predicate edge and its step.
auto &MonotonicPHIs = Legal->getMonotonicPHIs();
for (VPRecipeBase &R : HeaderVPBB->phis()) {
  auto *MonotonicPhi = dyn_cast<VPPhi>(&R);
  if (!MonotonicPhi)
    continue;

  // Only VPPhis that wrap an IR phi recorded by legality are monotonic. Skip
  // everything else before inspecting its shape, so that header VPPhis
  // without an underlying IR phi neither crash the cast nor trip the assert.
  auto *IRPhi = dyn_cast_or_null<PHINode>(MonotonicPhi->getUnderlyingValue());
  if (!IRPhi)
    continue;
  auto It = MonotonicPHIs.find(IRPhi);
  if (It == MonotonicPHIs.end())
    continue;
  auto &Desc = It->second;

  assert(MonotonicPhi->getNumIncoming() == 2 &&
         MonotonicPhi->getIncomingBlock(0) == Plan->getVectorPreheader() &&
         "Monotonic phi must have a preheader and a backedge incoming value");

  // Prohibit scalarization of monotonic phis.
  if (!all_of(Range, [&](ElementCount VF) {
        return CM.isUniformAfterVectorization(
            MonotonicPhi->getUnderlyingInstr(), VF);
      }))
    return nullptr;

  // Obtain mask value for the predicate edge from the last VPBlendRecipe in
  // chain. At each blend, follow the first incoming value that is not the phi
  // itself and remember the mask guarding it.
  VPValue *BackedgeVal = MonotonicPhi->getIncomingValue(1);
  VPValue *Chain = BackedgeVal;
  VPValue *Mask = nullptr;
  while (auto *BlendR = dyn_cast<VPBlendRecipe>(Chain)) {
    VPValue *Next = nullptr;
    for (unsigned I = 0, E = BlendR->getNumIncomingValues(); I != E; ++I) {
      VPValue *IncomingVal = BlendR->getIncomingValue(I);
      if (IncomingVal == MonotonicPhi)
        continue;
      Next = IncomingVal;
      Mask = BlendR->getMask(I);
      break;
    }
    // A blend with no incoming value other than the phi makes no progress;
    // stop instead of looping forever on the same recipe.
    if (!Next)
      break;
    Chain = Next;
  }
  assert(Mask && "Expected a mask for the predicate edge of a monotonic phi");

  auto &SE = *PSE.getSE();
  auto *Step = vputils::getOrCreateVPValueForSCEVExpr(
      *Plan, Desc.getExpr()->getStepRecurrence(SE));

  // Create the monotonic update next to the recipe defining the old backedge
  // value and redirect all users of that value to it.
  auto *MonotonicI =
      new VPInstruction(VPInstruction::ComputeMonotonicResult,
                        {MonotonicPhi, Mask, Step}, *Desc.getStepInst());
  auto *InsertBlock = BackedgeVal->getDefiningRecipe()->getParent();
  InsertBlock->insert(MonotonicI, InsertBlock->getFirstNonPhi());
  BackedgeVal->replaceAllUsesWith(MonotonicI);
}

// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

Expand Down Expand Up @@ -9892,6 +9988,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);

unsigned SelectedIC = std::max(IC, UserIC);

// Loops with monotonic phis are only handled with an interleave count of 1.
// SelectedIC is the larger of the computed and the user-requested count, so a
// count above 1 here rejects the loop with a remark instead of vectorizing it.
if (LVL.hasMonotonicPHIs() && SelectedIC > 1) {
reportVectorizationFailure(
"Interleaving of loop with monotonic vars",
"Interleaving of loops with monotonic vars is not supported",
"CantInterleaveWithMonotonicVars", ORE, L);
return false;
}

// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
if (VF.Width.isVector() || SelectedIC > 1) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
ComputeAnyOfResult,
ComputeFindIVResult,
ComputeReductionResult,
ComputeMonotonicResult,
// Extracts the last lane from its operand if it is a vector, or the last
// part if scalar. In the latter case, the recipe will be removed during
// unrolling.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::ComputeReductionResult: {
return inferScalarType(R->getOperand(0));
}
// The monotonic result has the type of the IR phi underlying the VPPhi that
// is passed as operand 0.
case VPInstruction::ComputeMonotonicResult: {
auto *PhiR = cast<VPPhi>(R->getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
return OrigPhi->getType();
}
case VPInstruction::ExplicitVectorLength:
return Type::getIntNTy(Ctx, 32);
case Instruction::PHI:
Expand Down
50 changes: 46 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ActiveLaneMask:
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ReductionStartVector:
case VPInstruction::ComputeMonotonicResult:
return 3;
case VPInstruction::ComputeFindIVResult:
return 4;
Expand Down Expand Up @@ -900,6 +901,34 @@ Value *VPInstruction::generate(VPTransformState &State) {

return ReducedPartRdx;
}
// Emits the next value of a monotonic phi:
//   Phi + (number of set lanes in Mask) * Step
// Operand 0 is the phi (scalar), operand 1 the mask, operand 2 the step
// (scalar).
case VPInstruction::ComputeMonotonicResult: {
assert(getParent()->getPlan()->getUF() == 1 &&
"Expected unroll factor of 1.");

auto *Phi = State.get(getOperand(0), /*IsScalar*/ true);
auto *PhiTy = Phi->getType();
// NOTE(review): the bare 0 presumably selects the non-scalar (vector) form of
// the mask, which the assert below relies on; consider spelling the argument
// out - confirm against the VPTransformState::get overloads.
Value *Mask = State.get(getOperand(1), 0);
auto *MaskTy = Mask->getType();
assert(isa<VectorType>(MaskTy) &&
cast<VectorType>(MaskTy)->getElementType()->isIntegerTy(1) &&
"Mask type should be <N x i1>");

// Arithmetic is done in the phi type for integer phis and in the data
// layout's index type of the phi type otherwise.
const auto &DL = State.CFG.PrevBB->getDataLayout();
auto *IntTy = PhiTy->isIntegerTy() ? PhiTy : DL.getIndexType(PhiTy);

auto *Step = State.get(getOperand(2), /*IsScalar*/ true);

// Count the active lanes by zero-extending the i1 mask to a vector of IntTy
// and add-reducing it, then scale the count by the step.
auto &Builder = State.Builder;
auto *NumElems = Builder.CreateAddReduce(
Builder.CreateZExt(Mask, MaskTy->getWithNewType(IntTy)));
auto *Offset = Builder.CreateMul(NumElems, Step);

// Pointer phis advance with a ptradd carrying this recipe's GEP no-wrap
// flags; all other phis use an add carrying its nuw/nsw flags.
return PhiTy->isPointerTy()
? Builder.CreatePtrAdd(Phi, Offset, "monotonic.add",
getGEPNoWrapFlags())
: Builder.CreateAdd(Phi, Offset, "monotonic.add",
hasNoUnsignedWrap(), hasNoSignedWrap());
}
case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractPenultimateElement: {
Expand Down Expand Up @@ -1169,6 +1198,12 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
I32Ty, {Arg0Ty, I32Ty, I1Ty});
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
}
// The cost is modelled as one add-reduction over a VF-wide vector whose
// element type is the scalar type of operand 0 (the monotonic phi).
// NOTE(review): generate() reduces over the integer/index type, not the phi
// type. For a pointer phi this query is made on a vector of pointers; confirm
// that is valid for getArithmeticReductionCost, or switch to the index type.
case VPInstruction::ComputeMonotonicResult: {
Type *ElementTy = Ctx.Types.inferScalarType(getOperand(0));
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
return Ctx.TTI.getArithmeticReductionCost(Instruction::Add, VectorTy,
std::nullopt, Ctx.CostKind);
}
case VPInstruction::ExtractLastElement: {
// Add on the cost of extracting the element.
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
Expand All @@ -1182,8 +1217,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
default:
// TODO: Compute cost other VPInstructions once the legacy cost model has
// been retired.
assert(!getUnderlyingValue() &&
"unexpected VPInstruction witht underlying value");
assert((getOpcode() == Instruction::PHI || !getUnderlyingValue()) &&
"unexpected VPInstruction with underlying value");
return 0;
}
}
Expand All @@ -1198,6 +1233,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::ComputeMonotonicResult ||
getOpcode() == VPInstruction::AnyOf;
}

Expand Down Expand Up @@ -1421,6 +1457,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
case VPInstruction::ComputeMonotonicResult:
O << "compute-monotonic-result";
break;
case VPInstruction::LogicalAnd:
O << "logical-and";
break;
Expand Down Expand Up @@ -2043,7 +2082,9 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
case OperationType::OverflowingBinOp:
return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
Opcode == Instruction::Mul ||
Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;
Opcode ==
VPInstruction::VPInstruction::CanonicalIVIncrementForPart ||
Opcode == VPInstruction::ComputeMonotonicResult;
case OperationType::Trunc:
return Opcode == Instruction::Trunc;
case OperationType::DisjointOp:
Expand All @@ -2053,7 +2094,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
case OperationType::GEPOp:
return Opcode == Instruction::GetElementPtr ||
Opcode == VPInstruction::PtrAdd ||
Opcode == VPInstruction::WidePtrAdd;
Opcode == VPInstruction::WidePtrAdd ||
Opcode == VPInstruction::ComputeMonotonicResult;
case OperationType::FPMathOp:
return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
Expand Down
Loading
Loading