Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/ScalarEvolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,9 @@ class ScalarEvolution {
/// \p IndexExprs The expressions for the indices.
LLVM_ABI const SCEV *
getGEPExpr(GEPOperator *GEP, const SmallVectorImpl<const SCEV *> &IndexExprs);
LLVM_ABI const SCEV *getGEPExpr(
const SCEV *BaseExpr, const SmallVectorImpl<const SCEV *> &IndexExprs,
Type *SrcElementTy, SCEV::NoWrapFlags OffsetWrap = SCEV::FlagAnyWrap);
LLVM_ABI const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
LLVM_ABI const SCEV *getMinMaxExpr(SCEVTypes Kind,
SmallVectorImpl<const SCEV *> &Operands);
Expand Down
19 changes: 14 additions & 5 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3774,7 +3774,6 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
GEPNoWrapFlags NW = GEP->getNoWrapFlags();
if (NW != GEPNoWrapFlags::none()) {
// We'd like to propagate flags from the IR to the corresponding SCEV nodes,
Expand All @@ -3793,7 +3792,16 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
if (NW.hasNoUnsignedWrap())
OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNUW);

Type *CurTy = GEP->getType();
return getGEPExpr(BaseExpr, IndexExprs, GEP->getSourceElementType(),
OffsetWrap);
}

const SCEV *
ScalarEvolution::getGEPExpr(const SCEV *BaseExpr,
const SmallVectorImpl<const SCEV *> &IndexExprs,
Type *SrcElementTy, SCEV::NoWrapFlags OffsetWrap) {
Type *CurTy = BaseExpr->getType();
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
bool FirstIter = true;
SmallVector<const SCEV *, 4> Offsets;
for (const SCEV *IndexExpr : IndexExprs) {
Expand All @@ -3812,7 +3820,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
if (FirstIter) {
assert(isa<PointerType>(CurTy) &&
"The first index of a GEP indexes a pointer");
CurTy = GEP->getSourceElementType();
CurTy = SrcElementTy;
FirstIter = false;
} else {
CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0);
Expand All @@ -3837,8 +3845,9 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// Add the base address and the offset. We cannot use the nsw flag, as the
// base address is unsigned. However, if we know that the offset is
// non-negative, we can use nuw.
bool NUW = NW.hasNoUnsignedWrap() ||
(NW.hasNoUnsignedSignedWrap() && isKnownNonNegative(Offset));
bool NUW =
hasFlags(OffsetWrap, SCEV::FlagNUW) ||
(hasFlags(OffsetWrap, SCEV::FlagNSW) && isKnownNonNegative(Offset));
SCEV::NoWrapFlags BaseWrap = NUW ? SCEV::FlagNUW : SCEV::FlagAnyWrap;
auto *GEPExpr = getAddExpr(BaseExpr, Offset, BaseWrap);
assert(BaseExpr->getType() == GEPExpr->getType() &&
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3908,7 +3908,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
continue;

VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
*CM.PSE.getSE());
*CM.PSE.getSE(), OrigLoop);
precomputeCosts(*Plan, VF, CostCtx);
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
Expand Down Expand Up @@ -4166,7 +4166,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
// Add on other costs that are modelled in VPlan, but not in the legacy
// cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
*CM.PSE.getSE());
*CM.PSE.getSE(), OrigLoop);
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
assert(VectorRegion && "Expected to have a vector region!");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
Expand Down Expand Up @@ -6858,7 +6858,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,

InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(),
OrigLoop);
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);

// Now compute and add the VPlan-based cost.
Expand Down Expand Up @@ -7092,7 +7093,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
// case, don't trigger the assertion, as the extra simplifications may cause a
// different VF to be picked by the VPlan-based cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
*CM.PSE.getSE());
*CM.PSE.getSE(), OrigLoop);
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
// with early exits and plans with additional VPlan simplifications. The
Expand Down Expand Up @@ -8427,7 +8428,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// and mulacc-reduction are implemented.
if (!CM.foldTailWithEVL()) {
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
*CM.PSE.getSE());
*CM.PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
CostCtx, Range);
}
Expand Down Expand Up @@ -9895,7 +9896,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
CM.CostKind, *CM.PSE.getSE());
CM.CostKind, *CM.PSE.getSE(), L);
if (!ForceVectorization &&
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
LVP.getPlanFor(VF.Width), SEL,
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,13 +350,14 @@ struct VPCostContext {
SmallPtrSet<Instruction *, 8> SkipCostComputation;
TargetTransformInfo::TargetCostKind CostKind;
ScalarEvolution &SE;
const Loop *L;

VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
const VPlan &Plan, LoopVectorizationCostModel &CM,
TargetTransformInfo::TargetCostKind CostKind,
ScalarEvolution &SE)
ScalarEvolution &SE, const Loop *L)
: TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
CostKind(CostKind), SE(SE) {}
CostKind(CostKind), SE(SE), L(L) {}

/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
Expand Down
23 changes: 13 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3132,26 +3132,30 @@ bool VPReplicateRecipe::shouldPack() const {
});
}

/// Returns true if \p Ptr is a pointer computation for which the legacy cost
/// model computes a SCEV expression when computing the address cost.
static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
/// Returns a SCEV expression for \p Ptr if it is a pointer computation for
/// which the legacy cost model computes a SCEV expression when computing the
/// address cost, or nullptr if it is not such a computation. Computing SCEVs
/// for VPValues is incomplete and may return SCEVCouldNotCompute even in
/// cases where the legacy cost model can compute a SCEV; in those cases the
/// caller falls back to the legacy cost model.
static const SCEV *getAddressAccessSCEV(const VPValue *Ptr, ScalarEvolution &SE,
const Loop *L) {
auto *PtrR = Ptr->getDefiningRecipe();
if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
Instruction::GetElementPtr) ||
isa<VPWidenGEPRecipe>(PtrR) ||
match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
return false;
return nullptr;

// We are looking for a GEP where all indices are either loop invariant or
// inductions.
for (VPValue *Opd : drop_begin(PtrR->operands())) {
if (!Opd->isDefinedOutsideLoopRegions() &&
!isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
return false;
return nullptr;
}

return true;
return vputils::getSCEVExprForVPValue(Ptr, SE, L);
}

/// Returns true if \p V is used as part of the address of another load or
Expand Down Expand Up @@ -3319,9 +3323,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,

bool IsLoad = UI->getOpcode() == Instruction::Load;
const VPValue *PtrOp = getOperand(!IsLoad);
// TODO: Handle cases where we need to pass a SCEV to
// getAddressComputationCost.
if (shouldUseAddressAccessSCEV(PtrOp))
const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.SE, Ctx.L);
if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV))
break;

Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
Expand All @@ -3339,7 +3342,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
nullptr, Ctx.CostKind);
PtrSCEV, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;

Expand Down
44 changes: 43 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
B == Plan.getBackedgeTakenCount();
}

const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
ScalarEvolution &SE, const Loop *L) {
if (V->isLiveIn()) {
if (Value *LiveIn = V->getLiveInIRValue())
return SE.getSCEV(LiveIn);
Expand All @@ -89,6 +90,47 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
.Case<VPExpandSCEVRecipe>(
[](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
.Case<VPCanonicalIVPHIRecipe>([&SE, L](const VPCanonicalIVPHIRecipe *R) {
if (!L)
return SE.getCouldNotCompute();
const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
SCEV::FlagAnyWrap);
})
.Case<VPDerivedIVRecipe>([&SE, L](const VPDerivedIVRecipe *R) {
const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L);
const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L);
if (any_of(ArrayRef({Start, IV, Scale}), IsaPred<SCEVCouldNotCompute>))
return SE.getCouldNotCompute();

return SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()),
SE.getMulExpr(IV, SE.getTruncateOrSignExtend(
Scale, IV->getType())));
})
.Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) {
return getSCEVExprForVPValue(R->getOperand(0), SE, L);
})
.Case<VPReplicateRecipe>([&SE, L](const VPReplicateRecipe *R) {
if (R->getOpcode() != Instruction::GetElementPtr)
return SE.getCouldNotCompute();

const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L);
if (isa<SCEVCouldNotCompute>(Base))
return SE.getCouldNotCompute();

SmallVector<const SCEV *> IndexExprs;
for (VPValue *Index : drop_begin(R->operands())) {
const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L);
if (isa<SCEVCouldNotCompute>(IndexExpr))
return SE.getCouldNotCompute();
IndexExprs.push_back(IndexExpr);
}

Type *SrcElementTy = cast<GetElementPtrInst>(R->getUnderlyingInstr())
->getSourceElementType();
return SE.getGEPExpr(Base, IndexExprs, SrcElementTy, SCEV::FlagAnyWrap);
})
.Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr);

/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no
/// SCEV expression could be constructed.
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE,
const Loop *L = nullptr);

/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
Expand Down