Skip to content

Commit 1ecd2de

Browse files
committed
[VPlan] Extend getSCEVForVPV, use to compute VPReplicateRecipe cost. (llvm#161276)
Update getSCEVExprForVPValue to handle more complex expressions, to use it in VPReplicateRecipe::computeCost. In particular, it supports constructing SCEV expressions for GetElementPtr VPReplicateRecipes, with operands that are VPScalarIVStepsRecipe, VPDerivedIVRecipe and VPCanonicalIVPHIRecipe. If we hit a sub-expression we don't support yet, we return SCEVCouldNotCompute. Note that the SCEV expression is valid for VF = 1: we only support constructing AddRecs for VPCanonicalIVPHIRecipe, which is an AddRec starting at 0 and stepping by 1. The returned SCEV expressions could be converted to VF-specific ones by rewriting the AddRecs to ones with the appropriate step. Note that the logic for constructing SCEVs for GetElementPtr was directly ported from ScalarEvolution.cpp. Another thing to note is that we construct SCEV expressions purely by looking at the operation of the recipe and its translated operands, without accessing the underlying IR (the exception being getting the source element type for GEPs). PR: llvm#161276 (cherry-picked from b2d12d6) This cherry-picks only the getSCEVForVPV improvements.
1 parent 83f5f48 commit 1ecd2de

File tree

4 files changed

+84
-20
lines changed

4 files changed

+84
-20
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4018,8 +4018,8 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
40184018
if (VF.isScalar())
40194019
continue;
40204020

4021-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(),
4022-
CM, CM.CostKind);
4021+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
4022+
*CM.PSE.getSE(), OrigLoop);
40234023
precomputeCosts(*Plan, VF, CostCtx);
40244024
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
40254025
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4273,8 +4273,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
42734273

42744274
// Add on other costs that are modelled in VPlan, but not in the legacy
42754275
// cost model.
4276-
VPCostContext CostCtx(CM.TTI, *CM.TLI, CM.Legal->getWidestInductionType(),
4277-
CM, CM.CostKind);
4276+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
4277+
*CM.PSE.getSE(), OrigLoop);
42784278
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
42794279
assert(VectorRegion && "Expected to have a vector region!");
42804280
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6874,8 +6874,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
68746874

68756875
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
68766876
ElementCount VF) const {
6877-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
6878-
CM.CostKind);
6877+
VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(),
6878+
OrigLoop);
68796879
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
68806880

68816881
// Now compute and add the VPlan-based cost.
@@ -7075,13 +7075,14 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
70757075
// simplifications not accounted for in the legacy cost model. If that's the
70767076
// case, don't trigger the assertion, as the extra simplifications may cause a
70777077
// different VF to be picked by the VPlan-based cost model.
7078-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
7079-
CM.CostKind);
7078+
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
7079+
*CM.PSE.getSE(), OrigLoop);
70807080
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
70817081
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
70827082
// with early exits and plans with additional VPlan simplifications. The
70837083
// legacy cost model doesn't properly model costs for such loops.
70847084
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
7085+
!Legal->getLAI()->getSymbolicStrides().empty() ||
70857086
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
70867087
CostCtx, OrigLoop,
70877088
BestFactor.Width) ||
@@ -8832,8 +8833,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88328833
// TODO: Enable following transform when the EVL-version of extended-reduction
88338834
// and mulacc-reduction are implemented.
88348835
if (!CM.foldTailWithEVL()) {
8835-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
8836-
CM.CostKind);
8836+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
8837+
*CM.PSE.getSE(), OrigLoop);
88378838
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
88388839
CostCtx, Range);
88398840
}
@@ -10108,8 +10109,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1010810109
// Check if it is profitable to vectorize with runtime checks.
1010910110
bool ForceVectorization =
1011010111
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
10111-
VPCostContext CostCtx(CM.TTI, *CM.TLI, CM.Legal->getWidestInductionType(),
10112-
CM, CM.CostKind);
10112+
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
10113+
CM.CostKind, *CM.PSE.getSE(), L);
1011310114
if (!ForceVectorization &&
1011410115
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
1011510116
LVP.getPlanFor(VF.Width), SEL,

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,12 +349,15 @@ struct VPCostContext {
349349
LoopVectorizationCostModel &CM;
350350
SmallPtrSet<Instruction *, 8> SkipCostComputation;
351351
TargetTransformInfo::TargetCostKind CostKind;
352+
ScalarEvolution &SE;
353+
const Loop *L;
352354

353355
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
354-
Type *CanIVTy, LoopVectorizationCostModel &CM,
355-
TargetTransformInfo::TargetCostKind CostKind)
356-
: TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
357-
CM(CM), CostKind(CostKind) {}
356+
const VPlan &Plan, LoopVectorizationCostModel &CM,
357+
TargetTransformInfo::TargetCostKind CostKind,
358+
ScalarEvolution &SE, const Loop *L)
359+
: TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
360+
CostKind(CostKind), SE(SE), L(L) {}
358361

359362
/// Return the cost for \p UI with \p VF using the legacy cost model as
360363
/// fallback until computing the cost of all recipes migrates to VPlan.

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,73 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
7373
IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount();
7474
}
7575

76-
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
77-
if (V->isLiveIn())
78-
return SE.getSCEV(V->getLiveInIRValue());
76+
const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
77+
ScalarEvolution &SE, const Loop *L) {
78+
if (V->isLiveIn()) {
79+
if (Value *LiveIn = V->getLiveInIRValue())
80+
return SE.getSCEV(LiveIn);
81+
return SE.getCouldNotCompute();
82+
}
7983

8084
// TODO: Support constructing SCEVs for more recipes as needed.
8185
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
8286
.Case<VPExpandSCEVRecipe>(
8387
[](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
88+
.Case<VPCanonicalIVPHIRecipe>([&SE, L](const VPCanonicalIVPHIRecipe *R) {
89+
if (!L)
90+
return SE.getCouldNotCompute();
91+
const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
92+
return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
93+
SCEV::FlagAnyWrap);
94+
})
95+
.Case<VPWidenIntOrFpInductionRecipe>(
96+
[&SE, L](const VPWidenIntOrFpInductionRecipe *R) {
97+
const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), SE, L);
98+
if (!L || isa<SCEVCouldNotCompute>(Step))
99+
return SE.getCouldNotCompute();
100+
const SCEV *Start =
101+
getSCEVExprForVPValue(R->getStartValue(), SE, L);
102+
return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
103+
})
104+
.Case<VPDerivedIVRecipe>([&SE, L](const VPDerivedIVRecipe *R) {
105+
const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
106+
const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L);
107+
const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L);
108+
if (any_of(ArrayRef({Start, IV, Scale}), IsaPred<SCEVCouldNotCompute>))
109+
return SE.getCouldNotCompute();
110+
111+
return SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()),
112+
SE.getMulExpr(IV, SE.getTruncateOrSignExtend(
113+
Scale, IV->getType())));
114+
})
115+
.Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) {
116+
const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L);
117+
const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L);
118+
if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step) ||
119+
!Step->isOne())
120+
return SE.getCouldNotCompute();
121+
return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()),
122+
Step);
123+
})
124+
.Case<VPReplicateRecipe>([&SE, L](const VPReplicateRecipe *R) {
125+
if (R->getOpcode() != Instruction::GetElementPtr)
126+
return SE.getCouldNotCompute();
127+
128+
const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L);
129+
if (isa<SCEVCouldNotCompute>(Base))
130+
return SE.getCouldNotCompute();
131+
132+
SmallVector<const SCEV *> IndexExprs;
133+
for (VPValue *Index : drop_begin(R->operands())) {
134+
const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L);
135+
if (isa<SCEVCouldNotCompute>(IndexExpr))
136+
return SE.getCouldNotCompute();
137+
IndexExprs.push_back(IndexExpr);
138+
}
139+
140+
auto *GEP = cast<GEPOperator>(R->getUnderlyingInstr());
141+
return SE.getGEPExpr(const_cast<GEPOperator *>(GEP), IndexExprs);
142+
})
84143
.Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
85144
}
86145

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3636

3737
/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no
3838
/// SCEV expression could be constructed.
39-
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
39+
const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE,
40+
const Loop *L = nullptr);
4041

4142
/// Returns true if \p VPV is a single scalar, either because it produces the
4243
/// same value for all lanes or only has its first lane used.

0 commit comments

Comments
 (0)