Skip to content

Commit b367457

Browse files
committed
[VPlan] Compute cost of intrinsics directly for VPReplicateRecipe (NFCI). (llvm#154617)
Handle intrinsic calls in VPReplicateRecipe::computeCost. There are some pseudo-intrinsics for which the computed cost is known to be zero, so we handle those up front. Depends on llvm#154291. PR: llvm#154617 (cherry-picked from df09879)
1 parent 296c254 commit b367457

File tree

1 file changed

+31
-7
lines changed

1 file changed

+31
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2984,17 +2984,42 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
29842984
case Instruction::Call: {
29852985
auto *CalledFn =
29862986
cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
2987-
if (CalledFn->isIntrinsic())
2988-
break;
29892987

2988+
SmallVector<const VPValue *> ArgOps(drop_end(operands()));
29902989
SmallVector<Type *, 4> Tys;
2991-
for (VPValue *ArgOp : drop_end(operands()))
2990+
for (const VPValue *ArgOp : ArgOps)
29922991
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
2992+
2993+
if (CalledFn->isIntrinsic())
2994+
// Various pseudo-intrinsics with costs of 0 are scalarized instead of
2995+
// vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
2996+
switch (CalledFn->getIntrinsicID()) {
2997+
case Intrinsic::assume:
2998+
case Intrinsic::lifetime_end:
2999+
case Intrinsic::lifetime_start:
3000+
case Intrinsic::sideeffect:
3001+
case Intrinsic::pseudoprobe:
3002+
case Intrinsic::experimental_noalias_scope_decl: {
3003+
assert(getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3004+
ElementCount::getFixed(1), Ctx) == 0 &&
3005+
"scalarizing intrinsic should be free");
3006+
return InstructionCost(0);
3007+
}
3008+
default:
3009+
break;
3010+
}
3011+
29933012
Type *ResultTy = Ctx.Types.inferScalarType(this);
29943013
InstructionCost ScalarCallCost =
29953014
Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
2996-
if (isSingleScalar())
3015+
if (isSingleScalar()) {
3016+
if (CalledFn->isIntrinsic())
3017+
ScalarCallCost = std::min(
3018+
ScalarCallCost,
3019+
getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3020+
ElementCount::getFixed(1), Ctx));
29973021
return ScalarCallCost;
3022+
}
29983023

29993024
if (VF.isScalable())
30003025
return InstructionCost::getInvalid();
@@ -3015,7 +3040,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30153040
// incur any overhead.
30163041
SmallPtrSet<const VPValue *, 4> UniqueOperands;
30173042
Tys.clear();
3018-
for (auto *Op : drop_end(operands())) {
3043+
for (auto *Op : ArgOps) {
30193044
if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
30203045
!UniqueOperands.insert(Op).second)
30213046
continue;
@@ -3025,8 +3050,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30253050
Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
30263051
}
30273052

3028-
return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) +
3029-
ScalarizationCost;
3053+
return ScalarCallCost * VF.getFixedValue() + ScalarizationCost;
30303054
}
30313055
case Instruction::Add:
30323056
case Instruction::Sub:

0 commit comments

Comments
 (0)