Skip to content

Commit bddd21b

Browse files
committed
[VPlan] Compute cost of intrinsics directly for VPReplicateRecipe (NFCI).
Handle intrinsic calls in VPReplicateRecipe::computeCost. There are some pseudo-intrinsics for which the computed cost is known to be zero, so we handle those up front. Depends on llvm#154291.
1 parent 5e32f72 commit bddd21b

File tree

1 file changed

+30
-7
lines changed

1 file changed

+30
-7
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3063,17 +3063,41 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30633063
case Instruction::Call: {
30643064
auto *CalledFn =
30653065
cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
3066-
if (CalledFn->isIntrinsic())
3067-
break;
30683066

3067+
SmallVector<const VPValue *> ArgOps(drop_end(operands()));
30693068
SmallVector<Type *, 4> Tys;
3070-
for (VPValue *ArgOp : drop_end(operands()))
3069+
for (const VPValue *ArgOp : ArgOps)
30713070
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
3071+
3072+
if (CalledFn->isIntrinsic())
3073+
// Various pseudo-intrinsics with costs of 0 are scalarized instead of
3074+
// vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
3075+
switch (CalledFn->getIntrinsicID()) {
3076+
case Intrinsic::assume:
3077+
case Intrinsic::lifetime_end:
3078+
case Intrinsic::lifetime_start:
3079+
case Intrinsic::sideeffect:
3080+
case Intrinsic::pseudoprobe:
3081+
case Intrinsic::experimental_noalias_scope_decl: {
3082+
assert(getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3083+
ElementCount::getFixed(1), Ctx) == 0 && "scalarizing intrinsic should be free");
3084+
return InstructionCost(0);
3085+
}
3086+
default:
3087+
break;
3088+
}
3089+
30723090
Type *ResultTy = Ctx.Types.inferScalarType(this);
30733091
InstructionCost ScalarCallCost =
30743092
Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
3075-
if (isSingleScalar())
3093+
if (isSingleScalar()) {
3094+
if (CalledFn->isIntrinsic())
3095+
ScalarCallCost = std::min(
3096+
ScalarCallCost,
3097+
getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3098+
ElementCount::getFixed(1), Ctx));
30763099
return ScalarCallCost;
3100+
}
30773101

30783102
if (VF.isScalable())
30793103
return InstructionCost::getInvalid();
@@ -3094,7 +3118,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30943118
// incur any overhead.
30953119
SmallPtrSet<const VPValue *, 4> UniqueOperands;
30963120
Tys.clear();
3097-
for (auto *Op : drop_end(operands())) {
3121+
for (auto *Op : ArgOps) {
30983122
if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
30993123
!UniqueOperands.insert(Op).second)
31003124
continue;
@@ -3104,8 +3128,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
31043128
Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
31053129
}
31063130

3107-
return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) +
3108-
ScalarizationCost;
3131+
return ScalarCallCost * VF.getFixedValue() + ScalarizationCost;
31093132
}
31103133
case Instruction::Add:
31113134
case Instruction::Sub:

0 commit comments

Comments
 (0)