@@ -2984,17 +2984,42 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
29842984 case Instruction::Call: {
29852985 auto *CalledFn =
29862986 cast<Function>(getOperand (getNumOperands () - 1 )->getLiveInIRValue ());
2987- if (CalledFn->isIntrinsic ())
2988- break ;
29892987
2988+ SmallVector<const VPValue *> ArgOps (drop_end (operands ()));
29902989 SmallVector<Type *, 4 > Tys;
2991- for (VPValue *ArgOp : drop_end ( operands ()) )
2990+ for (const VPValue *ArgOp : ArgOps )
29922991 Tys.push_back (Ctx.Types .inferScalarType (ArgOp));
2992+
2993+ if (CalledFn->isIntrinsic ())
2994+ // Various pseudo-intrinsics with costs of 0 are scalarized instead of
2995+ // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
2996+ switch (CalledFn->getIntrinsicID ()) {
2997+ case Intrinsic::assume:
2998+ case Intrinsic::lifetime_end:
2999+ case Intrinsic::lifetime_start:
3000+ case Intrinsic::sideeffect:
3001+ case Intrinsic::pseudoprobe:
3002+ case Intrinsic::experimental_noalias_scope_decl: {
3003+ assert (getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3004+ ElementCount::getFixed (1 ), Ctx) == 0 &&
3005+ " scalarizing intrinsic should be free" );
3006+ return InstructionCost (0 );
3007+ }
3008+ default :
3009+ break ;
3010+ }
3011+
29933012 Type *ResultTy = Ctx.Types .inferScalarType (this );
29943013 InstructionCost ScalarCallCost =
29953014 Ctx.TTI .getCallInstrCost (CalledFn, ResultTy, Tys, Ctx.CostKind );
2996- if (isSingleScalar ())
3015+ if (isSingleScalar ()) {
3016+ if (CalledFn->isIntrinsic ())
3017+ ScalarCallCost = std::min (
3018+ ScalarCallCost,
3019+ getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3020+ ElementCount::getFixed (1 ), Ctx));
29973021 return ScalarCallCost;
3022+ }
29983023
29993024 if (VF.isScalable ())
30003025 return InstructionCost::getInvalid ();
@@ -3015,7 +3040,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30153040 // incur any overhead.
30163041 SmallPtrSet<const VPValue *, 4 > UniqueOperands;
30173042 Tys.clear ();
3018- for (auto *Op : drop_end ( operands ()) ) {
3043+ for (auto *Op : ArgOps ) {
30193044 if (Op->isLiveIn () || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
30203045 !UniqueOperands.insert (Op).second )
30213046 continue ;
@@ -3025,8 +3050,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30253050 Ctx.TTI .getOperandsScalarizationOverhead (Tys, Ctx.CostKind );
30263051 }
30273052
3028- return ScalarCallCost * (isSingleScalar () ? 1 : VF.getFixedValue ()) +
3029- ScalarizationCost;
3053+ return ScalarCallCost * VF.getFixedValue () + ScalarizationCost;
30303054 }
30313055 case Instruction::Add:
30323056 case Instruction::Sub:
0 commit comments