@@ -1522,6 +1522,55 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
   State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
 }
 
+InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
+                                               VPCostContext &Ctx) const {
+  // Computes the CastContextHint from a recipe that may access memory.
+  auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
+    if (VF.isScalar())
+      return TTI::CastContextHint::Normal;
+    if (isa<VPInterleaveRecipe>(R))
+      return TTI::CastContextHint::Interleave;
+    if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
+      return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
+                                             : TTI::CastContextHint::Normal;
+    const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
+    if (WidenMemoryRecipe == nullptr)
+      return TTI::CastContextHint::None;
+    if (!WidenMemoryRecipe->isConsecutive())
+      return TTI::CastContextHint::GatherScatter;
+    if (WidenMemoryRecipe->isReverse())
+      return TTI::CastContextHint::Reversed;
+    if (WidenMemoryRecipe->isMasked())
+      return TTI::CastContextHint::Masked;
+    return TTI::CastContextHint::Normal;
+  };
+
+  VPValue *Operand = getOperand(0);
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+  // For Trunc/FPTrunc, get the context from the only user.
+  if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
+      !hasMoreThanOneUniqueUser() && getNumUsers() > 0) {
+    if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
+      CCH = ComputeCCH(StoreRecipe);
+  }
+  // For ZExt/SExt/FPExt, get the context from the operand.
+  else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
+           Opcode == Instruction::FPExt) {
+    if (Operand->isLiveIn())
+      CCH = TTI::CastContextHint::Normal;
+    else if (Operand->getDefiningRecipe())
+      CCH = ComputeCCH(Operand->getDefiningRecipe());
+  }
+
+  auto *SrcTy =
+      cast<VectorType>(ToVectorTy(Ctx.Types.inferScalarType(Operand), VF));
+  auto *DestTy = cast<VectorType>(ToVectorTy(getResultType(), VF));
+  // Arm TTI will use the underlying instruction to determine the cost.
+  return Ctx.TTI.getCastInstrCost(
+      Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
+      dyn_cast_if_present<Instruction>(getUnderlyingValue()));
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
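
For reference, a small standalone sketch of the cast-context decision tree that the new computeCost implements. This is not part of the patch: the Recipe struct, RecipeKind enum, and CastContextHint enum below are simplified, hypothetical stand-ins for the VPlan recipe classes and TTI::CastContextHint, modelling only the selection logic (reversed consecutive load -> Reversed, non-consecutive access -> GatherScatter, and so on).

#include <cassert>
#include <cstdio>

enum class CastContextHint { None, Normal, Masked, GatherScatter, Interleave, Reversed };
enum class RecipeKind { Interleave, Replicate, WidenMemory, Other };

struct Recipe {
  RecipeKind Kind;
  bool Predicated;  // stand-in for VPReplicateRecipe::isPredicated()
  bool Consecutive; // stand-in for VPWidenMemoryRecipe::isConsecutive()
  bool Reverse;     // stand-in for VPWidenMemoryRecipe::isReverse()
  bool Masked;      // stand-in for VPWidenMemoryRecipe::isMasked()
};

// Mirrors the decision tree of the ComputeCCH lambda in the patch.
static CastContextHint computeCCH(const Recipe &R, bool ScalarVF) {
  if (ScalarVF)
    return CastContextHint::Normal;
  if (R.Kind == RecipeKind::Interleave)
    return CastContextHint::Interleave;
  if (R.Kind == RecipeKind::Replicate)
    return R.Predicated ? CastContextHint::Masked : CastContextHint::Normal;
  if (R.Kind != RecipeKind::WidenMemory)
    return CastContextHint::None;
  if (!R.Consecutive)
    return CastContextHint::GatherScatter;
  if (R.Reverse)
    return CastContextHint::Reversed;
  if (R.Masked)
    return CastContextHint::Masked;
  return CastContextHint::Normal;
}

int main() {
  // A zext fed by a reversed, consecutive widened load at a vector VF
  // is costed with the Reversed cast context.
  Recipe Load = {RecipeKind::WidenMemory, false, /*Consecutive=*/true,
                 /*Reverse=*/true, /*Masked=*/false};
  assert(computeCCH(Load, /*ScalarVF=*/false) == CastContextHint::Reversed);

  // The same access lowered as a gather (non-consecutive) switches the hint.
  Load.Consecutive = false;
  assert(computeCCH(Load, /*ScalarVF=*/false) == CastContextHint::GatherScatter);

  std::puts("CastContextHint selection matches the recipe's decision tree");
  return 0;
}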