@@ -1888,7 +1888,8 @@ ARMTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
18881888InstructionCost
18891889ARMTTIImpl::getIntrinsicInstrCost (const IntrinsicCostAttributes &ICA,
18901890 TTI::TargetCostKind CostKind) {
1891- switch (ICA.getID ()) {
1891+ unsigned Opc = ICA.getID ();
1892+ switch (Opc) {
18921893 case Intrinsic::get_active_lane_mask:
18931894 // Currently we make a somewhat optimistic assumption that
18941895 // active_lane_mask's are always free. In reality it may be freely folded
@@ -1904,17 +1905,38 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
19041905 case Intrinsic::ssub_sat:
19051906 case Intrinsic::uadd_sat:
19061907 case Intrinsic::usub_sat: {
1908+ bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::uadd_sat);
1909+ bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
1910+ Type *RetTy = ICA.getReturnType ();
1911+
1912+ if (auto *ITy = dyn_cast<IntegerType>(RetTy)) {
1913+ if (IsSigned && ST->hasDSP () && ITy->getBitWidth () == 32 )
1914+ return 1 ; // qadd / qsub
1915+ if (ST->hasDSP () && (ITy->getBitWidth () == 8 || ITy->getBitWidth () == 16 ))
1916+ return 2 ; // uqadd16 / qadd16 / uqsub16 / qsub16 + possible extend.
1917+ // Otherwise return the cost of expanding the node. Generally an add +
1918+ // icmp + sel.
1919+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1920+ Type *CondTy = RetTy->getWithNewBitWidth (1 );
1921+ return getArithmeticInstrCost (IsAdd ? Instruction::Add : Instruction::Sub,
1922+ RetTy, CostKind) +
1923+ 2 * getCmpSelInstrCost (BinaryOperator::ICmp, RetTy, CondTy, Pred,
1924+ CostKind) +
1925+ 2 * getCmpSelInstrCost (BinaryOperator::Select, RetTy, CondTy, Pred,
1926+ CostKind);
1927+ }
1928+
19071929 if (!ST->hasMVEIntegerOps ())
19081930 break ;
1909- Type *VT = ICA.getReturnType ();
19101931
1911- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (VT );
1932+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (RetTy );
19121933 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
19131934 LT.second == MVT::v16i8) {
19141935 // This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
19151936 // need to extend the type, as it uses shr(qadd(shl, shl)).
19161937 unsigned Instrs =
1917- LT.second .getScalarSizeInBits () == VT->getScalarSizeInBits () ? 1 : 4 ;
1938+ LT.second .getScalarSizeInBits () == RetTy->getScalarSizeInBits () ? 1
1939+ : 4 ;
19181940 return LT.first * ST->getMVEVectorCostFactor (CostKind) * Instrs;
19191941 }
19201942 break ;
@@ -1948,7 +1970,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
19481970 case Intrinsic::fptoui_sat: {
19491971 if (ICA.getArgTypes ().empty ())
19501972 break ;
1951- bool IsSigned = ICA. getID () == Intrinsic::fptosi_sat;
1973+ bool IsSigned = Opc == Intrinsic::fptosi_sat;
19521974 auto LT = getTypeLegalizationCost (ICA.getArgTypes ()[0 ]);
19531975 EVT MTy = TLI->getValueType (DL, ICA.getReturnType ());
19541976 // Check for the legal types, with the correct subtarget features.
0 commit comments