2222#include " llvm/ADT/SmallVector.h"
2323#include " llvm/Analysis/LoopInfo.h"
2424#include " llvm/Analysis/OptimizationRemarkEmitter.h"
25+ #include " llvm/Analysis/TargetLibraryInfo.h"
2526#include " llvm/Analysis/TargetTransformInfo.h"
2627#include " llvm/Analysis/TargetTransformInfoImpl.h"
2728#include " llvm/Analysis/ValueTracking.h"
@@ -285,6 +286,64 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
285286 return false ;
286287 }
287288
289+ // / Several intrinsics that return structs (including llvm.sincos[pi] and
290+ // / llvm.modf) can be lowered to a vector library call (for certain VFs). The
291+ // / vector library functions correspond to the scalar calls (e.g. sincos or
292+ // / modf), which unlike the intrinsic return values via output pointers. This
293+ // / helper checks if a vector call exists for the given intrinsic, and returns
294+ // / the cost, which includes the cost of the mask (if required), and the loads
295+ // / for values returned via output pointers. \p LC is the scalar libcall and
296+ // / \p CallRetElementIndex (optional) is the struct element which is mapped to
297+ // / the call return value. If std::nullopt is returned, then no vector library
298+ // / call is available, so the intrinsic should be assigned the default cost
299+ // / (e.g. scalarization).
300+ std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost (
301+ const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
302+ RTLIB::Libcall LC, std::optional<unsigned > CallRetElementIndex = {}) {
303+ Type *RetTy = ICA.getReturnType ();
304+ // Vector variants of the intrinsic can be mapped to a vector library call.
305+ auto const *LibInfo = ICA.getLibInfo ();
306+ if (!LibInfo || !isa<StructType>(RetTy) ||
307+ !isVectorizedStructTy (cast<StructType>(RetTy)))
308+ return std::nullopt ;
309+
310+ // Find associated libcall.
311+ const char *LCName = getTLI ()->getLibcallName (LC);
312+ if (!LCName)
313+ return std::nullopt ;
314+
315+ // Search for a corresponding vector variant.
316+ LLVMContext &Ctx = RetTy->getContext ();
317+ ElementCount VF = getVectorizedTypeVF (RetTy);
318+ VecDesc const *VD = nullptr ;
319+ for (bool Masked : {false , true }) {
320+ if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
321+ break ;
322+ }
323+ if (!VD)
324+ return std::nullopt ;
325+
326+ // Cost the call + mask.
327+ auto Cost =
328+ thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (), CostKind);
329+ if (VD->isMasked ())
330+ Cost += thisT ()->getShuffleCost (
331+ TargetTransformInfo::SK_Broadcast,
332+ VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
333+ nullptr , {});
334+
335+ // Lowering to a library call (with output pointers) may require us to emit
336+ // reloads for the results.
337+ for (auto [Idx, VectorTy] : enumerate(getContainedTypes (RetTy))) {
338+ if (Idx == CallRetElementIndex)
339+ continue ;
340+ Cost += thisT ()->getMemoryOpCost (
341+ Instruction::Load, VectorTy,
342+ thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind);
343+ }
344+ return Cost;
345+ }
346+
288347protected:
// Protected constructor: forwards the data layout \p DL to the BaseT
// constructor. \p TM is not used in the initializer list or body shown here.
289348 explicit BasicTTIImplBase (const TargetMachine *TM, const DataLayout &DL)
290349 : BaseT(DL) {}
@@ -1716,9 +1775,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
17161775
17171776 Type *RetTy = ICA.getReturnType ();
17181777
1719- ElementCount RetVF =
1720- (RetTy-> isVectorTy () ? cast<VectorType>(RetTy)-> getElementCount ()
1721- : ElementCount::getFixed ( 1 ));
1778+ ElementCount RetVF = isVectorizedTy (RetTy) ? getVectorizedTypeVF (RetTy)
1779+ : ElementCount::getFixed ( 1 );
1780+
17221781 const IntrinsicInst *I = ICA.getInst ();
17231782 const SmallVectorImpl<const Value *> &Args = ICA.getArgs ();
17241783 FastMathFlags FMF = ICA.getFlags ();
@@ -1971,6 +2030,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19712030 }
19722031 case Intrinsic::experimental_vector_match:
19732032 return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
2033+ case Intrinsic::sincos: {
2034+ Type *Ty = getContainedTypes (RetTy).front ();
2035+ EVT VT = getTLI ()->getValueType (DL, Ty);
2036+ RTLIB::Libcall LC = RTLIB::getFSINCOS (VT.getScalarType ());
2037+ if (auto Cost =
2038+ getMultipleResultIntrinsicVectorLibCallCost (ICA, CostKind, LC))
2039+ return *Cost;
2040+ // Otherwise, fallback to default scalarization cost.
2041+ break ;
2042+ }
19742043 }
19752044
19762045 // Assume that we need to scalarize this intrinsic.
@@ -1979,10 +2048,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19792048 InstructionCost ScalarizationCost = InstructionCost::getInvalid ();
19802049 if (RetVF.isVector () && !RetVF.isScalable ()) {
19812050 ScalarizationCost = 0 ;
1982- if (!RetTy->isVoidTy ())
1983- ScalarizationCost += getScalarizationOverhead (
1984- cast<VectorType>(RetTy),
1985- /* Insert*/ true , /* Extract*/ false , CostKind);
2051+ if (!RetTy->isVoidTy ()) {
2052+ for (Type *VectorTy : getContainedTypes (RetTy)) {
2053+ ScalarizationCost += getScalarizationOverhead (
2054+ cast<VectorType>(VectorTy),
2055+ /* Insert=*/ true , /* Extract=*/ false , CostKind);
2056+ }
2057+ }
19862058 ScalarizationCost +=
19872059 getOperandsScalarizationOverhead (Args, ICA.getArgTypes (), CostKind);
19882060 }
@@ -2637,27 +2709,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
26372709 // Else, assume that we need to scalarize this intrinsic. For math builtins
26382710 // this will emit a costly libcall, adding call overhead and spills. Make it
26392711 // very expensive.
2640- if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2712+ if (isVectorizedTy (RetTy)) {
2713+ ArrayRef<Type *> RetVTys = getContainedTypes (RetTy);
2714+
26412715 // Scalable vectors cannot be scalarized, so return Invalid.
2642- if (isa<ScalableVectorType>(RetTy) || any_of (Tys, [](const Type *Ty) {
2643- return isa<ScalableVectorType>(Ty);
2644- }))
2716+ if (any_of (concat<Type *const >(RetVTys, Tys),
2717+ [](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
26452718 return InstructionCost::getInvalid ();
26462719
2647- InstructionCost ScalarizationCost =
2648- SkipScalarizationCost
2649- ? ScalarizationCostPassed
2650- : getScalarizationOverhead (RetVTy, /* Insert*/ true ,
2651- /* Extract*/ false , CostKind);
2720+ InstructionCost ScalarizationCost = ScalarizationCostPassed;
2721+ if (!SkipScalarizationCost) {
2722+ ScalarizationCost = 0 ;
2723+ for (Type *RetVTy : RetVTys) {
2724+ ScalarizationCost += getScalarizationOverhead (
2725+ cast<VectorType>(RetVTy), /* Insert=*/ true ,
2726+ /* Extract=*/ false , CostKind);
2727+ }
2728+ }
26522729
2653- unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)-> getNumElements ();
2730+ unsigned ScalarCalls = getVectorizedTypeVF (RetTy). getFixedValue ();
26542731 SmallVector<Type *, 4 > ScalarTys;
26552732 for (Type *Ty : Tys) {
26562733 if (Ty->isVectorTy ())
26572734 Ty = Ty->getScalarType ();
26582735 ScalarTys.push_back (Ty);
26592736 }
2660- IntrinsicCostAttributes Attrs (IID, RetTy-> getScalarType ( ), ScalarTys, FMF);
2737+ IntrinsicCostAttributes Attrs (IID, toScalarizedTy (RetTy ), ScalarTys, FMF);
26612738 InstructionCost ScalarCost =
26622739 thisT ()->getIntrinsicInstrCost (Attrs, CostKind);
26632740 for (Type *Ty : Tys) {
0 commit comments