@@ -286,6 +286,64 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
286286 return false ;
287287 }
288288
289+ // / Several intrinsics struct-ret (including llvm.sincos[pi] and llvm.modf)
290+ // / can be lowered to a vector library call (for certain VFs). The vector
291+ // / library functions correspond to the scalar calls (e.g. sincos or modf),
292+ // / which unlike the intrinsic return values via output pointers. This helper
293+ // / checks if a vector call exists for the given intrinsic, and returns the
294+ // / cost, which includes the cost of the mask (if required), and the loads for
295+ // / values returned via output pointers. \p LC is the scalar libcall and
296+ // / \p CallRetElementIndex (optional) is the struct element which is mapped to
297+ // / the call return value. If std::nullopt is returned, the no vector library
298+ // / call is available, so the intrinsic should be assigned the default cost
299+ // / (e.g. scalarization).
300+ std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost (
301+ const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
302+ RTLIB::Libcall LC, std::optional<unsigned > CallRetElementIndex = {}) {
303+ Type *RetTy = ICA.getReturnType ();
304+ // Vector variants of the intrinsic can be mapped to a vector library call.
305+ auto const *LibInfo = ICA.getLibInfo ();
306+ if (!LibInfo || !isa<StructType>(RetTy) ||
307+ !isVectorizedStructTy (cast<StructType>(RetTy)))
308+ return std::nullopt ;
309+
310+ // Find associated libcall.
311+ const char *LCName = getTLI ()->getLibcallName (LC);
312+ if (!LC || !LCName)
313+ return std::nullopt ;
314+
315+ // Search for a corresponding vector variant.
316+ LLVMContext &Ctx = RetTy->getContext ();
317+ ElementCount VF = getVectorizedTypeVF (RetTy);
318+ VecDesc const *VD = nullptr ;
319+ for (bool Masked : {false , true }) {
320+ if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
321+ break ;
322+ }
323+ if (!VD)
324+ return std::nullopt ;
325+
326+ // Cost the call + mask.
327+ auto Cost =
328+ thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (), CostKind);
329+ if (VD->isMasked ())
330+ Cost += thisT ()->getShuffleCost (
331+ TargetTransformInfo::SK_Broadcast,
332+ VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
333+ nullptr , {});
334+
335+ // Lowering to a library call (with output pointers) may require us to emit
336+ // reloads for the results.
337+ for (auto [Idx, VectorTy] : enumerate(getContainedTypes (RetTy))) {
338+ if (Idx == CallRetElementIndex)
339+ continue ;
340+ Cost += thisT ()->getMemoryOpCost (
341+ Instruction::Load, VectorTy,
342+ thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind);
343+ }
344+ return Cost;
345+ }
346+
289347protected:
/// Constructs the base implementation by forwarding \p DL to BaseT. \p TM is
/// accepted but not referenced by this constructor's initializer list or body.
290348 explicit BasicTTIImplBase (const TargetMachine *TM, const DataLayout &DL)
291349 : BaseT(DL) {}
@@ -1999,47 +2057,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19992057 case Intrinsic::experimental_vector_match:
20002058 return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
20012059 case Intrinsic::sincos: {
2002- // Vector variants of llvm.sincos can be mapped to a vector library call.
2003- auto const *LibInfo = ICA.getLibInfo ();
2004- if (!LibInfo || !isVectorizedTy (RetTy))
2005- break ;
2006-
2007- // Find associated libcall.
2008- VectorType *VectorTy = cast<VectorType>(getContainedTypes (RetTy).front ());
2009- EVT VT = getTLI ()->getValueType (DL, VectorTy);
2010- RTLIB::Libcall LC = RTLIB::getSINCOS (VT.getVectorElementType ());
2011- const char *LCName = getTLI ()->getLibcallName (LC);
2012- if (!LC || !LCName)
2013- break ;
2014-
2015- // Search for a corresponding vector variant.
2016- LLVMContext &Ctx = RetTy->getContext ();
2017- auto VF = getVectorizedTypeVF (RetTy);
2018- VecDesc const *VD = nullptr ;
2019- for (bool Masked : {false , true }) {
2020- if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
2021- break ;
2022- }
2023- if (!VD)
2024- break ;
2025-
2026- // Cost the call + mask.
2027- auto Cost = thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (),
2028- CostKind);
2029- if (VD->isMasked ())
2030- Cost += thisT ()->getShuffleCost (
2031- TargetTransformInfo::SK_Broadcast,
2032- VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
2033- nullptr , {});
2034-
2035- // Lowering to a sincos library call (with output pointers) may require us
2036- // to emit reloads for the results.
2037- Cost +=
2038- thisT ()->getMemoryOpCost (
2039- Instruction::Load, VectorTy,
2040- thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind) *
2041- 2 ;
2042- return Cost;
2060+ Type *Ty = getContainedTypes (RetTy).front ();
2061+ EVT VT = getTLI ()->getValueType (DL, Ty);
2062+ RTLIB::Libcall LC = RTLIB::getSINCOS (VT.getScalarType ());
2063+ if (auto Cost =
2064+ getMultipleResultIntrinsicVectorLibCallCost (ICA, CostKind, LC))
2065+ return *Cost;
2066+ // Otherwise, fallback to default scalarization cost.
2067+ break ;
20432068 }
20442069 }
20452070
0 commit comments