2222#include " llvm/ADT/SmallVector.h"
2323#include " llvm/Analysis/LoopInfo.h"
2424#include " llvm/Analysis/OptimizationRemarkEmitter.h"
25+ #include " llvm/Analysis/TargetLibraryInfo.h"
2526#include " llvm/Analysis/TargetTransformInfo.h"
2627#include " llvm/Analysis/TargetTransformInfoImpl.h"
2728#include " llvm/Analysis/ValueTracking.h"
@@ -285,6 +286,66 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
285286 return false ;
286287 }
287288
289+ /* Downstream change: #87 (sincos vectorization)*/
290+ // / Several intrinsics that return structs (including llvm.sincos[pi] and
291+ // / llvm.modf) can be lowered to a vector library call (for certain VFs). The
292+ // / vector library functions correspond to the scalar calls (e.g. sincos or
293+ // / modf), which unlike the intrinsic return values via output pointers. This
294+ // / helper checks if a vector call exists for the given intrinsic, and returns
295+ // / the cost, which includes the cost of the mask (if required), and the loads
296+ // / for values returned via output pointers. \p LC is the scalar libcall and
297+ // / \p CallRetElementIndex (optional) is the struct element which is mapped to
298+ // / the call return value. If std::nullopt is returned, then no vector library
299+ // / call is available, so the intrinsic should be assigned the default cost
300+ // / (e.g. scalarization).
301+ std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost (
302+ const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
303+ RTLIB::Libcall LC, std::optional<unsigned > CallRetElementIndex = {}) {
304+ Type *RetTy = ICA.getReturnType ();
305+ // Vector variants of the intrinsic can be mapped to a vector library call.
306+ auto const *LibInfo = ICA.getLibInfo ();
307+ if (!LibInfo || !isa<StructType>(RetTy) ||
308+ !isVectorizedStructTy (cast<StructType>(RetTy)))
309+ return std::nullopt ;
310+
311+ // Find associated libcall.
312+ const char *LCName = getTLI ()->getLibcallName (LC);
313+ if (!LCName)
314+ return std::nullopt ;
315+
316+ // Search for a corresponding vector variant.
317+ LLVMContext &Ctx = RetTy->getContext ();
318+ ElementCount VF = getVectorizedTypeVF (RetTy);
319+ VecDesc const *VD = nullptr ;
320+ for (bool Masked : {false , true }) {
321+ if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
322+ break ;
323+ }
324+ if (!VD)
325+ return std::nullopt ;
326+
327+ // Cost the call + mask.
328+ auto Cost =
329+ thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (), CostKind);
330+ if (VD->isMasked ())
331+ Cost += thisT ()->getShuffleCost (
332+ TargetTransformInfo::SK_Broadcast,
333+ VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
334+ nullptr , {});
335+
336+ // Lowering to a library call (with output pointers) may require us to emit
337+ // reloads for the results.
338+ for (auto [Idx, VectorTy] : enumerate(getContainedTypes (RetTy))) {
339+ if (Idx == CallRetElementIndex)
340+ continue ;
341+ Cost += thisT ()->getMemoryOpCost (
342+ Instruction::Load, VectorTy,
343+ thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind);
344+ }
345+ return Cost;
346+ }
347+ /* End downstream change: #87 */
348+
288349protected:
/// Constructs the base implementation by forwarding \p DL to BaseT. \p TM is
/// accepted but not referenced by this constructor.
289350 explicit BasicTTIImplBase (const TargetMachine *TM, const DataLayout &DL)
290351 : BaseT(DL) {}
@@ -1716,9 +1777,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
17161777
17171778 Type *RetTy = ICA.getReturnType ();
17181779
1719- ElementCount RetVF =
1720- (RetTy->isVectorTy () ? cast<VectorType>(RetTy)->getElementCount ()
1721- : ElementCount::getFixed (1 ));
1780+ /* Downstream change: #87 (sincos vectorization)*/
1781+ ElementCount RetVF = isVectorizedTy (RetTy) ? getVectorizedTypeVF (RetTy)
1782+ : ElementCount::getFixed (1 );
1783+ /* End downstream change: #87 */
1784+
17221785 const IntrinsicInst *I = ICA.getInst ();
17231786 const SmallVectorImpl<const Value *> &Args = ICA.getArgs ();
17241787 FastMathFlags FMF = ICA.getFlags ();
@@ -1971,6 +2034,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19712034 }
19722035 case Intrinsic::experimental_vector_match:
19732036 return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
2037+ /* Downstream change: #87 (sincos vectorization)*/
2038+ case Intrinsic::sincos: {
2039+ Type *Ty = getContainedTypes (RetTy).front ();
2040+ EVT VT = getTLI ()->getValueType (DL, Ty);
2041+ RTLIB::Libcall LC = RTLIB::getFSINCOS (VT.getScalarType ());
2042+ if (auto Cost =
2043+ getMultipleResultIntrinsicVectorLibCallCost (ICA, CostKind, LC))
2044+ return *Cost;
2045+ // Otherwise, fallback to default scalarization cost.
2046+ break ;
2047+ }
2048+ /* End downstream change: #87 */
19742049 }
19752050
19762051 // Assume that we need to scalarize this intrinsic.
@@ -1979,10 +2054,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19792054 InstructionCost ScalarizationCost = InstructionCost::getInvalid ();
19802055 if (RetVF.isVector () && !RetVF.isScalable ()) {
19812056 ScalarizationCost = 0 ;
1982- if (!RetTy->isVoidTy ())
1983- ScalarizationCost += getScalarizationOverhead (
1984- cast<VectorType>(RetTy),
1985- /* Insert*/ true , /* Extract*/ false , CostKind);
2057+ /* Downstream change: #87 (sincos vectorization)*/
2058+ if (!RetTy->isVoidTy ()) {
2059+ for (Type *VectorTy : getContainedTypes (RetTy)) {
2060+ ScalarizationCost += getScalarizationOverhead (
2061+ cast<VectorType>(VectorTy),
2062+ /* Insert=*/ true , /* Extract=*/ false , CostKind);
2063+ }
2064+ }
2065+ /* End downstream change: #87 */
19862066 ScalarizationCost +=
19872067 getOperandsScalarizationOverhead (Args, ICA.getArgTypes (), CostKind);
19882068 }
@@ -2637,27 +2717,34 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
26372717 // Else, assume that we need to scalarize this intrinsic. For math builtins
26382718 // this will emit a costly libcall, adding call overhead and spills. Make it
26392719 // very expensive.
2640- if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2720+ /* Downstream change: #87 (sincos vectorization)*/
2721+ if (isVectorizedTy (RetTy)) {
2722+ ArrayRef<Type *> RetVTys = getContainedTypes (RetTy);
2723+
26412724 // Scalable vectors cannot be scalarized, so return Invalid.
2642- if (isa<ScalableVectorType>(RetTy) || any_of (Tys, [](const Type *Ty) {
2643- return isa<ScalableVectorType>(Ty);
2644- }))
2725+ if (any_of (concat<Type *const >(RetVTys, Tys),
2726+ [](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
26452727 return InstructionCost::getInvalid ();
26462728
2647- InstructionCost ScalarizationCost =
2648- SkipScalarizationCost
2649- ? ScalarizationCostPassed
2650- : getScalarizationOverhead (RetVTy, /* Insert*/ true ,
2651- /* Extract*/ false , CostKind);
2729+ InstructionCost ScalarizationCost = ScalarizationCostPassed;
2730+ if (!SkipScalarizationCost) {
2731+ ScalarizationCost = 0 ;
2732+ for (Type *RetVTy : RetVTys) {
2733+ ScalarizationCost += getScalarizationOverhead (
2734+ cast<VectorType>(RetVTy), /* Insert=*/ true ,
2735+ /* Extract=*/ false , CostKind);
2736+ }
2737+ }
26522738
2653- unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)-> getNumElements ();
2739+ unsigned ScalarCalls = getVectorizedTypeVF (RetTy). getFixedValue ();
26542740 SmallVector<Type *, 4 > ScalarTys;
26552741 for (Type *Ty : Tys) {
26562742 if (Ty->isVectorTy ())
26572743 Ty = Ty->getScalarType ();
26582744 ScalarTys.push_back (Ty);
26592745 }
2660- IntrinsicCostAttributes Attrs (IID, RetTy->getScalarType (), ScalarTys, FMF);
2746+ IntrinsicCostAttributes Attrs (IID, toScalarizedTy (RetTy), ScalarTys, FMF);
2747+ /* End downstream change: #87 */
26612748 InstructionCost ScalarCost =
26622749 thisT ()->getIntrinsicInstrCost (Attrs, CostKind);
26632750 for (Type *Ty : Tys) {
0 commit comments