Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,17 @@ class IntrinsicCostAttributes {
// If ScalarizationCost is invalid (the default), the cost of scalarizing the
// arguments and the return value will be computed based on types.
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
/* Downstream change: #87 (sincos vectorization)*/
TargetLibraryInfo const *LibInfo = nullptr;
/* End downstream change: #87 */

public:
/* Downstream change: #87 (sincos vectorization)*/
IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI,
InstructionCost ScalarCost = InstructionCost::getInvalid(),
bool TypeBasedOnly = false);
bool TypeBasedOnly = false, TargetLibraryInfo const *LibInfo = nullptr);
/* End downstream change: #87 */

IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
Expand All @@ -141,11 +146,14 @@ class IntrinsicCostAttributes {
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args);

/* Downstream change: #87 (sincos vectorization)*/
IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());
InstructionCost ScalarCost = InstructionCost::getInvalid(),
TargetLibraryInfo const *LibInfo = nullptr);
/* End downstream change: #87 */

Intrinsic::ID getID() const { return IID; }
const IntrinsicInst *getInst() const { return II; }
Expand All @@ -155,6 +163,10 @@ class IntrinsicCostAttributes {
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }

/* Downstream change: #87 (sincos vectorization)*/
/// Returns the TargetLibraryInfo pointer stored in these attributes. This is
/// nullptr unless one was passed to a constructor that accepts a LibInfo
/// argument.
const TargetLibraryInfo *getLibInfo() const { return LibInfo; }
/* End downstream change: #87 */

/// Returns true when no argument values were recorded (the Arguments list is
/// empty).
bool isTypeBasedOnly() const {
return Arguments.empty();
}
Expand Down
123 changes: 105 additions & 18 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/Analysis/ValueTracking.h"
Expand Down Expand Up @@ -285,6 +286,66 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return false;
}

/* Downstream change: #87 (sincos vectorization)*/
/// Several intrinsics that return structs (including llvm.sincos[pi] and
/// llvm.modf) can be lowered to a vector library call (for certain VFs). The
/// vector library functions correspond to the scalar calls (e.g. sincos or
/// modf), which unlike the intrinsic return values via output pointers. This
/// helper checks if a vector call exists for the given intrinsic, and returns
/// the cost, which includes the cost of the mask (if required), and the loads
/// for values returned via output pointers. \p LC is the scalar libcall and
/// \p CallRetElementIndex (optional) is the struct element which is mapped to
/// the call return value. If std::nullopt is returned, then no vector library
/// call is available, so the intrinsic should be assigned the default cost
/// (e.g. scalarization).
std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost(
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
RTLIB::Libcall LC, std::optional<unsigned> CallRetElementIndex = {}) {
Type *RetTy = ICA.getReturnType();
// Vector variants of the intrinsic can be mapped to a vector library call.
auto const *LibInfo = ICA.getLibInfo();
if (!LibInfo || !isa<StructType>(RetTy) ||
!isVectorizedStructTy(cast<StructType>(RetTy)))
return std::nullopt;

// Find associated libcall.
const char *LCName = getTLI()->getLibcallName(LC);
if (!LCName)
return std::nullopt;

// Search for a corresponding vector variant.
LLVMContext &Ctx = RetTy->getContext();
ElementCount VF = getVectorizedTypeVF(RetTy);
VecDesc const *VD = nullptr;
for (bool Masked : {false, true}) {
if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
break;
}
if (!VD)
return std::nullopt;

// Cost the call + mask.
auto Cost =
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
if (VD->isMasked())
Cost += thisT()->getShuffleCost(
TargetTransformInfo::SK_Broadcast,
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
nullptr, {});

// Lowering to a library call (with output pointers) may require us to emit
// reloads for the results.
for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) {
if (Idx == CallRetElementIndex)
continue;
Cost += thisT()->getMemoryOpCost(
Instruction::Load, VectorTy,
thisT()->getDataLayout().getABITypeAlign(VectorTy), 0, CostKind);
}
return Cost;
}
/* End downstream change: #87 */

protected:
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
: BaseT(DL) {}
Expand Down Expand Up @@ -1716,9 +1777,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {

Type *RetTy = ICA.getReturnType();

ElementCount RetVF =
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
: ElementCount::getFixed(1));
/* Downstream change: #87 (sincos vectorization)*/
ElementCount RetVF = isVectorizedTy(RetTy) ? getVectorizedTypeVF(RetTy)
: ElementCount::getFixed(1);
/* End downstream change: #87 */

const IntrinsicInst *I = ICA.getInst();
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
FastMathFlags FMF = ICA.getFlags();
Expand Down Expand Up @@ -1971,6 +2034,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
case Intrinsic::experimental_vector_match:
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
/* Downstream change: #87 (sincos vectorization)*/
case Intrinsic::sincos: {
Type *Ty = getContainedTypes(RetTy).front();
EVT VT = getTLI()->getValueType(DL, Ty);
RTLIB::Libcall LC = RTLIB::getFSINCOS(VT.getScalarType());
if (auto Cost =
getMultipleResultIntrinsicVectorLibCallCost(ICA, CostKind, LC))
return *Cost;
// Otherwise, fallback to default scalarization cost.
break;
}
/* End downstream change: #87 */
}

// Assume that we need to scalarize this intrinsic.
Expand All @@ -1979,10 +2054,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
if (RetVF.isVector() && !RetVF.isScalable()) {
ScalarizationCost = 0;
if (!RetTy->isVoidTy())
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(RetTy),
/*Insert*/ true, /*Extract*/ false, CostKind);
/* Downstream change: #87 (sincos vectorization)*/
if (!RetTy->isVoidTy()) {
for (Type *VectorTy : getContainedTypes(RetTy)) {
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(VectorTy),
/*Insert=*/true, /*Extract=*/false, CostKind);
}
}
/* End downstream change: #87 */
ScalarizationCost +=
getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
}
Expand Down Expand Up @@ -2637,27 +2717,34 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
/* Downstream change: #87 (sincos vectorization)*/
if (isVectorizedTy(RetTy)) {
ArrayRef<Type *> RetVTys = getContainedTypes(RetTy);

// Scalable vectors cannot be scalarized, so return Invalid.
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
return isa<ScalableVectorType>(Ty);
}))
if (any_of(concat<Type *const>(RetVTys, Tys),
[](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
return InstructionCost::getInvalid();

InstructionCost ScalarizationCost =
SkipScalarizationCost
? ScalarizationCostPassed
: getScalarizationOverhead(RetVTy, /*Insert*/ true,
/*Extract*/ false, CostKind);
InstructionCost ScalarizationCost = ScalarizationCostPassed;
if (!SkipScalarizationCost) {
ScalarizationCost = 0;
for (Type *RetVTy : RetVTys) {
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(RetVTy), /*Insert=*/true,
/*Extract=*/false, CostKind);
}
}

unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
unsigned ScalarCalls = getVectorizedTypeVF(RetTy).getFixedValue();
SmallVector<Type *, 4> ScalarTys;
for (Type *Ty : Tys) {
if (Ty->isVectorTy())
Ty = Ty->getScalarType();
ScalarTys.push_back(Ty);
}
IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
IntrinsicCostAttributes Attrs(IID, toScalarizedTy(RetTy), ScalarTys, FMF);
/* End downstream change: #87 */
InstructionCost ScalarCost =
thisT()->getIntrinsicInstrCost(Attrs, CostKind);
for (Type *Ty : Tys) {
Expand Down
23 changes: 18 additions & 5 deletions llvm/lib/Analysis/CostModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/CostModel.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
Expand All @@ -42,25 +44,36 @@ static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost",
cl::desc("Calculate intrinsics cost based only on argument types"),
cl::init(false));

/* Downstream change: #87 (sincos vectorization)*/
static cl::opt<bool> PreferIntrinsicCost(
"prefer-intrinsic-cost",
cl::desc("Prefer using getIntrinsicInstrCost over getInstructionCost"),
cl::init(false));
/* End downstream change: #87 */

#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME

PreservedAnalyses CostModelPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
// Downstream change: #87 (sincos vectorization)
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
OS << "Printing analysis 'Cost Model Analysis' for function '" << F.getName() << "':\n";
for (BasicBlock &B : F) {
for (Instruction &Inst : B) {
// TODO: Use a pass parameter instead of cl::opt CostKind to determine
// which cost kind to print.
InstructionCost Cost;
auto *II = dyn_cast<IntrinsicInst>(&Inst);
if (II && TypeBasedIntrinsicCost) {
IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
InstructionCost::getInvalid(), true);
/* Downstream change: #87 (sincos vectorization)*/
if (II && (PreferIntrinsicCost || TypeBasedIntrinsicCost)) {
IntrinsicCostAttributes ICA(
II->getIntrinsicID(), *II, InstructionCost::getInvalid(),
/*TypeBasedOnly=*/TypeBasedIntrinsicCost, &TLI);
Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
}
else {
} else {
/* End downstream change: #87 */
Cost = TTI.getInstructionCost(&Inst, CostKind);
}

Expand Down
21 changes: 12 additions & 9 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,13 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
return true;
}

/* Downstream change: #87 (sincos vectorization)*/
IntrinsicCostAttributes::IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
bool TypeBasedOnly)
bool TypeBasedOnly, const TargetLibraryInfo *LibInfo)
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
ScalarizationCost(ScalarizationCost) {
ScalarizationCost(ScalarizationCost), LibInfo(LibInfo) {
/* End downstream change: #87 */

if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
Expand Down Expand Up @@ -101,13 +103,14 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ParamTys.push_back(Argument->getType());
}

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys,
FastMathFlags Flags,
const IntrinsicInst *I,
InstructionCost ScalarCost)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
/* Downstream change: #87 (sincos vectorization)*/
IntrinsicCostAttributes::IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys, FastMathFlags Flags, const IntrinsicInst *I,
InstructionCost ScalarCost, TargetLibraryInfo const *LibInfo)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost),
LibInfo(LibInfo) {
/* End downstream change: #87 */
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::atan2:
case Intrinsic::sin:
case Intrinsic::cos:
// Downstream change: #87 (sincos vectorization)
case Intrinsic::sincos:
case Intrinsic::tan:
case Intrinsic::sinh:
case Intrinsic::cosh:
Expand Down Expand Up @@ -179,6 +181,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
case Intrinsic::ucmp:
case Intrinsic::scmp:
return OpdIdx == -1 || OpdIdx == 0;
// Downstream change: #87 (sincos vectorization)
case Intrinsic::sincos:
case Intrinsic::is_fpclass:
case Intrinsic::vp_is_fpclass:
return OpdIdx == 0;
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2884,8 +2884,11 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
std::back_inserter(ParamTys),
[&](Type *Ty) { return maybeVectorizeType(Ty, VF); });

/* Downstream change: #87 (sincos vectorization)*/
IntrinsicCostAttributes CostAttrs(ID, RetTy, Arguments, ParamTys, FMF,
dyn_cast<IntrinsicInst>(CI));
dyn_cast<IntrinsicInst>(CI),
InstructionCost::getInvalid(), TLI);
/* End downstream change: #87 */
return TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
}

Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1149,9 +1149,12 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,

// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
/* Downstream change: #87 (sincos vectorization)*/
IntrinsicCostAttributes CostAttrs(
VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()),
InstructionCost::getInvalid(), &Ctx.TLI);
/* End downstream change: #87 */
return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
}

Expand Down
Loading