diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 81852f6a13058..4cbb3898f4187 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -891,6 +891,37 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { return true; } +bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f16: + return Subtarget->hasFullFP16(); + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + +bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, + Type *Ty) const { + switch (Ty->getScalarType()->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + return false; + } +} + bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { switch (N->getOpcode()) { case ISD::EntryToken: @@ -1000,6 +1031,33 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { return DestSize < SrcSize && DestSize % 32 == 0; } +/// Check if it is profitable to hoist an instruction in then/else to if. +/// Not profitable if I and its user can form an FMA instruction +/// because we prefer FMSUB/FMADD. 
+bool AMDGPUTargetLowering::isProfitableToHoist(Instruction *I) const { + if (I->getOpcode() != Instruction::FMul) + return true; + + if (!I->hasOneUse()) + return true; + + Instruction *User = I->user_back(); + + if (!(User->getOpcode() == Instruction::FSub || + User->getOpcode() == Instruction::FAdd)) + return true; + + const TargetOptions &Options = getTargetMachine().Options; + const Function *F = I->getFunction(); + const DataLayout &DL = F->getDataLayout(); + Type *Ty = User->getOperand(0)->getType(); + + return !( + isFMAFasterThanFMulAndFAdd(*F, Ty) && + isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); +} + bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { unsigned SrcSize = Src->getScalarSizeInBits(); unsigned DestSize = Dest->getScalarSizeInBits(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 18b5c388f3293..9e5b23d0126ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -193,6 +193,8 @@ class AMDGPUTargetLowering : public TargetLowering { bool isTruncateFree(EVT Src, EVT Dest) const override; bool isTruncateFree(Type *Src, Type *Dest) const override; + bool isProfitableToHoist(Instruction *I) const override; + bool isZExtFree(Type *Src, Type *Dest) const override; bool isZExtFree(EVT Src, EVT Dest) const override; @@ -229,6 +231,13 @@ class AMDGPUTargetLowering : public TargetLowering { bool isCheapToSpeculateCttz(Type *Ty) const override; bool isCheapToSpeculateCtlz(Type *Ty) const override; + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true; otherwise fmuladd is expanded to fmul + fadd. 
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; + bool isSDNodeAlwaysUniform(const SDNode *N) const override; // FIXME: This hook should not exist