Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,37 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return true;
}

bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();

if (!VT.isSimple())
return false;

switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget->hasFullFP16();
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}

return false;
}

bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
switch (Ty->getScalarType()->getTypeID()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should have the same logic as the EVT variant, this is the aarch64 logic

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also will probably be easier to move this down to SITargetLowering

case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
return false;
}
}

bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
switch (N->getOpcode()) {
case ISD::EntryToken:
Expand Down Expand Up @@ -1000,6 +1031,33 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
return DestSize < SrcSize && DestSize % 32 == 0;
}

/// Check if it is profitable to hoist instruction in then/else to if.
/// Not profitable if I and it's user can form a FMA instruction
/// because we prefer FMSUB/FMADD.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comments copied from AArch64

bool AMDGPUTargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;

if (!I->hasOneUse())
return true;

Instruction *User = I->user_back();

if (!(User->getOpcode() == Instruction::FSub ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Push negate through parentheses

User->getOpcode() == Instruction::FAdd))
return true;

const TargetOptions &Options = getTargetMachine().Options;
const Function *F = I->getFunction();
const DataLayout &DL = F->getDataLayout();
Type *Ty = User->getOperand(0)->getType();

return !(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same, demorgan this. Should also check legality first

isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
}

bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
unsigned SrcSize = Src->getScalarSizeInBits();
unsigned DestSize = Dest->getScalarSizeInBits();
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ class AMDGPUTargetLowering : public TargetLowering {
bool isTruncateFree(EVT Src, EVT Dest) const override;
bool isTruncateFree(Type *Src, Type *Dest) const override;

bool isProfitableToHoist(Instruction *I) const override;

bool isZExtFree(Type *Src, Type *Dest) const override;
bool isZExtFree(EVT Src, EVT Dest) const override;

Expand Down Expand Up @@ -229,6 +231,13 @@ class AMDGPUTargetLowering : public TargetLowering {
bool isCheapToSpeculateCttz(Type *Ty) const override;
bool isCheapToSpeculateCtlz(Type *Ty) const override;

/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

bool isSDNodeAlwaysUniform(const SDNode *N) const override;

// FIXME: This hook should not exist
Expand Down