Skip to content

Commit 92d4337

Browse files
committed
[llvm][AMDGPU] Implemented isProfitableToHoist and
isFMAFasterThanFMulAndFAdd Signed-off-by: Kushal Pal <[email protected]>
1 parent 1825cf2 commit 92d4337

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,37 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
891891
return true;
892892
}
893893

894+
bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
895+
EVT VT) const {
896+
VT = VT.getScalarType();
897+
898+
if (!VT.isSimple())
899+
return false;
900+
901+
switch (VT.getSimpleVT().SimpleTy) {
902+
case MVT::f16:
903+
return Subtarget->hasFullFP16();
904+
case MVT::f32:
905+
case MVT::f64:
906+
return true;
907+
default:
908+
break;
909+
}
910+
911+
return false;
912+
}
913+
914+
bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
915+
Type *Ty) const {
916+
switch (Ty->getScalarType()->getTypeID()) {
917+
case Type::FloatTyID:
918+
case Type::DoubleTyID:
919+
return true;
920+
default:
921+
return false;
922+
}
923+
}
924+
894925
bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
895926
switch (N->getOpcode()) {
896927
case ISD::EntryToken:
@@ -1000,6 +1031,33 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
10001031
return DestSize < SrcSize && DestSize % 32 == 0;
10011032
}
10021033

1034+
/// Check if it is profitable to hoist instruction in then/else to if.
1035+
/// Not profitable if I and it's user can form a FMA instruction
1036+
/// because we prefer FMSUB/FMADD.
1037+
bool AMDGPUTargetLowering::isProfitableToHoist(Instruction *I) const {
1038+
if (I->getOpcode() != Instruction::FMul)
1039+
return true;
1040+
1041+
if (!I->hasOneUse())
1042+
return true;
1043+
1044+
Instruction *User = I->user_back();
1045+
1046+
if (!(User->getOpcode() == Instruction::FSub ||
1047+
User->getOpcode() == Instruction::FAdd))
1048+
return true;
1049+
1050+
const TargetOptions &Options = getTargetMachine().Options;
1051+
const Function *F = I->getFunction();
1052+
const DataLayout &DL = F->getDataLayout();
1053+
Type *Ty = User->getOperand(0)->getType();
1054+
1055+
return !(
1056+
isFMAFasterThanFMulAndFAdd(*F, Ty) &&
1057+
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
1058+
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
1059+
}
1060+
10031061
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
10041062
unsigned SrcSize = Src->getScalarSizeInBits();
10051063
unsigned DestSize = Dest->getScalarSizeInBits();

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ class AMDGPUTargetLowering : public TargetLowering {
193193
bool isTruncateFree(EVT Src, EVT Dest) const override;
194194
bool isTruncateFree(Type *Src, Type *Dest) const override;
195195

196+
bool isProfitableToHoist(Instruction *I) const override;
197+
196198
bool isZExtFree(Type *Src, Type *Dest) const override;
197199
bool isZExtFree(EVT Src, EVT Dest) const override;
198200

@@ -229,6 +231,13 @@ class AMDGPUTargetLowering : public TargetLowering {
229231
bool isCheapToSpeculateCttz(Type *Ty) const override;
230232
bool isCheapToSpeculateCtlz(Type *Ty) const override;
231233

234+
/// Return true if an FMA operation is faster than a pair of fmul and fadd
235+
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
236+
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
237+
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
238+
EVT VT) const override;
239+
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
240+
232241
bool isSDNodeAlwaysUniform(const SDNode *N) const override;
233242

234243
// FIXME: This hook should not exist

0 commit comments

Comments
 (0)