-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[llvm][AMDGPU] Implemented isProfitableToHoist and isFMAFasterThanFMulAndFAdd #108756
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
isFMAFasterThanFMulAndFAdd Signed-off-by: Kushal Pal <[email protected]>
|
@llvm/pr-subscribers-backend-amdgpu Author: None (braw-lee) ChangesFixes issue #108751 Full diff: https://github.com/llvm/llvm-project/pull/108756.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 81852f6a130584..4cbb3898f4187b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -891,6 +891,37 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return true;
}
+bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget->hasFullFP16();
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+ Type *Ty) const {
+ switch (Ty->getScalarType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
switch (N->getOpcode()) {
case ISD::EntryToken:
@@ -1000,6 +1031,33 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
return DestSize < SrcSize && DestSize % 32 == 0;
}
+/// Check if it is profitable to hoist instruction in then/else to if.
+/// Not profitable if I and it's user can form a FMA instruction
+/// because we prefer FMSUB/FMADD.
+bool AMDGPUTargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (I->getOpcode() != Instruction::FMul)
+ return true;
+
+ if (!I->hasOneUse())
+ return true;
+
+ Instruction *User = I->user_back();
+
+ if (!(User->getOpcode() == Instruction::FSub ||
+ User->getOpcode() == Instruction::FAdd))
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ const Function *F = I->getFunction();
+ const DataLayout &DL = F->getDataLayout();
+ Type *Ty = User->getOperand(0)->getType();
+
+ return !(
+ isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+}
+
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
unsigned SrcSize = Src->getScalarSizeInBits();
unsigned DestSize = Dest->getScalarSizeInBits();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 18b5c388f32932..9e5b23d0126ab1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -193,6 +193,8 @@ class AMDGPUTargetLowering : public TargetLowering {
bool isTruncateFree(EVT Src, EVT Dest) const override;
bool isTruncateFree(Type *Src, Type *Dest) const override;
+ bool isProfitableToHoist(Instruction *I) const override;
+
bool isZExtFree(Type *Src, Type *Dest) const override;
bool isZExtFree(EVT Src, EVT Dest) const override;
@@ -229,6 +231,13 @@ class AMDGPUTargetLowering : public TargetLowering {
bool isCheapToSpeculateCttz(Type *Ty) const override;
bool isCheapToSpeculateCtlz(Type *Ty) const override;
+ /// Return true if an FMA operation is faster than a pair of fmul and fadd
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
+ /// returns true, otherwise fmuladd is expanded to fmul + fadd.
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
+
bool isSDNodeAlwaysUniform(const SDNode *N) const override;
// FIXME: This hook should not exist
|
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs tests
|
|
||
| /// Check if it is profitable to hoist instruction in then/else to if. | ||
| /// Not profitable if I and it's user can form a FMA instruction | ||
| /// because we prefer FMSUB/FMADD. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comments copied from AArch64
|
|
||
| Instruction *User = I->user_back(); | ||
|
|
||
| if (!(User->getOpcode() == Instruction::FSub || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Push negate through parentheses
| const DataLayout &DL = F->getDataLayout(); | ||
| Type *Ty = User->getOperand(0)->getType(); | ||
|
|
||
| return !( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same, demorgan this. Should also check legality first
|
|
||
| bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, | ||
| Type *Ty) const { | ||
| switch (Ty->getScalarType()->getTypeID()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should have the same logic as the EVT variant, this is the aarch64 logic
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also will probably be easier to move this down to SITargetLowering
|
@braw-lee are you still working on this ? If not, let us know. Thanks ! |
|
Fixed by #121465 due to inactivity on this PR. Closing it now. |
Fixes #108751