Skip to content

Commit ba7858c

Browse files
committed
[CodeGen] [AMDGPU] Attempt DAGCombine for fmul with select to ldexp (llvm#111109)
Materializing a 32-bit non-inline constant for an fmul is relatively expensive; where possible, it is cheaper to combine the multiply into an ldexp instruction (for the datatypes f64, f32 and f16). This change adds a DAG combine for any select whose two values are both exact powers of 2. ``` fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b)) fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b)) where, A=2^a & B=2^b ; a and b are integers. ``` The combine is handled separately in performFMulCombine (newly defined in SIISelLowering), which fuses an fmul with a select-type operand into ldexp. This fixes llvm#104900. Change-Id: I1e76049d8de218329efac4c62ee3c52cd824258c
1 parent 5573625 commit ba7858c

14 files changed

+4237
-2943
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -913,6 +913,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
913913
ISD::FADD,
914914
ISD::FSUB,
915915
ISD::FDIV,
916+
ISD::FMUL,
916917
ISD::FMINNUM,
917918
ISD::FMAXNUM,
918919
ISD::FMINNUM_IEEE,
@@ -14729,6 +14730,66 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N,
1472914730
return SDValue();
1473014731
}
1473114732

14733+
// Target DAG combine for ISD::FMUL nodes.
//
// Rewrites `fmul x, (select c, A, B)`, where A and B are FP constants (or
// constant splats) of the same sign whose magnitudes are exact powers of two,
// into an FLDEXP of x (negated first when the constants are negative) by an
// i32 select of the two exponents.
//
// \param N   The FMUL node being combined.
// \param DCI Combiner state; only its SelectionDAG is used here.
// \returns The replacement FLDEXP node, or an empty SDValue when the pattern
//          does not apply.
SDValue SITargetLowering::performFMulCombine(SDNode *N,
14734+
DAGCombinerInfo &DCI) const {
14735+
SelectionDAG &DAG = DCI.DAG;
14736+
EVT VT = N->getValueType(0);
14737+
EVT ScalarVT = VT.getScalarType();
// Type of the exponent operand: VT with its element type replaced by i32.
14738+
EVT IntVT = VT.changeElementType(MVT::i32);
14739+
14740+
SDValue LHS = N->getOperand(0);
14741+
SDValue RHS = N->getOperand(1);
14742+
14743+
// Materializing an i32 inline constant is cheaper than materializing an
14744+
// f16 or f64 value (or a non-inline f32 value), so the multiply is
14745+
// expressed through ldexp instead, as shown below:
14746+
//
14747+
// Given : A = 2^a & B = 2^b ; where a and b are integers.
14748+
// fmul x, (select y, A, B) -> ldexp( x, (select i32 y, a, b) )
14749+
// fmul x, (select y, -A, -B) -> ldexp( (fneg x), (select i32 y, a, b) )
//
// Only a select in the RHS operand is examined, and that select must have
// a single use.
14750+
if ((ScalarVT == MVT::f64 || ScalarVT == MVT::f32 || ScalarVT == MVT::f16) &&
14751+
(RHS.hasOneUse() && RHS.getOpcode() == ISD::SELECT)) {
// Both select arms must be FP constants (or splats of one).
14752+
const ConstantFPSDNode *TrueNode = isConstOrConstSplatFP(RHS.getOperand(1));
14753+
if (!TrueNode)
14754+
return SDValue();
14755+
const ConstantFPSDNode *FalseNode =
14756+
isConstOrConstSplatFP(RHS.getOperand(2));
14757+
if (!FalseNode)
14758+
return SDValue();
14759+
// A single optional fneg of x can only absorb the sign when both arms
// share it, so mixed-sign arms are rejected.
14760+
if (TrueNode->isNegative() != FalseNode->isNegative())
14761+
return SDValue();
14762+
14763+
// For f32, skip the transform when both arms are already inline constants;
// it is applied as soon as at least one arm is a non-inline constant.
14764+
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
14765+
if (ScalarVT == MVT::f32 &&
14766+
TII->isInlineConstant(TrueNode->getValueAPF()) &&
14767+
TII->isInlineConstant(FalseNode->getValueAPF()))
14768+
return SDValue();
14769+
// getExactLog2Abs() yields INT_MIN when the magnitude is not an exact power
// of two; in that case there is no integer exponent to select.
14770+
int TrueNodeExpVal = TrueNode->getValueAPF().getExactLog2Abs();
14771+
if (TrueNodeExpVal == INT_MIN)
14772+
return SDValue();
14773+
int FalseNodeExpVal = FalseNode->getValueAPF().getExactLog2Abs();
14774+
if (FalseNodeExpVal == INT_MIN)
14775+
return SDValue();
14776+
// Build the i32 select over the two exponents, reusing the original
// select's condition.
14777+
SDLoc SL(N);
14778+
SDValue SelectNode = DAG.getNode(
14779+
ISD::SELECT, SL, IntVT, RHS.getOperand(0),
14780+
DAG.getSignedConstant(TrueNodeExpVal, SL, IntVT, false, false),
14781+
DAG.getSignedConstant(FalseNodeExpVal, SL, IntVT, false, false));
14782+
// The exponents were taken from the magnitudes, so a negative sign on the
// constants is folded into x via fneg.
14783+
LHS = TrueNode->isNegative()
14784+
? DAG.getNode(ISD::FNEG, SL, VT, LHS, LHS->getFlags())
14785+
: LHS;
14786+
// The resulting ldexp node carries over the flags of the original fmul.
14787+
return DAG.getNode(ISD::FLDEXP, SL, VT, LHS, SelectNode, N->getFlags());
14788+
}
14789+
14790+
return SDValue();
14791+
}
14792+
1473214793
SDValue SITargetLowering::performFMACombine(SDNode *N,
1473314794
DAGCombinerInfo &DCI) const {
1473414795
SelectionDAG &DAG = DCI.DAG;
@@ -14994,6 +15055,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1499415055
return performFSubCombine(N, DCI);
1499515056
case ISD::FDIV:
1499615057
return performFDivCombine(N, DCI);
15058+
case ISD::FMUL:
15059+
return performFMulCombine(N, DCI);
1499715060
case ISD::SETCC:
1499815061
return performSetCCCombine(N, DCI);
1499915062
case ISD::FMAXNUM:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
217217
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
218218
SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
219219
SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
220+
SDValue performFMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
220221
SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
221222
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
222223
SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;

0 commit comments

Comments
 (0)