@@ -918,6 +918,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
918
918
ISD::FADD,
919
919
ISD::FSUB,
920
920
ISD::FDIV,
921
+ ISD::FMUL,
921
922
ISD::FMINNUM,
922
923
ISD::FMAXNUM,
923
924
ISD::FMINNUM_IEEE,
@@ -14764,6 +14765,66 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N,
14764
14765
return SDValue();
14765
14766
}
14766
14767
14768
+ SDValue SITargetLowering::performFMulCombine(SDNode *N,
14769
+ DAGCombinerInfo &DCI) const {
14770
+ SelectionDAG &DAG = DCI.DAG;
14771
+ EVT VT = N->getValueType(0);
14772
+ EVT ScalarVT = VT.getScalarType();
14773
+ EVT IntVT = VT.changeElementType(MVT::i32);
14774
+
14775
+ SDValue LHS = N->getOperand(0);
14776
+ SDValue RHS = N->getOperand(1);
14777
+
14778
+ // It is cheaper to realize i32 inline constants as compared against
14779
+ // materializing f16 or f64 (or even non-inline f32) values,
14780
+ // possible via ldexp usage, as shown below :
14781
+ //
14782
+ // Given : A = 2^a & B = 2^b ; where a and b are integers.
14783
+ // fmul x, (select y, A, B) -> ldexp( x, (select i32 y, a, b) )
14784
+ // fmul x, (select y, -A, -B) -> ldexp( (fneg x), (select i32 y, a, b) )
14785
+ if ((ScalarVT == MVT::f64 || ScalarVT == MVT::f32 || ScalarVT == MVT::f16) &&
14786
+ (RHS.hasOneUse() && RHS.getOpcode() == ISD::SELECT)) {
14787
+ const ConstantFPSDNode *TrueNode = isConstOrConstSplatFP(RHS.getOperand(1));
14788
+ if (!TrueNode)
14789
+ return SDValue();
14790
+ const ConstantFPSDNode *FalseNode =
14791
+ isConstOrConstSplatFP(RHS.getOperand(2));
14792
+ if (!FalseNode)
14793
+ return SDValue();
14794
+
14795
+ if (TrueNode->isNegative() != FalseNode->isNegative())
14796
+ return SDValue();
14797
+
14798
+ // For f32, only non-inline constants should be transformed.
14799
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
14800
+ if (ScalarVT == MVT::f32 &&
14801
+ TII->isInlineConstant(TrueNode->getValueAPF()) &&
14802
+ TII->isInlineConstant(FalseNode->getValueAPF()))
14803
+ return SDValue();
14804
+
14805
+ int TrueNodeExpVal = TrueNode->getValueAPF().getExactLog2Abs();
14806
+ if (TrueNodeExpVal == INT_MIN)
14807
+ return SDValue();
14808
+ int FalseNodeExpVal = FalseNode->getValueAPF().getExactLog2Abs();
14809
+ if (FalseNodeExpVal == INT_MIN)
14810
+ return SDValue();
14811
+
14812
+ SDLoc SL(N);
14813
+ SDValue SelectNode = DAG.getNode(
14814
+ ISD::SELECT, SL, IntVT, RHS.getOperand(0),
14815
+ DAG.getSignedConstant(TrueNodeExpVal, SL, IntVT, false, false),
14816
+ DAG.getSignedConstant(FalseNodeExpVal, SL, IntVT, false, false));
14817
+
14818
+ LHS = TrueNode->isNegative()
14819
+ ? DAG.getNode(ISD::FNEG, SL, VT, LHS, LHS->getFlags())
14820
+ : LHS;
14821
+
14822
+ return DAG.getNode(ISD::FLDEXP, SL, VT, LHS, SelectNode, N->getFlags());
14823
+ }
14824
+
14825
+ return SDValue();
14826
+ }
14827
+
14767
14828
SDValue SITargetLowering::performFMACombine(SDNode *N,
14768
14829
DAGCombinerInfo &DCI) const {
14769
14830
SelectionDAG &DAG = DCI.DAG;
@@ -15029,6 +15090,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
15029
15090
return performFSubCombine(N, DCI);
15030
15091
case ISD::FDIV:
15031
15092
return performFDivCombine(N, DCI);
15093
+ case ISD::FMUL:
15094
+ return performFMulCombine(N, DCI);
15032
15095
case ISD::SETCC:
15033
15096
return performSetCCCombine(N, DCI);
15034
15097
case ISD::FMAXNUM:
0 commit comments