@@ -903,6 +903,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
903903 ISD::FADD,
904904 ISD::FSUB,
905905 ISD::FDIV,
906+ ISD::FMUL,
906907 ISD::FMINNUM,
907908 ISD::FMAXNUM,
908909 ISD::FMINNUM_IEEE,
@@ -14595,6 +14596,70 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N,
1459514596 return SDValue();
1459614597}
1459714598
14599+ SDValue SITargetLowering::performFMulCombine(SDNode *N,
14600+ DAGCombinerInfo &DCI) const {
14601+ SelectionDAG &DAG = DCI.DAG;
14602+ EVT VT = N->getValueType(0);
14603+ EVT scalarVT = VT.getScalarType();
14604+ EVT IntVT = VT.changeElementType(MVT::i32);
14605+
14606+ SDLoc SL(N);
14607+ SDValue LHS = N->getOperand(0);
14608+ SDValue RHS = N->getOperand(1);
14609+
14610+ SDNodeFlags Flags = N->getFlags();
14611+ SDNodeFlags LHSFlags = LHS->getFlags();
14612+
14613+ // It is cheaper to realize i32 inline constants as compared against
14614+ // as materializing f16 or f64 (or even non-inline f32) values,
14615+ // possible via ldexp usage, as shown below :
14616+ //
14617+ // Given : A = 2^a & B = 2^b ; where a and b are integers.
14618+ // fmul x, (select y, A, B) -> ldexp( x, (select i32 y, a, b) )
14619+ // fmul x, (select y, -A, -B) -> ldexp( (fneg x), (select i32 y, a, b) )
14620+ if (scalarVT == MVT::f64 || scalarVT == MVT::f32 || scalarVT == MVT::f16) {
14621+ if (RHS.hasOneUse() && RHS.getOpcode() == ISD::SELECT) {
14622+ const ConstantFPSDNode *TrueNode =
14623+ isConstOrConstSplatFP(RHS.getOperand(1));
14624+ if (!TrueNode)
14625+ return SDValue();
14626+ const ConstantFPSDNode *FalseNode =
14627+ isConstOrConstSplatFP(RHS.getOperand(2));
14628+ if (!FalseNode)
14629+ return SDValue();
14630+
14631+ if (TrueNode->isNegative() != FalseNode->isNegative())
14632+ return SDValue();
14633+
14634+ // For f32, only non-inline constants should be transformed.
14635+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
14636+ if (scalarVT == MVT::f32 &&
14637+ TII->isInlineConstant(TrueNode->getValueAPF()) &&
14638+ TII->isInlineConstant(FalseNode->getValueAPF()))
14639+ return SDValue();
14640+
14641+ LHS = TrueNode->isNegative()
14642+ ? DAG.getNode(ISD::FNEG, SL, VT, LHS, LHSFlags)
14643+ : LHS;
14644+
14645+ int TrueNodeExpVal = TrueNode->getValueAPF().getExactLog2Abs();
14646+ if (TrueNodeExpVal == INT_MIN)
14647+ return SDValue();
14648+ int FalseNodeExpVal = FalseNode->getValueAPF().getExactLog2Abs();
14649+ if (FalseNodeExpVal == INT_MIN)
14650+ return SDValue();
14651+
14652+ SDValue SelectNode =
14653+ DAG.getNode(ISD::SELECT, SL, IntVT, RHS.getOperand(0),
14654+ DAG.getConstant(TrueNodeExpVal, SL, IntVT),
14655+ DAG.getConstant(FalseNodeExpVal, SL, IntVT));
14656+ return DAG.getNode(ISD::FLDEXP, SL, VT, LHS, SelectNode, Flags);
14657+ }
14658+ }
14659+
14660+ return SDValue();
14661+ }
14662+
1459814663SDValue SITargetLowering::performFMACombine(SDNode *N,
1459914664 DAGCombinerInfo &DCI) const {
1460014665 SelectionDAG &DAG = DCI.DAG;
@@ -14881,6 +14946,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1488114946 return performFSubCombine(N, DCI);
1488214947 case ISD::FDIV:
1488314948 return performFDivCombine(N, DCI);
14949+ case ISD::FMUL:
14950+ return performFMulCombine(N, DCI);
1488414951 case ISD::SETCC:
1488514952 return performSetCCCombine(N, DCI);
1488614953 case ISD::FMAXNUM:
0 commit comments