Move fma to selection stage

peterbell10 · peterbell10 · commit f63436134fc3 · 2025-01-09T18:44:58.000Z
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5325,23 +5325,6 @@ class TargetLowering : public TargetLoweringBase {
                            SDNodeFlags Flags, const SDLoc &DL,
                            SelectionDAG &DAG) const;
 
-  /// Expand floating point add
-  /// \param N Node to expand
-  /// \returns The expansion result or SDValue() if it fails.
-  SDValue expandFADD(SDNode *N, SelectionDAG &DAG) const;
-
-  /// Expand floating point multiply
-  /// \param N Node to expand
-  /// \param Result output after conversion
-  /// \returns The expansion result or SDValue() if it fails.
-  SDValue expandFMUL(SDNode *N, SelectionDAG &DAG) const;
-
-  /// Expand floating point subtract
-  /// \param N Node to expand
-  /// \param Result output after conversion
-  /// \returns The expansion result or SDValue() if it fails.
-  SDValue expandFSUB(SDNode *N, SelectionDAG &DAG) const;
-
   /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
   /// vector nodes can only succeed if all operations are legal/custom.
   /// \param N Node to expand
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17551,13 +17551,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
       return N2;
   }
 
-  const bool PreferFMAAdd = (TLI.isOperationLegal(ISD::FMA, VT) &&
-                             !TLI.isOperationLegal(ISD::FADD, VT));
-
   // FIXME: Support splat of constant.
-  if (!PreferFMAAdd && N0CFP && N0CFP->isExactlyValue(1.0))
+  if (N0CFP && N0CFP->isExactlyValue(1.0))
     return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
-  if (!PreferFMAAdd && N1CFP && N1CFP->isExactlyValue(1.0))
+  if (N1CFP && N1CFP->isExactlyValue(1.0))
     return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
 
   // Canonicalize (fma c, x, y) -> (fma x, c, y)
@@ -17589,7 +17586,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
 
   // (fma x, -1, y) -> (fadd (fneg x), y)
   // FIXME: Support splat of constant.
-  if (N1CFP && !PreferFMAAdd) {
+  if (N1CFP) {
     if (N1CFP->isExactlyValue(1.0))
       return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
 
@@ -17599,14 +17596,15 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
       AddToWorklist(RHSNeg.getNode());
       return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
     }
-  }
-  // fma (fneg x), K, y -> fma x -K, y
-  if (N1CFP && matcher.match(N0, ISD::FNEG) &&
-      (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-       (N1.hasOneUse() &&
-        !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
-    return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
-                           matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
+
+    // fma (fneg x), K, y -> fma x -K, y
+    if (matcher.match(N0, ISD::FNEG) &&
+        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+         (N1.hasOneUse() &&
+          !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
+      return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+                             matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
+    }
   }
 
   // FIXME: Support splat of constant.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3671,21 +3671,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(ExpandConstant(CP));
     break;
   }
-  case ISD::FADD: {
-    if (SDValue Expand = TLI.expandFADD(Node, DAG)) {
-      Results.push_back(Expand);
-    }
-    break;
-  }
-  case ISD::FMUL: {
-    if (SDValue Expand = TLI.expandFMUL(Node, DAG)) {
-      Results.push_back(Expand);
-    }
-    break;
-  }
   case ISD::FSUB: {
-    if (SDValue Expand = TLI.expandFSUB(Node, DAG)) {
-      Results.push_back(Expand);
+    EVT VT = Node->getValueType(0);
+    if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+        TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+      const SDNodeFlags Flags = Node->getFlags();
+      Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+      Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
+      Results.push_back(Tmp1);
     }
     break;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9068,60 +9068,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
   return Res;
 }
 
-SDValue TargetLowering::expandFADD(SDNode *Node, SelectionDAG &DAG) const {
-  auto VT = Node->getValueType(0);
-  if (!isOperationLegalOrCustom(ISD::FMA, VT)) {
-    return {};
-  }
-
-  // FADD(a, b) -> FMA(a, 1.0, b)
-  SDLoc DL(Node);
-  auto One = DAG.getConstantFP(1.0, DL, VT);
-  SmallVector<SDValue, 3> Operands{Node->getOperand(0), One,
-                                   Node->getOperand(1)};
-  return DAG.getNode(ISD::FMA, DL, VT, Operands, Node->getFlags());
-}
-
-SDValue TargetLowering::expandFMUL(SDNode *Node, SelectionDAG &DAG) const {
-  auto VT = Node->getValueType(0);
-  if (!isOperationLegalOrCustom(ISD::FMA, VT)) {
-    return {};
-  }
-
-  // FMUL(a, b) -> FMA(a, b, -0.0)
-  // NOTE: The identity is -0, not 0, because -0 + 0 == 0 for floats
-  SDLoc DL(Node);
-  auto NegZero = DAG.getConstantFP(-0.0, DL, VT);
-  SmallVector<SDValue, 3> Operands{Node->getOperand(0), Node->getOperand(1),
-                                   NegZero};
-  return DAG.getNode(ISD::FMA, DL, VT, Operands, Node->getFlags());
-}
-
-SDValue TargetLowering::expandFSUB(SDNode *Node, SelectionDAG &DAG) const {
-  SDLoc DL(Node);
-  SDNodeFlags SDFlags = Node->getFlags();
-  auto VT = Node->getValueType(0);
-
-  bool CanUseFMA = isOperationLegalOrCustom(ISD::FMA, VT);
-  bool CanUseAddSub = (isOperationLegalOrCustom(ISD::FADD, VT) &&
-                       isOperationLegalOrCustom(ISD::FNEG, VT));
-  bool PreferAddSub = CanUseAddSub && isFNegFree(VT);
-
-  // FSUB(a, b) -> FMA(b, -1.0, a)
-  if (CanUseFMA && !PreferAddSub) {
-    auto NegOne = DAG.getConstantFP(-1.0, DL, VT);
-    SmallVector<SDValue, 3> Operands{Node->getOperand(1), NegOne,
-                                     Node->getOperand(0)};
-    return DAG.getNode(ISD::FMA, DL, VT, Operands, SDFlags);
-  }
-  // FSUB(a, b) -> FADD(a, FNEG(b))
-  if (CanUseAddSub) {
-    auto Neg = DAG.getNode(ISD::FNEG, DL, VT, Node->getOperand(1));
-    return DAG.getNode(ISD::FADD, DL, VT, Node->getOperand(0), Neg, SDFlags);
-  }
-  return {};
-}
-
 // Only expand vector types if we have the appropriate vector bit operations.
 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
   assert(VT.isVector() && "Expected vector type");
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXISelDAGToDAG.h"
+#include "NVPTX.h"
 #include "NVPTXUtilities.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -190,6 +191,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
       return;
     }
     break;
+  case ISD::FADD:
+  case ISD::FMUL:
+  case ISD::FSUB:
+    if (tryBF16ArithToFMA(N))
+      return;
+    break;
   }
   default:
     break;
@@ -2450,6 +2457,66 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
   return true;
 }
 
+// Select bf16/v2bf16 FADD, FSUB, FMUL as FMA; returns true iff N was replaced.
+bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
+  EVT VT = SDValue(N, 0).getValueType();
+  if (VT.getScalarType() != MVT::bf16)
+    return false; // Element type is not bf16: nothing to rewrite here.
+
+  const NVPTXSubtarget *STI = TM.getSubtargetImpl();
+  const bool IsNativelySupported =
+      STI->getSmVersion() >= 90 && STI->getPTXVersion() >= 78;
+  if (IsNativelySupported)
+    return false; // Treated as natively supported: keep the original node.
+
+  assert(VT == MVT::bf16 || VT == MVT::v2bf16);
+  const bool IsVec = VT == MVT::v2bf16;
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SmallVector<SDValue, 3> Operands;
+  auto GetConstant = [&](float Value) -> SDValue { // Value as a bf16 mov node
+    APFloat APF(Value);
+    bool LosesInfo;
+    APF.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &LosesInfo);
+    assert(!LosesInfo); // Only called below with 1.0, -1.0 and -0.0.
+    if (IsVec) {
+      auto API = APF.bitcastToAPInt();
+      API = API.concat(API); // Same bf16 bit pattern in both halves of the i32.
+      auto Const = CurDAG->getTargetConstant(API, DL, MVT::i32);
+      return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32ri, DL, VT, Const), 0);
+    }
+    auto Const = CurDAG->getTargetConstantFP(APF, DL, VT);
+    return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16ri, DL, VT, Const), 0);
+  };
+
+  switch (N->getOpcode()) {
+  case ISD::FADD: {
+    // add(a, b) -> fma(a, 1.0, b)
+    Operands = {N0, GetConstant(1.0), N1};
+    break;
+  }
+  case ISD::FSUB: {
+    // sub(a, b) -> fma(b, -1.0, a), i.e. a - b computed as (b * -1.0) + a
+    Operands = {N1, GetConstant(-1.0), N0};
+    break;
+  }
+  case ISD::FMUL: {
+    // mul(a, b) -> fma(a, b, -0.0)
+    // NOTE: The addend must be -0, not +0: (-0) + (+0) == +0 would flip the sign
+    Operands = {N0, N1, GetConstant(-0.0)};
+    break;
+  }
+  default:
+    llvm_unreachable("Unexpected opcode");
+  };
+
+  int Opcode = IsVec ? NVPTX::BFMA16x2rrr : NVPTX::BFMA16rrr; // packed vs scalar
+  MachineSDNode *FMA = CurDAG->getMachineNode(Opcode, DL, VT, Operands);
+  ReplaceNode(N, FMA); // Replace N with the FMA machine node built above.
+  return true;
+}
+
 static inline bool isAddLike(const SDValue V) {
   return V.getOpcode() == ISD::ADD ||
          (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -84,6 +84,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   bool tryFence(SDNode *N);
   void SelectAddrSpaceCast(SDNode *N);
   bool tryBFE(SDNode *N);
+  bool tryBF16ArithToFMA(SDNode *N);
   bool tryConstantFP(SDNode *N);
   bool SelectSETP_F16X2(SDNode *N);
   bool SelectSETP_BF16X2(SDNode *N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2519,8 +2519,8 @@ SDValue NVPTXTargetLowering::LowerFADD(SDValue Op, SelectionDAG &DAG) const {
     return PromoteBinOpToF32(Op.getNode(), DAG);
   }
 
-  // FADD(a, b) -> FMA(a, 1.0, b)
-  return expandFADD(Op.getNode(), DAG);
+  // Legal
+  return Op;
 }
 
 SDValue NVPTXTargetLowering::LowerFSUB(SDValue Op, SelectionDAG &DAG) const {
@@ -2529,8 +2529,8 @@ SDValue NVPTXTargetLowering::LowerFSUB(SDValue Op, SelectionDAG &DAG) const {
     return PromoteBinOpToF32(Op.getNode(), DAG);
   }
 
-  // FSUB(a, b) -> FMA(b, -1.0, a)
-  return expandFSUB(Op.getNode(), DAG);
+  // Legal
+  return Op;
 }
 
 SDValue NVPTXTargetLowering::LowerFMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -2539,8 +2539,8 @@ SDValue NVPTXTargetLowering::LowerFMUL(SDValue Op, SelectionDAG &DAG) const {
     return PromoteBinOpToF32(Op.getNode(), DAG);
   }
 
-  // FMUL(a, b) -> FMA(a, b, -0.0)
-  return expandFMUL(Op.getNode(), DAG);
+  // Legal
+  return Op;
 }
 
 SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -114,9 +114,9 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) {
 ; SM80-EMPTY:
 ; SM80-NEXT:  // %bb.0:
 ; SM80-NEXT:    ld.param.b16 %rs1, [test_fsub_param_0];
-; SM80-NEXT:    ld.param.b16 %rs2, [test_fsub_param_1];
-; SM80-NEXT:    mov.b16 %rs3, 0xBF80;
-; SM80-NEXT:    fma.rn.bf16 %rs4, %rs2, %rs3, %rs1;
+; SM80-NEXT:    mov.b16 %rs2, 0xBF80;
+; SM80-NEXT:    ld.param.b16 %rs3, [test_fsub_param_1];
+; SM80-NEXT:    fma.rn.bf16 %rs4, %rs3, %rs2, %rs1;
 ; SM80-NEXT:    st.param.b16 [func_retval0], %rs4;
 ; SM80-NEXT:    ret;
 ;
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -26,9 +26,9 @@ define <2 x bfloat> @test_fadd_imm_0(<2 x bfloat> %a) #0 {
 ; SM80-EMPTY:
 ; SM80-NEXT:  // %bb.0:
 ; SM80-NEXT:    ld.param.b32 %r1, [test_fadd_imm_0_param_0];
-; SM80-NEXT:    mov.b32 %r2, 1073758080;
-; SM80-NEXT:    mov.b32 %r3, 1065369472;
-; SM80-NEXT:    fma.rn.bf16x2 %r4, %r1, %r3, %r2;
+; SM80-NEXT:    mov.b32 %r2, 1065369472;
+; SM80-NEXT:    mov.b32 %r3, 1073758080;
+; SM80-NEXT:    fma.rn.bf16x2 %r4, %r1, %r2, %r3;
 ; SM80-NEXT:    st.param.b32 [func_retval0], %r4;
 ; SM80-NEXT:    ret;
 ;
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
@@ -361,10 +361,10 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat
 ; CHECK-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
 ; CHECK-NEXT:    mov.b16 %rs5, 0x0000;
 ; CHECK-NEXT:    max.bf16 %rs6, %rs4, %rs5;
-; CHECK-NEXT:    mov.b16 %rs7, 0x40E0;
-; CHECK-NEXT:    mov.b16 %rs8, 0x3F80;
-; CHECK-NEXT:    fma.rn.bf16 %rs9, %rs4, %rs8, %rs7;
-; CHECK-NEXT:    fma.rn.bf16 %rs10, %rs6, %rs8, %rs9;
+; CHECK-NEXT:    mov.b16 %rs7, 0x3F80;
+; CHECK-NEXT:    mov.b16 %rs8, 0x40E0;
+; CHECK-NEXT:    fma.rn.bf16 %rs9, %rs4, %rs7, %rs8;
+; CHECK-NEXT:    fma.rn.bf16 %rs10, %rs6, %rs7, %rs9;
 ; CHECK-NEXT:    st.param.b16 [func_retval0], %rs10;
 ; CHECK-NEXT:    ret;
 ;
@@ -957,10 +957,10 @@ define <2 x bfloat> @fma_bf16x2_expanded_no_nans_multiple_uses_of_fma(<2 x bfloa
 ; CHECK-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
 ; CHECK-NEXT:    mov.b32 %r5, 0;
 ; CHECK-NEXT:    max.bf16x2 %r6, %r4, %r5;
-; CHECK-NEXT:    mov.b32 %r7, 1088438496;
-; CHECK-NEXT:    mov.b32 %r8, 1065369472;
-; CHECK-NEXT:    fma.rn.bf16x2 %r9, %r4, %r8, %r7;
-; CHECK-NEXT:    fma.rn.bf16x2 %r10, %r6, %r8, %r9;
+; CHECK-NEXT:    mov.b32 %r7, 1065369472;
+; CHECK-NEXT:    mov.b32 %r8, 1088438496;
+; CHECK-NEXT:    fma.rn.bf16x2 %r9, %r4, %r7, %r8;
+; CHECK-NEXT:    fma.rn.bf16x2 %r10, %r6, %r7, %r9;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r10;
 ; CHECK-NEXT:    ret;
 ;
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll
@@ -228,10 +228,10 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa
 ; CHECK-NEXT:    ld.param.b16 %rs2, [fma_bf16_no_nans_multiple_uses_of_fma_param_1];
 ; CHECK-NEXT:    ld.param.b16 %rs3, [fma_bf16_no_nans_multiple_uses_of_fma_param_2];
 ; CHECK-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs2, %rs3;
-; CHECK-NEXT:    mov.b16 %rs5, 0x40E0;
-; CHECK-NEXT:    mov.b16 %rs6, 0x3F80;
-; CHECK-NEXT:    fma.rn.bf16 %rs7, %rs4, %rs6, %rs5;
-; CHECK-NEXT:    fma.rn.bf16 %rs8, %rs7, %rs6, %rs4;
+; CHECK-NEXT:    mov.b16 %rs5, 0x3F80;
+; CHECK-NEXT:    mov.b16 %rs6, 0x40E0;
+; CHECK-NEXT:    fma.rn.bf16 %rs7, %rs4, %rs5, %rs6;
+; CHECK-NEXT:    fma.rn.bf16 %rs8, %rs7, %rs5, %rs4;
 ; CHECK-NEXT:    st.param.b16 [func_retval0], %rs8;
 ; CHECK-NEXT:    ret;
 ;
@@ -641,10 +641,10 @@ define <2 x bfloat> @fma_bf16x2_no_nans_multiple_uses_of_fma(<2 x bfloat> %a, <2
 ; CHECK-NEXT:    ld.param.b32 %r2, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_1];
 ; CHECK-NEXT:    ld.param.b32 %r3, [fma_bf16x2_no_nans_multiple_uses_of_fma_param_0];
 ; CHECK-NEXT:    fma.rn.bf16x2 %r4, %r3, %r2, %r1;
-; CHECK-NEXT:    mov.b32 %r5, 1088438496;
-; CHECK-NEXT:    mov.b32 %r6, 1065369472;
-; CHECK-NEXT:    fma.rn.bf16x2 %r7, %r4, %r6, %r5;
-; CHECK-NEXT:    fma.rn.bf16x2 %r8, %r7, %r6, %r4;
+; CHECK-NEXT:    mov.b32 %r5, 1065369472;
+; CHECK-NEXT:    mov.b32 %r6, 1088438496;
+; CHECK-NEXT:    fma.rn.bf16x2 %r7, %r4, %r5, %r6;
+; CHECK-NEXT:    fma.rn.bf16x2 %r8, %r7, %r5, %r4;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
 ; CHECK-NEXT:    ret;
 ;
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll b/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll

Original file line number	Diff line number	Diff line change
`@@ -2519,8 +2519,8 @@ SDValue NVPTXTargetLowering::LowerFADD(SDValue Op, SelectionDAG &DAG) const {`
`2519`	`2519`	`return PromoteBinOpToF32(Op.getNode(), DAG);`
`2520`	`2520`	`}`
`2521`	`2521`
`2522`		`- // FADD(a, b) -> FMA(a, 1.0, b)`
`2523`		`- return expandFADD(Op.getNode(), DAG);`
	`2522`	`+ // Legal`
	`2523`	`+ return Op;`
`2524`	`2524`	`}`
`2525`	`2525`
`2526`	`2526`	`SDValue NVPTXTargetLowering::LowerFSUB(SDValue Op, SelectionDAG &DAG) const {`
`@@ -2529,8 +2529,8 @@ SDValue NVPTXTargetLowering::LowerFSUB(SDValue Op, SelectionDAG &DAG) const {`
`2529`	`2529`	`return PromoteBinOpToF32(Op.getNode(), DAG);`
`2530`	`2530`	`}`
`2531`	`2531`
`2532`		`- // FSUB(a, b) -> FMA(b, -1.0, a)`
`2533`		`- return expandFSUB(Op.getNode(), DAG);`
	`2532`	`+ // Legal`
	`2533`	`+ return Op;`
`2534`	`2534`	`}`
`2535`	`2535`
`2536`	`2536`	`SDValue NVPTXTargetLowering::LowerFMUL(SDValue Op, SelectionDAG &DAG) const {`
`@@ -2539,8 +2539,8 @@ SDValue NVPTXTargetLowering::LowerFMUL(SDValue Op, SelectionDAG &DAG) const {`
`2539`	`2539`	`return PromoteBinOpToF32(Op.getNode(), DAG);`
`2540`	`2540`	`}`
`2541`	`2541`
`2542`		`- // FMUL(a, b) -> FMA(a, b, -0.0)`
`2543`		`- return expandFMUL(Op.getNode(), DAG);`
	`2542`	`+ // Legal`
	`2543`	`+ return Op;`
`2544`	`2544`	`}`
`2545`	`2545`
`2546`	`2546`	`SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,`