[RISCV] Expand Zfa fli+fneg cases during lowering instead of during isel. (#108316)

topperc · web-flow · commit de6d7a6c3093 · 2024-09-11T22:31:45.000-07:00
Most of the constants fli can generate are positive numbers. We can use fli+fneg to generate their negative versions. Previously, we considered such negative constants as "legal" and let isel generate the fli+fneg. However, it is useful to expose the fneg to DAG combines to fold with fadd to produce fsub or with fma to produce fnmadd, fnmsub, or fmsub. This patch moves the fneg creation to lowering so that the fneg will be visible to the last DAG combine. I might move the rest of Zfa handling from isel to lowering as a follow-up. Fixes #107772.
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -889,33 +889,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
   }
   case ISD::ConstantFP: {
     const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
-    auto [FPImm, NeedsFNeg] =
-        static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
-                                                                        VT);
+    int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
+        APF, VT);
     if (FPImm >= 0) {
       unsigned Opc;
-      unsigned FNegOpc;
       switch (VT.SimpleTy) {
       default:
         llvm_unreachable("Unexpected size");
       case MVT::f16:
         Opc = RISCV::FLI_H;
-        FNegOpc = RISCV::FSGNJN_H;
         break;
       case MVT::f32:
         Opc = RISCV::FLI_S;
-        FNegOpc = RISCV::FSGNJN_S;
         break;
       case MVT::f64:
         Opc = RISCV::FLI_D;
-        FNegOpc = RISCV::FSGNJN_D;
         break;
       }
       SDNode *Res = CurDAG->getMachineNode(
           Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
-      if (NeedsFNeg)
-        Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
-                                     SDValue(Res, 0));
 
       ReplaceNode(Node, Res);
       return;
@@ -3563,9 +3555,8 @@ bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
   // If FLI can materialize this constant directly, prefer it over immediate
   // materialization as the latter might generate a longer instruction sequence.
   // getLegalZfaFPImm only reports constants FLI can produce without an FNEG.
-  if (static_cast<const RISCVTargetLowering *>(TLI)
-          ->getLegalZfaFPImm(APF, VT)
-          .first >= 0)
+  if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
+                                                                      VT) >= 0)
     return false;
 
   MVT XLenVT = Subtarget->getXLenVT();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -469,6 +469,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
                          Subtarget.hasStdExtZfa() ? Legal : Custom);
+      if (Subtarget.hasStdExtZfa())
+        setOperationAction(ISD::ConstantFP, MVT::f16, Custom);
     } else {
       setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
@@ -533,6 +535,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
 
     if (Subtarget.hasStdExtZfa()) {
+      setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
       setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
     } else {
@@ -550,6 +553,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
 
     if (Subtarget.hasStdExtZfa()) {
+      setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
       setOperationAction(FPRndMode, MVT::f64, Legal);
       setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
@@ -2238,17 +2242,11 @@ bool RISCVTargetLowering::isOffsetFoldingLegal(
   return false;
 }
 
-// Return one of the followings:
-// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
-// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
-// positive counterpart, which will be materialized from the first returned
-// element. The second returned element indicated that there should be a FNEG
-// followed.
-// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
-std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
-                                                           EVT VT) const {
+// Returns 0-31 if the fli instruction is available for the type and this is a
+// legal FP immediate for the type. Returns -1 otherwise.
+int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
   if (!Subtarget.hasStdExtZfa())
-    return std::make_pair(-1, false);
+    return -1;
 
   bool IsSupportedVT = false;
   if (VT == MVT::f16) {
@@ -2261,14 +2259,9 @@ std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
   }
 
   if (!IsSupportedVT)
-    return std::make_pair(-1, false);
+    return -1;
 
-  int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
-  if (Index < 0 && Imm.isNegative())
-    // Try the combination of its positive counterpart + FNEG.
-    return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
-  else
-    return std::make_pair(Index, false);
+  return RISCVLoadFPImm::getLoadFPImm(Imm);
 }
 
 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
@@ -2286,7 +2279,7 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   if (!IsLegalVT)
     return false;
 
-  if (getLegalZfaFPImm(Imm, VT).first >= 0)
+  if (getLegalZfaFPImm(Imm, VT) >= 0)
     return true;
 
   // Cannot create a 64 bit floating-point immediate value for rv32.
@@ -5816,6 +5809,37 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Custom lowering for ISD::ConstantFP.
+///
+/// Returns \p Op unchanged when getLegalZfaFPImm reports an FLI index for the
+/// constant itself. Otherwise, if the constant is negative and its negation has
+/// an FLI index, returns (fneg (RISCVISD::FLI index)) so that the FNEG is an
+/// ordinary DAG node. In every other case returns an empty SDValue.
+SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
+
+  // The constant already has an FLI encoding; keep the node as is.
+  if (getLegalZfaFPImm(Imm, VT) >= 0)
+    return Op;
+
+  // The FLI+FNEG expansion below is only attempted for negative constants.
+  if (!Imm.isNegative())
+    return SDValue();
+
+  int Index = getLegalZfaFPImm(-Imm, VT);
+  if (Index < 0)
+    return SDValue();
+
+  // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
+  SDLoc DL(Op);
+  SDValue Const =
+      DAG.getNode(RISCVISD::FLI, DL, VT,
+                  DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
+  return DAG.getNode(ISD::FNEG, DL, VT, Const);
+}
+
 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   SDLoc dl(Op);
@@ -6435,6 +6451,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerGlobalTLSAddress(Op, DAG);
   case ISD::Constant:
     return lowerConstant(Op, DAG, Subtarget);
+  case ISD::ConstantFP:
+    return lowerConstantFP(Op, DAG);
   case ISD::SELECT:
     return lowerSELECT(Op, DAG);
   case ISD::BRCOND:
@@ -19978,6 +19996,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(FSGNJX)
   NODE_NAME_CASE(FMAX)
   NODE_NAME_CASE(FMIN)
+  NODE_NAME_CASE(FLI)
   NODE_NAME_CASE(READ_COUNTER_WIDE)
   NODE_NAME_CASE(BREV8)
   NODE_NAME_CASE(ORC_B)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -130,6 +130,9 @@ enum NodeType : unsigned {
   // Floating point fmax and fmin matching the RISC-V instruction semantics.
   FMAX, FMIN,
 
+  // Zfa fli instruction for constant materialization.
+  FLI,
+
   // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
   // It takes a chain operand and another two target constant operands (the
   // CSR numbers of the low and high parts of the counter).
@@ -524,7 +527,7 @@ class RISCVTargetLowering : public TargetLowering {
                           SmallVectorImpl<Use *> &Ops) const override;
   bool shouldScalarizeBinop(SDValue VecOp) const override;
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
-  std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
+  int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
@@ -914,6 +917,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
   SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
 
+  SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -11,6 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCVFLI
+    : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, XLenVT>]>;
+
+def riscv_fli : SDNode<"RISCVISD::FLI", SDT_RISCVFLI>;
+
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
@@ -189,6 +198,8 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt",
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtZfa] in {
+def: Pat<(f32 (riscv_fli timm:$imm)), (FLI_S timm:$imm)>;
+
 def: PatFprFpr<fminimum, FMINM_S, FPR32, f32>;
 def: PatFprFpr<fmaximum, FMAXM_S, FPR32, f32>;
 
@@ -211,6 +222,8 @@ def: PatSetCC<FPR32, strict_fsetcc, SETOLE, FLEQ_S, f32>;
 } // Predicates = [HasStdExtZfa]
 
 let Predicates = [HasStdExtZfa, HasStdExtD] in {
+def: Pat<(f64 (riscv_fli timm:$imm)), (FLI_D timm:$imm)>;
+
 def: PatFprFpr<fminimum, FMINM_D, FPR64, f64>;
 def: PatFprFpr<fmaximum, FMAXM_D, FPR64, f64>;
 
@@ -239,6 +252,8 @@ def : Pat<(RISCVBuildPairF64 GPR:$rs1, GPR:$rs2),
 }
 
 let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
+def: Pat<(f16 (riscv_fli timm:$imm)), (FLI_H timm:$imm)>;
+
 def: PatFprFpr<fminimum, FMINM_H, FPR16, f16>;
 def: PatFprFpr<fmaximum, FMAXM_H, FPR16, f16>;
 
diff --git a/llvm/test/CodeGen/RISCV/double-zfa.ll b/llvm/test/CodeGen/RISCV/double-zfa.ll
@@ -330,3 +330,48 @@ define double @fmvp_d_x(i64 %a) {
   %or = bitcast i64 %a to double
   ret double %or
 }
+
+; fadd of -0.5 is expected to select fli.d 0.5 followed by fsub.d.
+define double @fadd_neg_0p5(double %x) {
+; CHECK-LABEL: fadd_neg_0p5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.d fa5, 0.5
+; CHECK-NEXT:    fsub.d fa0, fa0, fa5
+; CHECK-NEXT:    ret
+  %a = fadd double %x, -0.5
+  ret double %a
+}
+
+; fma with a -0.5 addend is expected to select fli.d 0.5 and fmsub.d.
+define double @fma_neg_addend(double %x, double %y) nounwind {
+; CHECK-LABEL: fma_neg_addend:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.d fa5, 0.5
+; CHECK-NEXT:    fmsub.d fa0, fa0, fa1, fa5
+; CHECK-NEXT:    ret
+  %a = call double @llvm.fma.f64(double %x, double %y, double -0.5)
+  ret double %a
+}
+
+; fma with a -0.125 multiplicand is expected to select fli.d 0.125 and fnmsub.d.
+define double @fma_neg_multiplicand(double %x, double %y) nounwind {
+; CHECK-LABEL: fma_neg_multiplicand:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.d fa5, 0.125
+; CHECK-NEXT:    fnmsub.d fa0, fa5, fa0, fa1
+; CHECK-NEXT:    ret
+  %a = call double @llvm.fma.f64(double %x, double -0.125, double %y)
+  ret double %a
+}
+
+; fma with -0.5 multiplicand and -0.25 addend is expected to select fnmadd.d.
+define double @fma_neg_addend_multiplicand(double %x) nounwind {
+; CHECK-LABEL: fma_neg_addend_multiplicand:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.d fa5, 0.25
+; CHECK-NEXT:    fli.d fa4, 0.5
+; CHECK-NEXT:    fnmadd.d fa0, fa4, fa0, fa5
+; CHECK-NEXT:    ret
+  %a = call double @llvm.fma.f64(double %x, double -0.5, double -0.25)
+  ret double %a
+}
diff --git a/llvm/test/CodeGen/RISCV/float-zfa.ll b/llvm/test/CodeGen/RISCV/float-zfa.ll
@@ -269,3 +269,44 @@ define void @fli_remat() {
   tail call void @foo(float 1.000000e+00, float 1.000000e+00)
   ret void
 }
+
+define float @fadd_neg_0p5(float %x) {
+; CHECK-LABEL: fadd_neg_0p5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.s fa5, 0.5
+; CHECK-NEXT:    fsub.s fa0, fa0, fa5
+; CHECK-NEXT:    ret
+  %a = fadd float %x, -0.5
+  ret float %a
+}
+
+define float @fma_neg_addend(float %x, float %y) nounwind {
+; CHECK-LABEL: fma_neg_addend:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.s fa5, 0.5
+; CHECK-NEXT:    fmsub.s fa0, fa0, fa1, fa5
+; CHECK-NEXT:    ret
+  %a = call float @llvm.fma.f32(float %x, float %y, float -0.5)
+  ret float %a
+}
+
+define float @fma_neg_multiplicand(float %x, float %y) nounwind {
+; CHECK-LABEL: fma_neg_multiplicand:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.s fa5, 0.125
+; CHECK-NEXT:    fnmsub.s fa0, fa5, fa0, fa1
+; CHECK-NEXT:    ret
+  %a = call float @llvm.fma.f32(float %x, float -0.125, float %y)
+  ret float %a
+}
+
+define float @fma_neg_addend_multiplicand(float %x) nounwind {
+; CHECK-LABEL: fma_neg_addend_multiplicand:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fli.s fa5, 0.25
+; CHECK-NEXT:    fli.s fa4, 0.5
+; CHECK-NEXT:    fnmadd.s fa0, fa4, fa0, fa5
+; CHECK-NEXT:    ret
+  %a = call float @llvm.fma.f32(float %x, float -0.5, float -0.25)
+  ret float %a
+}
diff --git a/llvm/test/CodeGen/RISCV/half-zfa.ll b/llvm/test/CodeGen/RISCV/half-zfa.ll