[SelectionDAG] Share code for two of our multiply expansions. NFC

topperc · topperc · commit 0f0c16114254 · 2025-01-24T00:00:23.000-08:00
ExpandIntRes_MUL and forceExpandWideMUL have very similar code.
ExpandIntRes_MUL calculates the Lo and Hi halves of the result from 2
sources that are each split into Hi and Lo halves. forceExpandWideMUL
calculates the Lo and Hi halves of the full product of 2 values.

The only differences are that forceExpandWideMUL uses ISD::SRA
instead of ISD::SRL for a signed wide multiply, and that ExpandIntRes_MUL
needs 2 additional multiplies and 2 adds to compute HiRHS*LHS and
HiLHS*RHS and add them to Hi.

This patch introduces a new function that takes HiLHS and HiRHS as
optional values. If they are not null, they will be used in the
calculation of the Hi half. The Signed flag can only be set when
HiLHS/HiRHS are null.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5499,6 +5499,15 @@ class TargetLowering : public TargetLoweringBase {
   bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                   SelectionDAG &DAG) const;
 
+  /// Calculate the product twice the width of LHS and RHS into Lo and Hi. If
+  /// HiLHS/HiRHS are non-null they are included in the multiplication (Signed
+  /// must then be false). The expansion splits the 2 inputs into 4 pieces that
+  /// we can multiply and add together without needing MULH or MUL_LOHI.
+  void forceExpandMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
+                      SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS,
+                      SDValue HiLHS = SDValue(),
+                      SDValue HiRHS = SDValue()) const;
+
   /// Calculate full product of LHS and RHS either via a libcall or through
   /// brute force expansion of the multiplication. The expansion works by
   /// splitting the 2 inputs into 4 pieces that we can multiply and add together
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4294,44 +4294,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
     LC = RTLIB::MUL_I128;
 
   if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
-    // We'll expand the multiplication by brute force because we have no other
-    // options. This is a trivially-generalized version of the code from
-    // Hacker's Delight (itself derived from Knuth's Algorithm M from section
-    // 4.3.1).
-    unsigned Bits = NVT.getSizeInBits();
-    unsigned HalfBits = Bits >> 1;
-    SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
-                                   NVT);
-    SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
-    SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
-
-    SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
-    SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
-
-    SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
-    SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
-    SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
-    SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
-
-    SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
-                            DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
-    SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
-    SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
-
-    SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
-                            DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
-    SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
-
-    SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
-                            DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
-                            DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
-    Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
-                     DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
-
-    Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
-                     DAG.getNode(ISD::ADD, dl, NVT,
-                                 DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
-                                 DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+    TLI.forceExpandMUL(DAG, dl, /*Signed=*/false, Lo, Hi, LL, RL, LH, RH);
     return;
   }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10857,6 +10857,64 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
   return DAG.getSelect(dl, VT, Cond, SatVal, Result);
 }
 
+void TargetLowering::forceExpandMUL(SelectionDAG &DAG, const SDLoc &dl,
+                                    bool Signed, SDValue &Lo, SDValue &Hi,
+                                    SDValue LHS, SDValue RHS, SDValue HiLHS,
+                                    SDValue HiRHS) const {
+  EVT VT = LHS.getValueType();
+  assert(RHS.getValueType() == VT && "Mismatching operand types");
+  assert(((HiLHS && HiRHS) || (!HiLHS && !HiRHS)) &&
+         "HiLHS and HiRHS must be both set or both null");
+  assert((!Signed || !HiLHS) &&
+         "Signed flag should only be set when HiLHS and HiRHS are null");
+
+  // Compute the double-width product of LHS and RHS into Lo and Hi by brute
+  // force. This is a trivially-generalized version of the code from Hacker's
+  // Delight (itself derived from Knuth's Algorithm M from section 4.3.1). If
+  // Signed is set, we can use arithmetic right shifts to propagate sign bits
+  // while calculating the Hi half.
+  unsigned Bits = VT.getSizeInBits();
+  unsigned HalfBits = Bits / 2;
+  SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
+  SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
+  SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
+
+  SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
+  SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
+
+  SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
+  // T is a product of two masked halves, so this is always an unsigned shift.
+  SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
+
+  unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
+  SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
+  SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
+
+  SDValue U =
+      DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
+  SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
+  SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
+
+  SDValue V =
+      DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
+  SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
+
+  Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
+                   DAG.getNode(ISD::SHL, dl, VT, V, Shift));
+
+  Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
+                   DAG.getNode(ISD::ADD, dl, VT, UH, VH));
+
+  // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
+  // the products to Hi.
+  if (HiLHS) {
+    Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
+                     DAG.getNode(ISD::ADD, dl, VT,
+                                 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
+                                 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
+  }
+}
+
 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                         bool Signed, const SDValue LHS,
                                         const SDValue RHS, SDValue &Lo,
@@ -10876,7 +10934,11 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
   else if (WideVT == MVT::i128)
     LC = RTLIB::MUL_I128;
 
-  if (LC != RTLIB::UNKNOWN_LIBCALL && getLibcallName(LC)) {
+  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
+    forceExpandMUL(DAG, dl, Signed, Lo, Hi, LHS, RHS);
+    return;
+  }
+
     SDValue HiLHS, HiRHS;
     if (Signed) {
       // The high part is obtained by SRA'ing all but one of the bits of low
@@ -10916,44 +10978,6 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
       Lo = Ret.getOperand(1);
       Hi = Ret.getOperand(0);
     }
-    return;
-  }
-
-  // Expand the multiplication by brute force. This is a generalized-version of
-  // the code from Hacker's Delight (itself derived from Knuth's Algorithm M
-  // from section 4.3.1) combined with the Hacker's delight code
-  // for calculating mulhs.
-  unsigned Bits = VT.getSizeInBits();
-  unsigned HalfBits = Bits / 2;
-  SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
-  SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
-  SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
-
-  SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
-  SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
-
-  SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
-  // This is always an unsigned shift.
-  SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
-
-  unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
-  SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
-  SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
-
-  SDValue U =
-      DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
-  SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
-  SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
-
-  SDValue V =
-      DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
-  SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
-
-  Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
-                   DAG.getNode(ISD::SHL, dl, VT, V, Shift));
-
-  Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
-                   DAG.getNode(ISD::ADD, dl, VT, UH, VH));
 }
 
 SDValue