Factor the shift-reducing combine logic into one shared function.

chrisjbris · chrisjbris · commit b4f391c05df3 · 2025-07-21T10:52:08.000-05:00
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4069,15 +4069,22 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
 }
 
-SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
-                                                DAGCombinerInfo &DCI) const {
-  EVT VT = N->getValueType(0);
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
-  SDLoc SL(N);
-  SelectionDAG &DAG = DCI.DAG;
-
+// Part of the shift combines is to optimise for the case where it's possible
+// to reduce e.g. shl64 to shl32 if the shift amount is in the range [32, 63].
+// This transforms e.g. DST = shl i64 X, Y to [0, shl i32 X, (Y & 31)]. The
+// '&' is then elided by ISel. The vector code for this was being
+// completely scalarised by the vector legaliser, but when v2i32 is
+// legal the vector legaliser only partially scalarises the
+// vector operations and the AND is not elided. This function
+// scalarises the AND for this optimisation case.
+static SDValue getShiftForReduction(unsigned ShiftOpc, SDValue LHS, SDValue RHS,
+                                    SelectionDAG &DAG) {
+
+  assert(
+      (ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
+      "Expected shift Opcode.");
+
+  SDLoc SL = SDLoc(RHS);
   if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
     SDValue VAND = RHS.getOperand(0);
     if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
@@ -4086,15 +4093,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
         SDValue LHSAND = VAND.getOperand(0);
         SDValue RHSAND = VAND.getOperand(1);
         if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
-          // Part of shlcombine is to optimise for the case where its possible
-          // to reduce shl64 to shl32 if shift range is [63-32]. This
-          // transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
-          // '&' is then elided by ISel. The vector code for this was being
-          // completely scalarised by the vector legalizer, but now v2i32 is
-          // made legal the vector legaliser only partially scalarises the
-          // vector operations and the and was not elided. This check enables us
-          // to locate and scalarise the v2i32 and and re-enable ISel to elide
-          // the and instruction.
           ConstantSDNode *CANDL =
               dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
           ConstantSDNode *CANDR =
@@ -4108,19 +4106,33 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
                                      LHSAND, Zero);
             SDValue Hi =
                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
-            SDValue LoAnd =
-                DAG.getNode(ISD::AND, SL, MVT::i32, Lo, RHSAND->getOperand(0));
-            SDValue HiAnd =
-                DAG.getNode(ISD::AND, SL, MVT::i32, Hi, RHSAND->getOperand(0));
+            SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
+            SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
+            SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
             SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
             if (AndIndex == 0 || AndIndex == 1)
-              return DAG.getNode(ISD::SHL, SL, MVT::i32, Trunc,
-                                 AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
+              return DAG.getNode(ShiftOpc, SL, MVT::i32, Trunc,
+                                 AndIndex == 0 ? LoAnd : HiAnd,
+                                 RHS->getFlags());
           }
         }
       }
     }
   }
+  return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+  SDLoc SL(N);
+  SelectionDAG &DAG = DCI.DAG;
+
+  if(SDValue SS = getShiftForReduction(ISD::SHL, LHS, RHS, DAG))
+    return SS;
 
   unsigned RHSVal;
   if (CRHS) {
@@ -4222,48 +4234,8 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
   SelectionDAG &DAG = DCI.DAG;
   SDLoc SL(N);
 
-  if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-    SDValue VAND = RHS.getOperand(0);
-    if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
-      uint64_t AndIndex = RHS->getConstantOperandVal(1);
-      if (VAND->getOpcode() == ISD::AND && CRRHS) {
-        SDValue LHSAND = VAND.getOperand(0);
-        SDValue RHSAND = VAND.getOperand(1);
-        if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
-          // Part of sracombine is to optimise for the case where its possible
-          // to reduce shl64 to shl32 if shift range is [63-32]. This
-          // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
-          // '&' is then elided by ISel. The vector code for this was being
-          // completely scalarised by the vector legalizer, but now v2i32 is
-          // made legal the vector legaliser only partially scalarises the
-          // vector operations and the and was not elided. This check enables us
-          // to locate and scalarise the v2i32 and and re-enable ISel to elide
-          // the and instruction.
-          ConstantSDNode *CANDL =
-              dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
-          ConstantSDNode *CANDR =
-              dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
-          if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
-              RHSAND->getConstantOperandVal(1) == 0x1f) {
-            // Get the non-const AND operands and produce scalar AND
-            const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
-            const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-            SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
-                                     LHSAND, Zero);
-            SDValue Hi =
-                DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
-            SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
-            SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
-            SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
-            SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
-            if (AndIndex == 0 || AndIndex == 1)
-              return DAG.getNode(ISD::SRA, SL, MVT::i32, Trunc,
-                                 AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
-          }
-        }
-      }
-    }
-  }
+  if(SDValue SS = getShiftForReduction(ISD::SRA, LHS, RHS, DAG))
+    return SS;
 
   if (VT.getScalarType() != MVT::i64)
     return SDValue();
@@ -4357,52 +4329,6 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
   return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
 }
 
-// static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
-//   SDLoc SL = SDLoc(RHS);
-//   if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-//     SDValue VAND = RHS.getOperand(0);
-//     if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
-//       uint64_t AndIndex = RHS->getConstantOperandVal(1);
-//       if (VAND->getOpcode() == ISD::AND && CRRHS) {
-//         SDValue LHSAND = VAND.getOperand(0);
-//         SDValue RHSAND = VAND.getOperand(1);
-//         if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
-//           // Part of srlcombine is to optimise for the case where its possible
-//           // to reduce shl64 to shl32 if shift range is [63-32]. This
-//           // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
-//           // '&' is then elided by ISel. The vector code for this was being
-//           // completely scalarised by the vector legalizer, but now v2i32 is
-//           // made legal the vector legaliser only partially scalarises the
-//           // vector operations and the and was not elided. This check enables us
-//           // to locate and scalarise the v2i32 and and re-enable ISel to elide
-//           // the and instruction.
-//           ConstantSDNode *CANDL =
-//               dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
-//           ConstantSDNode *CANDR =
-//               dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
-//           if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
-//               RHSAND->getConstantOperandVal(1) == 0x1f) {
-//             // Get the non-const AND operands and produce scalar AND
-//             const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
-//             const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-//             SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
-//                                      LHSAND, Zero);
-//             SDValue Hi =
-//                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
-//             SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
-//             SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
-//             SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
-//             SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
-//             if (AndIndex == 0 || AndIndex == 1)
-//               return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
-//                                  AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
-//           }
-//         }
-//       }
-//     }
-//   }
-//   return SDValue();
-// }
 
 SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
@@ -4414,49 +4340,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
   SDLoc SL(N);
   unsigned RHSVal;
 
-  if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-    SDValue VAND = RHS.getOperand(0);
-    if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
-      uint64_t AndIndex = RHS->getConstantOperandVal(1);
-      if (VAND->getOpcode() == ISD::AND && CRRHS) {
-        SDValue LHSAND = VAND.getOperand(0);
-        SDValue RHSAND = VAND.getOperand(1);
-        if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
-          // Part of srlcombine is to optimise for the case where its possible
-          // to reduce shl64 to shl32 if shift range is [63-32]. This
-          // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
-          // '&' is then elided by ISel. The vector code for this was being
-          // completely scalarised by the vector legalizer, but now v2i32 is
-          // made legal the vector legaliser only partially scalarises the
-          // vector operations and the and was not elided. This check enables us
-          // to locate and scalarise the v2i32 and and re-enable ISel to elide
-          // the and instruction.
-          ConstantSDNode *CANDL =
-              dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
-          ConstantSDNode *CANDR =
-              dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
-          if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
-              RHSAND->getConstantOperandVal(1) == 0x1f) {
-            // Get the non-const AND operands and produce scalar AND
-            const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
-            const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-            SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
-                                     LHSAND, Zero);
-            SDValue Hi =
-                DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
-            SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
-            SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
-            SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
-            SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
-            if (AndIndex == 0 || AndIndex == 1)
-              return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
-                                 AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
-          }
-        }
-      }
-    }
-  }
-
+  if(SDValue SS = getShiftForReduction(ISD::SRL, LHS, RHS, DAG))
+    return SS;
 
   if (CRHS) {
     RHSVal = CRHS->getZExtValue();