llvm
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Lines changed: 39 additions & 84 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Lines changed: 39 additions & 84 deletions
@@ -726,18 +726,6 @@ static bool selectSupportsSourceMods(const SDNode *N) {
   return N->getValueType(0) == MVT::f32;
 }
 
-LLVM_READONLY
-static bool buildVectorSupportsSourceMods(const SDNode *N) {
-  if (N->getValueType(0) != MVT::v2f32)
-    return true;
-
-  if (N->getOperand(0)->getOpcode() != ISD::SELECT ||
-      N->getOperand(1)->getOpcode() != ISD::SELECT)
-    return true;
-
-  return false;
-}
-
 // Most FP instructions support source modifiers, but this could be refined
 // slightly.
 LLVM_READONLY
@@ -771,8 +759,6 @@ static bool hasSourceMods(const SDNode *N) {
       return true;
     }
   }
-  case ISD::BUILD_VECTOR:
-    return buildVectorSupportsSourceMods(N);
   case ISD::SELECT:
     return selectSupportsSourceMods(N);
   default:
@@ -4069,15 +4055,21 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
 }
 
-SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
-                                                DAGCombinerInfo &DCI) const {
-  EVT VT = N->getValueType(0);
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
-  SDLoc SL(N);
-  SelectionDAG &DAG = DCI.DAG;
+// One of the shift combines optimises the case where it is possible to
+// reduce e.g. shl64 to shl32 when the shift amount is in the range [32, 63].
+// This transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31)]. The '&'
+// is then elided by ISel. The vector code for this used to be completely
+// scalarised by the vector legalizer, but when v2i32 is legal the vector
+// legalizer only partially scalarises the vector operations and the AND is
+// not elided. This function scalarises the AND for this optimisation case,
+// building the 32-bit shift with the opcode given by ShiftOpc.
+static SDValue getShiftForReduction(unsigned ShiftOpc, SDValue LHS, SDValue RHS,
+                                    SelectionDAG &DAG) {
+  assert(
+      (ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
+      "Expected shift Opcode.");
 
+  SDLoc SL = SDLoc(RHS);
   if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
     SDValue VAND = RHS.getOperand(0);
     if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
@@ -4086,15 +4078,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
         SDValue LHSAND = VAND.getOperand(0);
         SDValue RHSAND = VAND.getOperand(1);
         if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
-          // Part of shlcombine is to optimise for the case where its possible
-          // to reduce shl64 to shl32 if shift range is [63-32]. This
-          // transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
-          // '&' is then elided by ISel. The vector code for this was being
-          // completely scalarised by the vector legalizer, but now v2i32 is
-          // made legal the vector legaliser only partially scalarises the
-          // vector operations and the and was not elided. This check enables us
-          // to locate and scalarise the v2i32 and and re-enable ISel to elide
-          // the and instruction.
           ConstantSDNode *CANDL =
               dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
           ConstantSDNode *CANDR =
@@ -4108,19 +4091,33 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
                                      LHSAND, Zero);
             SDValue Hi =
                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
-            SDValue LoAnd =
-                DAG.getNode(ISD::AND, SL, MVT::i32, Lo, RHSAND->getOperand(0));
-            SDValue HiAnd =
-                DAG.getNode(ISD::AND, SL, MVT::i32, Hi, RHSAND->getOperand(0));
+            SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
+            SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
+            SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
             SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
             if (AndIndex == 0 || AndIndex == 1)
-              return DAG.getNode(ISD::SHL, SL, MVT::i32, Trunc,
-                                 AndIndex == 0 ? LoAnd : HiAnd, N->getFlags());
+              return DAG.getNode(ShiftOpc, SL, MVT::i32, Trunc,
+                                 AndIndex == 0 ? LoAnd : HiAnd,
+                                 RHS->getFlags());
           }
         }
       }
     }
   }
+  return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+  SDLoc SL(N);
+  SelectionDAG &DAG = DCI.DAG;
+
+  if (SDValue SS = getShiftForReduction(ISD::SHL, LHS, RHS, DAG))
+    return SS;
 
   unsigned RHSVal;
   if (CRHS) {
@@ -4222,6 +4219,9 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
   SelectionDAG &DAG = DCI.DAG;
   SDLoc SL(N);
 
+  if (SDValue SS = getShiftForReduction(ISD::SRA, LHS, RHS, DAG))
+    return SS;
+
   if (VT.getScalarType() != MVT::i64)
     return SDValue();
 
@@ -4314,52 +4314,6 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
   return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
 }
 
-static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
-  SDLoc SL = SDLoc(RHS);
-  if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-    SDValue VAND = RHS.getOperand(0);
-    if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
-      uint64_t AndIndex = RHS->getConstantOperandVal(1);
-      if (VAND->getOpcode() == ISD::AND && CRRHS) {
-        SDValue LHSAND = VAND.getOperand(0);
-        SDValue RHSAND = VAND.getOperand(1);
-        if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
-          // Part of srlcombine is to optimise for the case where its possible
-          // to reduce shl64 to shl32 if shift range is [63-32]. This
-          // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
-          // '&' is then elided by ISel. The vector code for this was being
-          // completely scalarised by the vector legalizer, but now v2i32 is
-          // made legal the vector legaliser only partially scalarises the
-          // vector operations and the and was not elided. This check enables us
-          // to locate and scalarise the v2i32 and and re-enable ISel to elide
-          // the and instruction.
-          ConstantSDNode *CANDL =
-              dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
-          ConstantSDNode *CANDR =
-              dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
-          if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
-              RHSAND->getConstantOperandVal(1) == 0x1f) {
-            // Get the non-const AND operands and produce scalar AND
-            const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
-            const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-            SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
-                                     LHSAND, Zero);
-            SDValue Hi =
-                DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
-            SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
-            SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
-            SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
-            SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
-            if (AndIndex == 0 || AndIndex == 1)
-              return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
-                                 AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
-          }
-        }
-      }
-    }
-  }
-  return SDValue();
-}
 
 SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
@@ -4371,7 +4325,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
   SDLoc SL(N);
   unsigned RHSVal;
 
-
+  if (SDValue SS = getShiftForReduction(ISD::SRL, LHS, RHS, DAG))
+    return SS;
 
   if (CRHS) {
     RHSVal = CRHS->getZExtValue();