llvm
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Lines changed: 31 additions & 5 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Lines changed: 31 additions & 5 deletions
diff --git a/‎llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Lines changed: 162 additions & 134 deletions b/‎llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Lines changed: 162 additions & 134 deletions
@@ -3059,36 +3059,62 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
     Src = Src.getOperand(0);
   }
 
+  // v2i32 xor/or/and are legal. A vselect using these instructions as operands
+  // is scalarised into two selects with EXTRACT_VECTOR_ELT operands. Peek
+  // through this extract if possible.
+  auto getVectorBitWiseOp = [](SDValue S) -> SDValue {
+    if (S->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+      SDValue VecOp = S->getOperand(0);
+      if (VecOp.getOpcode() == ISD::XOR || VecOp.getOpcode() == ISD::AND ||
+          VecOp.getOpcode() == ISD::OR)
+        return VecOp;
+    }
+    return SDValue();
+  };
+
+  SDValue Vec = getVectorBitWiseOp(Src);
+  SDValue BWSrc = Vec ? Vec : Src;
   // Convert various sign-bit masks to src mods. Currently disabled for 16-bit
   // types as the codegen replaces the operand without adding a srcmod.
   // This is intentionally finding the cases where we are performing float neg
   // and abs on int types, the goal is not to obtain two's complement neg or
   // abs.
   // TODO: Add 16-bit support.
-  unsigned Opc = Src->getOpcode();
+  unsigned Opc = Vec ? Vec->getOpcode() : Src->getOpcode();
   EVT VT = Src.getValueType();
   if ((Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR) ||
       (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
     return true;
 
-  ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1));
+  ConstantSDNode *CRHS =
+      isConstOrConstSplat(Vec ? Vec->getOperand(1) : Src->getOperand(1));
   if (!CRHS)
     return true;
 
+  auto ReplaceSrc = [&]() -> SDValue {
+    if (Vec) {
+      SDValue LHS = BWSrc->getOperand(0);
+      SDValue Index = Src->getOperand(1);
+      return Src = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Src),
+                                   Src.getValueType(), LHS, Index);
+    }
+    return Src = BWSrc.getOperand(0);
+  };
+
   // Recognise (xor a, 0x80000000) as NEG SrcMod.
   // Recognise (and a, 0x7fffffff) as ABS SrcMod.
   // Recognise (or a, 0x80000000) as NEG+ABS SrcModifiers.
   if (Opc == ISD::XOR && CRHS->getAPIntValue().isSignMask()) {
     Mods |= SISrcMods::NEG;
-    Src = Src.getOperand(0);
+    Src = ReplaceSrc();
   } else if (Opc == ISD::AND && AllowAbs &&
              CRHS->getAPIntValue().isMaxSignedValue()) {
     Mods |= SISrcMods::ABS;
-    Src = Src.getOperand(0);
+    Src = ReplaceSrc();
   } else if (Opc == ISD::OR && AllowAbs && CRHS->getAPIntValue().isSignMask()) {
     Mods |= SISrcMods::ABS;
     Mods |= SISrcMods::NEG;
-    Src = Src.getOperand(0);
+    Src = ReplaceSrc();
   }
 
   return true;
 
@@ -13361,43 +13361,31 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
-  // Fold the fneg of a vselect into the v2 vselect operands.
-  // xor (vselect c, a, b), 0x80000000 ->
-  //   bitcast (vselect c, (fneg (bitcast a)), (fneg (bitcast b)))
-  if (VT == MVT::v2i32 && LHS.getNumOperands() > 1) {
-
-    const ConstantSDNode *CRHS0 = dyn_cast<ConstantSDNode>(RHS.getOperand(0));
-    const ConstantSDNode *CRHS1 = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
-    SDValue LHS_0 = LHS.getOperand(0);
-    SDValue LHS_1 = LHS.getOperand(1);
-
-    if (LHS.getOpcode() == ISD::VSELECT && CRHS0 &&
-        CRHS0->getAPIntValue().isSignMask() &&
-        shouldFoldFNegIntoSrc(N, LHS_0) && CRHS1 &&
-        CRHS1->getAPIntValue().isSignMask() &&
-        shouldFoldFNegIntoSrc(N, LHS_1)) {
-
-      SDLoc DL(N);
-      SDValue CastLHS =
-          DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(1));
-      SDValue CastRHS =
-          DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(2));
-      SDValue FNegLHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastLHS);
-      SDValue FNegRHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastRHS);
-      SDValue NewSelect = DAG.getNode(ISD::VSELECT, DL, MVT::v2f32,
-                                      LHS->getOperand(0), FNegLHS, FNegRHS);
-      return DAG.getNode(ISD::BITCAST, DL, VT, NewSelect);
-    }
-  }
-
-  const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+  const ConstantSDNode *CRHS = isConstOrConstSplat(RHS);
 
   if (CRHS && VT == MVT::i64) {
     if (SDValue Split =
             splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
       return Split;
   }
 
+  // Distribute a sign-mask xor over the arms of a select:
+  //   v2i32 (xor (vselect cc, x, y), K) ->
+  //     (v2i32 vselect cc, (xor x, K), (xor y, K))
+  //   i64 (xor (select cc, x, y), K) ->
+  //     (i64 select cc, (xor x, K), (xor y, K))
+  // This enables the xor to be replaced with source modifiers when the select
+  // is lowered to CNDMASK.
+  // TODO REMOVE: prevents regressions in fneg-modifier-casting.ll
+  unsigned Opc = LHS.getOpcode();
+  if (((Opc == ISD::VSELECT && VT == MVT::v2i32) ||
+       (Opc == ISD::SELECT && VT == MVT::i64)) &&
+      CRHS && CRHS->getAPIntValue().isSignMask()) {
+    SDLoc SL(N);
+    SDValue Cond = LHS->getOperand(0);
+    SDValue TrueVal = LHS->getOperand(1);
+    SDValue FalseVal = LHS->getOperand(2);
+    SDValue XTrue = DAG.getNode(ISD::XOR, SL, VT, TrueVal, RHS);
+    SDValue XFalse = DAG.getNode(ISD::XOR, SL, VT, FalseVal, RHS);
+    // Rebuild the node with the opcode that was matched: the i64 case is a
+    // scalar ISD::SELECT and must not be re-emitted as an ISD::VSELECT.
+    return DAG.getNode(Opc, SL, VT, Cond, XTrue, XFalse);
+  }
+  
+
+
   // Make sure to apply the 64-bit constant splitting fold before trying to fold
   // fneg-like xors into 64-bit select.
   if (LHS.getOpcode() == ISD::SELECT && VT == MVT::i32) {
@@ -14362,125 +14350,165 @@ bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
       EltSize, NumElem, Idx->isDivergent(), getSubtarget());
 }
 
-SDValue
-SITargetLowering::performExtractVectorEltCombine(SDNode *N,
-                                                 DAGCombinerInfo &DCI) const {
-  SDValue Vec = N->getOperand(0);
-  SelectionDAG &DAG = DCI.DAG;
+// SDValue
+// SITargetLowering::performBuildVectorCombine(SDNode *N,
+//                                             DAGCombinerInfo &DCI) const {
+//   // if (N->use_empty())
+//   //   return SDValue();
 
-  EVT VecVT = Vec.getValueType();
-  EVT VecEltVT = VecVT.getVectorElementType();
-  EVT ResVT = N->getValueType(0);
+//   // if(!N->getValueType(0).isFloatingPoint())
+//   //    return SDValue();
 
-  unsigned VecSize = VecVT.getSizeInBits();
-  unsigned VecEltSize = VecEltVT.getSizeInBits();
+//   //    SelectionDAG &DAG = DCI.DAG;
 
-  if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
-      allUsesHaveSourceMods(N)) {
-    SDLoc SL(N);
-    SDValue Idx = N->getOperand(1);
-    SDValue Elt =
-        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(0), Idx);
-    return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
-  }
-
-  // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
-  //    =>
-  // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
-  // Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
-  // ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
-  if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
-    SDLoc SL(N);
-    SDValue Idx = N->getOperand(1);
-    unsigned Opc = Vec.getOpcode();
+//   // // Iterate the operands. Check if source modifier. If so, propagate the
+//   // // source modifier to the user and the srcmod from the BUILD_VECTOR
+//   // // element.
+//   // for (unsigned I = 0; I < N->getNumOperands(); I++) {
+//   //   SDValue E = N->getOperand(I);
+//   //   if (E->getOpcode() != ISD::FNEG && E->getOpcode() != ISD::ABS)
+//   //     continue;
 
-    switch (Opc) {
-    default:
-      break;
-      // TODO: Support other binary operations.
-    case ISD::FADD:
-    case ISD::FSUB:
-    case ISD::FMUL:
-    case ISD::ADD:
-    case ISD::UMIN:
-    case ISD::UMAX:
-    case ISD::SMIN:
-    case ISD::SMAX:
-    case ISD::FMAXNUM:
-    case ISD::FMINNUM:
-    case ISD::FMAXNUM_IEEE:
-    case ISD::FMINNUM_IEEE:
-    case ISD::FMAXIMUM:
-    case ISD::FMINIMUM: {
-      SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
-                                 Vec.getOperand(0), Idx);
-      SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
-                                 Vec.getOperand(1), Idx);
-
-      DCI.AddToWorklist(Elt0.getNode());
-      DCI.AddToWorklist(Elt1.getNode());
-      return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
-    }
-    }
-  }
-
-  // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
-  if (shouldExpandVectorDynExt(N)) {
-    SDLoc SL(N);
-    SDValue Idx = N->getOperand(1);
-    SDValue V;
-    for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
-      SDValue IC = DAG.getVectorIdxConstant(I, SL);
-      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
-      if (I == 0)
-        V = Elt;
-      else
-        V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+//   //   // Users through which we can propagate will include users of
+//   //   // extract_element on this vector, so need to peek-through.
+//   // }
+
+//   // SmallVector<SDNode*, 4> UsersToModify;
+
+//   // // If the use of the BUILD_VECTOR supports source mods it can be
+//   // // propagated.
+//   // for (SDNode *U : N->users()) {
+//   //   if(!U->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+//   //   if (!allUsesHaveSourceMods(U))
+//   //     continue;
+//   //   UsersToModify.push_back(U);
+//   // }
+
+//   //   for(auto Node: UsersToModify) {
+
+//   //   }
+
+//   return SDValue();
+// }
+
+  SDValue SITargetLowering::performExtractVectorEltCombine(
+      SDNode * N, DAGCombinerInfo & DCI) const {
+    SDValue Vec = N->getOperand(0);
+    SelectionDAG &DAG = DCI.DAG;
+
+    EVT VecVT = Vec.getValueType();
+    EVT VecEltVT = VecVT.getVectorElementType();
+    EVT ResVT = N->getValueType(0);
+
+    unsigned VecSize = VecVT.getSizeInBits();
+    unsigned VecEltSize = VecEltVT.getSizeInBits();
+
+    if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
+        allUsesHaveSourceMods(N)) {
+      SDLoc SL(N);
+      SDValue Idx = N->getOperand(1);
+      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+                                Vec.getOperand(0), Idx);
+      return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
+    }
+
+    // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
+    //    =>
+    // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
+    // Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
+    // ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
+    if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
+      SDLoc SL(N);
+      SDValue Idx = N->getOperand(1);
+      unsigned Opc = Vec.getOpcode();
+
+      switch (Opc) {
+      default:
+        break;
+        // TODO: Support other binary operations.
+      case ISD::FADD:
+      case ISD::FSUB:
+      case ISD::FMUL:
+      case ISD::ADD:
+      case ISD::UMIN:
+      case ISD::UMAX:
+      case ISD::SMIN:
+      case ISD::SMAX:
+      case ISD::FMAXNUM:
+      case ISD::FMINNUM:
+      case ISD::FMAXNUM_IEEE:
+      case ISD::FMINNUM_IEEE:
+      case ISD::FMAXIMUM:
+      case ISD::FMINIMUM: {
+        SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+                                   Vec.getOperand(0), Idx);
+        SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+                                   Vec.getOperand(1), Idx);
+
+        DCI.AddToWorklist(Elt0.getNode());
+        DCI.AddToWorklist(Elt1.getNode());
+        return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
+      }
+      }
     }
-    return V;
-  }
 
-  if (!DCI.isBeforeLegalize())
-    return SDValue();
+    // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
+    if (shouldExpandVectorDynExt(N)) {
+      SDLoc SL(N);
+      SDValue Idx = N->getOperand(1);
+      SDValue V;
+      for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+        SDValue IC = DAG.getVectorIdxConstant(I, SL);
+        SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
+        if (I == 0)
+          V = Elt;
+        else
+          V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+      }
+      return V;
+    }
 
-  // Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
-  // elements. This exposes more load reduction opportunities by replacing
-  // multiple small extract_vector_elements with a single 32-bit extract.
-  auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
-      VecSize > 32 && VecSize % 32 == 0 && Idx) {
-    EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
-
-    unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
-    unsigned EltIdx = BitIndex / 32;
-    unsigned LeftoverBitIdx = BitIndex % 32;
-    SDLoc SL(N);
+    if (!DCI.isBeforeLegalize())
+      return SDValue();
 
-    SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
-    DCI.AddToWorklist(Cast.getNode());
+    // Try to turn sub-dword accesses of vectors into accesses of the same
+    // 32-bit elements. This exposes more load reduction opportunities by
+    // replacing multiple small extract_vector_elements with a single 32-bit
+    // extract.
+    auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
+        VecSize > 32 && VecSize % 32 == 0 && Idx) {
+      EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
+
+      unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
+      unsigned EltIdx = BitIndex / 32;
+      unsigned LeftoverBitIdx = BitIndex % 32;
+      SDLoc SL(N);
 
-    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
-                              DAG.getConstant(EltIdx, SL, MVT::i32));
-    DCI.AddToWorklist(Elt.getNode());
-    SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
-                              DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
-    DCI.AddToWorklist(Srl.getNode());
+      SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
+      DCI.AddToWorklist(Cast.getNode());
 
-    EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
-    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
-    DCI.AddToWorklist(Trunc.getNode());
+      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
+                                DAG.getConstant(EltIdx, SL, MVT::i32));
+      DCI.AddToWorklist(Elt.getNode());
+      SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
+                                DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
+      DCI.AddToWorklist(Srl.getNode());
 
-    if (VecEltVT == ResVT) {
-      return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
+      EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
+      DCI.AddToWorklist(Trunc.getNode());
+
+      if (VecEltVT == ResVT) {
+        return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
+      }
+
+      assert(ResVT.isScalarInteger());
+      return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
     }
 
-    assert(ResVT.isScalarInteger());
-    return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
+    return SDValue();
   }
 
-  return SDValue();
-}
-
 SDValue
 SITargetLowering::performInsertVectorEltCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {