@@ -440,6 +440,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
440440 setOperationAction(ISD::VECTOR_SHUFFLE, {MVT::v2i32, MVT::v2f32}, Legal);
441441 }
442442
443+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, MVT::v2i32, Legal);
444+ // Prevent SELECT v2i32 from being implemented with the above bitwise ops and
445+ // instead lower to cndmask in SITargetLowering::LowerSELECT().
446+ setOperationAction(ISD::SELECT, MVT::v2i32, Custom);
447+ // Enable MatchRotate to produce ISD::ROTR, which is later transformed to
448+ // alignbit.
449+ setOperationAction(ISD::ROTR, MVT::v2i32, Custom);
450+
443451 setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16, MVT::v4bf16},
444452 Custom);
445453
@@ -6528,6 +6536,20 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
65286536 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
65296537}
65306538
6539+ // Enable lowering of ROTR for vxi32 types. This is a workaround for a
6540+ // regression whereby extra unnecessary instructions were added to codegen
6541+ // for rotr operations, casued by legalising v2i32 or. This resulted in extra
6542+ // instructions to extract the result from the vector.
6543+ SDValue SITargetLowering::lowerROTR(SDValue Op, SelectionDAG &DAG) const {
6544+ [[maybe_unused]] EVT VT = Op.getValueType();
6545+
6546+ assert((VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v8i32 ||
6547+ VT == MVT::v16i32) &&
6548+ "Unexpected ValueType.");
6549+
6550+ return DAG.UnrollVectorOp(Op.getNode());
6551+ }
6552+
65316553// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the
65326554// wider vector type is legal.
65336555SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
@@ -6719,6 +6741,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
67196741 return lowerGET_FPENV(Op, DAG);
67206742 case ISD::SET_FPENV:
67216743 return lowerSET_FPENV(Op, DAG);
6744+ case ISD::ROTR:
6745+ return lowerROTR(Op, DAG);
67226746 }
67236747 return SDValue();
67246748}
@@ -13801,6 +13825,47 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
1380113825 }
1380213826 }
1380313827
13828+ // Detect identity v2i32 OR and replace with identity source node.
13829+ // Specifically an Or that has operands constructed from the same source node
13830+ // via extract_vector_elt and build_vector. I.E.
13831+ // v2i32 or(
13832+ // v2i32 build_vector(
13833+ // i32 extract_elt(%IdentitySrc, 0),
13834+ // i32 0
13835+ // ),
13836+ // v2i32 build_vector(
13837+ // i32 0,
13838+ // i32 extract_elt(%IdentitySrc, 1)
13839+ // ) )
13840+ // =>
13841+ // v2i32 %IdentitySrc
13842+
13843+ if (VT == MVT::v2i32 && LHS->getOpcode() == ISD::BUILD_VECTOR &&
13844+ RHS->getOpcode() == ISD::BUILD_VECTOR) {
13845+
13846+ ConstantSDNode *LC = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
13847+ ConstantSDNode *RC = dyn_cast<ConstantSDNode>(RHS->getOperand(0));
13848+
13849+ // Test for and normalise build vectors.
13850+ if (LC && RC && LC->getZExtValue() == 0 && RC->getZExtValue() == 0) {
13851+
13852+ // Get the extract_vector_element operands.
13853+ SDValue LEVE = LHS->getOperand(0);
13854+ SDValue REVE = RHS->getOperand(1);
13855+
13856+ if (LEVE->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13857+ REVE->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13858+ // Check that different elements from the same vector are
13859+ // extracted.
13860+ if (LEVE->getOperand(0) == REVE->getOperand(0) &&
13861+ LEVE->getOperand(1) != REVE->getOperand(1)) {
13862+ SDValue IdentitySrc = LEVE.getOperand(0);
13863+ return IdentitySrc;
13864+ }
13865+ }
13866+ }
13867+ }
13868+
1380413869 if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
1380513870 return SDValue();
1380613871
@@ -13848,7 +13913,7 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
1384813913 SDValue LHS = N->getOperand(0);
1384913914 SDValue RHS = N->getOperand(1);
1385013915
13851- const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode> (RHS);
13916+ const ConstantSDNode *CRHS = isConstOrConstSplat (RHS);
1385213917 SelectionDAG &DAG = DCI.DAG;
1385313918
1385413919 EVT VT = N->getValueType(0);
@@ -13858,6 +13923,23 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
1385813923 return Split;
1385913924 }
1386013925
13926+ // v2i32 (xor (vselect cc, x, y), K) ->
13927+ // (v2i32 svelect cc, (xor x, K), (xor y, K)) This enables the xor to be
13928+ // replaced with source modifiers when the select is lowered to CNDMASK.
13929+ unsigned Opc = LHS.getOpcode();
13930+ if (((Opc == ISD::VSELECT && VT == MVT::v2i32) ||
13931+ (Opc == ISD::SELECT && VT == MVT::i64)) &&
13932+ CRHS && CRHS->getAPIntValue().isSignMask()) {
13933+ SDValue CC = LHS->getOperand(0);
13934+ SDValue TRUE = LHS->getOperand(1);
13935+ SDValue FALSE = LHS->getOperand(2);
13936+ SDValue XTrue = DAG.getNode(ISD::XOR, SDLoc(N), VT, TRUE, RHS);
13937+ SDValue XFalse = DAG.getNode(ISD::XOR, SDLoc(N), VT, FALSE, RHS);
13938+ SDValue XSelect =
13939+ DAG.getNode(ISD::VSELECT, SDLoc(N), VT, CC, XTrue, XFalse);
13940+ return XSelect;
13941+ }
13942+
1386113943 // Make sure to apply the 64-bit constant splitting fold before trying to fold
1386213944 // fneg-like xors into 64-bit select.
1386313945 if (LHS.getOpcode() == ISD::SELECT && VT == MVT::i32) {
@@ -14848,6 +14930,27 @@ SITargetLowering::performExtractVectorEltCombine(SDNode *N,
1484814930 return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
1484914931 }
1485014932
14933+ // (extract_vector_element (and {y0, y1}, (build_vector 0x1f, 0x1f)), index)
14934+ // -> (and (extract_vector_element {y0, y1}, index), 0x1f)
14935+ // There are optimisations to transform 64-bit shifts into 32-bit shifts
14936+ // depending on the shift operand. See e.g. performSraCombine().
14937+ // This combine ensures that the optimisation is compatible with v2i32
14938+ // legalised AND.
14939+ if (VecVT == MVT::v2i32 && Vec->getOpcode() == ISD::AND &&
14940+ Vec->getOperand(1)->getOpcode() == ISD::BUILD_VECTOR) {
14941+
14942+ const ConstantSDNode *C = isConstOrConstSplat(Vec.getOperand(1));
14943+ if (!C || C->getZExtValue() != 0x1f)
14944+ return SDValue();
14945+
14946+ SDLoc SL(N);
14947+ SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
14948+ SDValue EVE = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
14949+ Vec->getOperand(0), N->getOperand(1));
14950+ SDValue A = DAG.getNode(ISD::AND, SL, MVT::i32, EVE, AndMask);
14951+ DAG.ReplaceAllUsesWith(N, A.getNode());
14952+ }
14953+
1485114954 // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
1485214955 // =>
1485314956 // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
0 commit comments