@@ -440,6 +440,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, {MVT::v2i32, MVT::v2f32}, Legal);
}

+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, MVT::v2i32, Legal);
+ // Prevent SELECT v2i32 from being implemented with the above bitwise ops and
+ // instead lower to cndmask in SITargetLowering::LowerSELECT().
+ setOperationAction(ISD::SELECT, MVT::v2i32, Custom);
+ // Enable MatchRotate to produce ISD::ROTR, which is later transformed to
+ // alignbit.
+ setOperationAction(ISD::ROTR, MVT::v2i32, Custom);
+
setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16, MVT::v4bf16},
Custom);
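Note (not part of this patch): the ISD::ROTR Custom action added above is what allows the generic DAG combiner's MatchRotate to form a rotate for v2i32 in the first place, per the in-code comment. The pattern MatchRotate recognizes is the usual or-of-shifts rotate idiom; a minimal standalone C++ sketch of the scalar form (the rotr32 helper is illustrative only, not code from this change):

    #include <cstdint>

    // Or-of-shifts rotate idiom that the DAG combiner canonicalizes to
    // ISD::ROTR; on AMDGPU the scalar ROTR is then selected to v_alignbit_b32.
    uint32_t rotr32(uint32_t X, uint32_t N) {
      N &= 31; // rotate amount is taken modulo the bit width
      return (X >> N) | (X << ((32 - N) & 31));
    }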
@@ -6528,6 +6536,20 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}

+ // Enable lowering of ROTR for vxi32 types. This is a workaround for a
+ // regression whereby extra unnecessary instructions were added to codegen
+ // for rotr operations, caused by legalising v2i32 OR. This resulted in extra
+ // instructions to extract the result from the vector.
+ SDValue SITargetLowering::lowerROTR(SDValue Op, SelectionDAG &DAG) const {
+ [[maybe_unused]] EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v8i32 ||
+ VT == MVT::v16i32) &&
+ "Unexpected ValueType.");
+
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the
// wider vector type is legal.
SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
@@ -6719,6 +6741,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerGET_FPENV(Op, DAG);
case ISD::SET_FPENV:
return lowerSET_FPENV(Op, DAG);
+ case ISD::ROTR:
+ return lowerROTR(Op, DAG);
}
return SDValue();
}
@@ -13801,6 +13825,47 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
}
}

+ // Detect identity v2i32 OR and replace with identity source node.
+ // Specifically an Or that has operands constructed from the same source node
+ // via extract_vector_elt and build_vector, i.e.
+ // v2i32 or(
+ // v2i32 build_vector(
+ // i32 extract_elt(%IdentitySrc, 0),
+ // i32 0
+ // ),
+ // v2i32 build_vector(
+ // i32 0,
+ // i32 extract_elt(%IdentitySrc, 1)
+ // ) )
+ // =>
+ // v2i32 %IdentitySrc
+
+ if (VT == MVT::v2i32 && LHS->getOpcode() == ISD::BUILD_VECTOR &&
+ RHS->getOpcode() == ISD::BUILD_VECTOR) {
+
+ ConstantSDNode *LC = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ ConstantSDNode *RC = dyn_cast<ConstantSDNode>(RHS->getOperand(0));
+
+ // Check that the other (non-extracted) element of each build_vector is zero.
+ if (LC && RC && LC->getZExtValue() == 0 && RC->getZExtValue() == 0) {
+
+ // Get the extract_vector_element operands.
+ SDValue LEVE = LHS->getOperand(0);
+ SDValue REVE = RHS->getOperand(1);
+
+ if (LEVE->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ REVE->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // Check that different elements from the same vector are
+ // extracted.
+ if (LEVE->getOperand(0) == REVE->getOperand(0) &&
+ LEVE->getOperand(1) != REVE->getOperand(1)) {
+ SDValue IdentitySrc = LEVE.getOperand(0);
+ return IdentitySrc;
+ }
+ }
+ }
+ }
+
if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
return SDValue();
@@ -13848,7 +13913,7 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);

- const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode> (RHS);
+ const ConstantSDNode *CRHS = isConstOrConstSplat (RHS);
SelectionDAG &DAG = DCI.DAG;

EVT VT = N->getValueType(0);
@@ -13858,6 +13923,23 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
return Split;
}

+ // v2i32 (xor (vselect cc, x, y), K) ->
+ // (v2i32 vselect cc, (xor x, K), (xor y, K)). This enables the xor to be
+ // replaced with source modifiers when the select is lowered to CNDMASK.
+ unsigned Opc = LHS.getOpcode();
+ if (((Opc == ISD::VSELECT && VT == MVT::v2i32) ||
+ (Opc == ISD::SELECT && VT == MVT::i64)) &&
+ CRHS && CRHS->getAPIntValue().isSignMask()) {
+ SDValue CC = LHS->getOperand(0);
+ SDValue TRUE = LHS->getOperand(1);
+ SDValue FALSE = LHS->getOperand(2);
+ SDValue XTrue = DAG.getNode(ISD::XOR, SDLoc(N), VT, TRUE, RHS);
+ SDValue XFalse = DAG.getNode(ISD::XOR, SDLoc(N), VT, FALSE, RHS);
+ SDValue XSelect =
+ DAG.getNode(Opc, SDLoc(N), VT, CC, XTrue, XFalse);
+ return XSelect;
+ }
+
// Make sure to apply the 64-bit constant splitting fold before trying to fold
// fneg-like xors into 64-bit select.
if (LHS.getOpcode() == ISD::SELECT && VT == MVT::i32) {
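Note (not part of this patch): the isSignMask() guard above is what connects this fold to source modifiers. For floating-point data carried in these integer types, XOR with the sign-bit mask is a bitwise fneg, and once the XOR sits directly on the select operands it can be absorbed as a 'neg' source modifier when the select becomes CNDMASK, as the in-code comment states. A standalone sketch of that bit-level identity (illustrative only; negateViaXor is a hypothetical helper, not code from this change):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Flipping only the sign bit of an IEEE-754 float negates it, which is why
    // a sign-mask XOR on a select operand can become a 'neg' source modifier.
    float negateViaXor(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      Bits ^= 0x80000000u;
      std::memcpy(&F, &Bits, sizeof(F));
      return F;
    }

    int main() {
      assert(negateViaXor(1.5f) == -1.5f);
      assert(negateViaXor(-2.0f) == 2.0f);
      return 0;
    }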
@@ -14848,6 +14930,27 @@ SITargetLowering::performExtractVectorEltCombine(SDNode *N,
return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
}

+ // (extract_vector_element (and {y0, y1}, (build_vector 0x1f, 0x1f)), index)
+ // -> (and (extract_vector_element {y0, y1}, index), 0x1f)
+ // There are optimisations to transform 64-bit shifts into 32-bit shifts
+ // depending on the shift operand. See e.g. performSraCombine().
+ // This combine ensures that the optimisation is compatible with v2i32
+ // legalised AND.
+ if (VecVT == MVT::v2i32 && Vec->getOpcode() == ISD::AND &&
+ Vec->getOperand(1)->getOpcode() == ISD::BUILD_VECTOR) {
+
+ const ConstantSDNode *C = isConstOrConstSplat(Vec.getOperand(1));
+ if (!C || C->getZExtValue() != 0x1f)
+ return SDValue();
+
+ SDLoc SL(N);
+ SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
+ SDValue EVE = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
+ Vec->getOperand(0), N->getOperand(1));
+ SDValue A = DAG.getNode(ISD::AND, SL, MVT::i32, EVE, AndMask);
+ DAG.ReplaceAllUsesWith(N, A.getNode());
+ }
+
// ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
// =>
// Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)