@@ -24722,6 +24722,49 @@ static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
2472224722 Op0ExtV, Op1ExtV, Op->getOperand(2));
2472324723}
2472424724
24725+ static SDValue skipElementSizePreservingCast(SDValue Op, EVT VT) {
24726+ if (Op->getOpcode() == ISD::BITCAST)
24727+ Op = Op->getOperand(0);
24728+ EVT OpVT = Op.getValueType();
24729+ if (OpVT.isVector() && OpVT.getVectorElementType().getSizeInBits() ==
24730+ VT.getVectorElementType().getSizeInBits())
24731+ return Op;
24732+ return SDValue();
24733+ }
24734+
24735+ static SDValue performZIP1Combine(SDNode *N, SelectionDAG &DAG) {
24736+ SDLoc DL(N);
24737+ EVT VT = N->getValueType(0);
24738+
24739+ // zip1(insert_vector_elt(undef, extract_vector_elt(vec, 0), 0),
24740+ // insert_vector_elt(undef, extract_vector_elt(vec, 1), 0))
24741+ // -> vec
24742+ SDValue Op0 = skipElementSizePreservingCast(N->getOperand(0), VT);
24743+ SDValue Op1 = skipElementSizePreservingCast(N->getOperand(1), VT);
24744+ if (Op0 && Op1 && Op0->getOpcode() == ISD::INSERT_VECTOR_ELT &&
24745+ Op1->getOpcode() == ISD::INSERT_VECTOR_ELT) {
24746+ SDValue Op00 = Op0->getOperand(0);
24747+ SDValue Op10 = Op1->getOperand(0);
24748+ if (Op00.isUndef() && Op10.isUndef() &&
24749+ Op0->getConstantOperandVal(2) == 0 &&
24750+ Op1->getConstantOperandVal(2) == 0) {
24751+ SDValue Op01 = Op0->getOperand(1);
24752+ SDValue Op11 = Op1->getOperand(1);
24753+ if (Op01->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24754+ Op11->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24755+ Op01->getConstantOperandVal(1) == 0 &&
24756+ Op11->getConstantOperandVal(1) == 1) {
24757+ SDValue Op010 = skipElementSizePreservingCast(Op01->getOperand(0), VT);
24758+ SDValue Op110 = skipElementSizePreservingCast(Op11->getOperand(0), VT);
24759+ if (Op010 && Op010 == Op110)
24760+ return DAG.getBitcast(VT, Op010);
24761+ }
24762+ }
24763+ }
24764+
24765+ return SDValue();
24766+ }
24767+
2472524768static SDValue
2472624769performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
2472724770 SelectionDAG &DAG) {
@@ -26163,6 +26206,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2616326206
2616426207 break;
2616526208 }
26209+ case AArch64ISD::ZIP1:
26210+ return performZIP1Combine(N, DAG);
2616626211 case ISD::XOR:
2616726212 return performXorCombine(N, DAG, DCI, Subtarget);
2616826213 case ISD::MUL:
@@ -29032,7 +29077,14 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
2903229077 if (!IsSingleOp && !Subtarget.hasSVE2())
2903329078 return SDValue();
2903429079
29080+ // Small vectors (with few extracts) can be lowered more efficiently as a
29081+ // sequence of ZIPs.
2903529082 EVT VTOp1 = Op.getOperand(0).getValueType();
29083+ unsigned NumElts = VT.getVectorNumElements();
29084+ if (VT.isPow2VectorType() && VT.getFixedSizeInBits() <= 128 &&
29085+ (NumElts <= 2 || (NumElts <= 4 && !Op2.isUndef())))
29086+ return SDValue();
29087+
2903629088 unsigned BitsPerElt = VTOp1.getVectorElementType().getSizeInBits();
2903729089 unsigned IndexLen = MinSVESize / BitsPerElt;
2903829090 unsigned ElementsPerVectorReg = VTOp1.getVectorNumElements();
0 commit comments