@@ -13390,172 +13390,6 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
1339013390 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
1339113391}
1339213392
13393- /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
13394- /// the specified operations to build the shuffle. ID is the perfect-shuffle
13395- /// ID, V1 and V2 are the original shuffle inputs. PFEntry is the perfect-shuffle
13396- /// table entry and LHS/RHS are the immediate inputs for this stage of the
13397- /// shuffle. Recurses on the LHSID/RHSID sub-entries decoded from PFEntry.
13398- static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
13399- SDValue V2, unsigned PFEntry, SDValue LHS,
13400- SDValue RHS, SelectionDAG &DAG,
13401- const SDLoc &dl) {
13402- unsigned OpNum = (PFEntry >> 26) & 0x0F; // Bits 26-29: operation (enum below).
13403- unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); // Bits 13-25: table ID of the left operand.
13404- unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1); // Bits 0-12: table ID of the right operand, or a lane for OP_MOVLANE.
13405-
13406- enum {
13407- OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
13408- OP_VREV,
13409- OP_VDUP0,
13410- OP_VDUP1,
13411- OP_VDUP2,
13412- OP_VDUP3,
13413- OP_VEXT1,
13414- OP_VEXT2,
13415- OP_VEXT3,
13416- OP_VUZPL, // VUZP, left result
13417- OP_VUZPR, // VUZP, right result
13418- OP_VZIPL, // VZIP, left result
13419- OP_VZIPR, // VZIP, right result
13420- OP_VTRNL, // VTRN, left result
13421- OP_VTRNR, // VTRN, right result
13422- OP_MOVLANE // Move lane. RHSID is the lane to move into
13423- };
13424-
13425- if (OpNum == OP_COPY) {
13426- if (LHSID == (1 * 9 + 2) * 9 + 3) // Base-9 digits 0,1,2,3: the unmodified LHS.
13427- return LHS;
13428- assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!"); // Base-9 digits 4,5,6,7.
13429- return RHS;
13430- }
13431-
13432- if (OpNum == OP_MOVLANE) {
13433- // Decompose a PerfectShuffle ID to get the Mask for lane Elt
13434- auto getPFIDLane = [](unsigned ID, int Elt) -> int {
13435- assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
13436- Elt = 3 - Elt; // Lane 0 is the most significant base-9 digit of ID.
13437- while (Elt > 0) {
13438- ID /= 9;
13439- Elt--;
13440- }
13441- return (ID % 9 == 8) ? -1 : ID % 9; // Digit 8 is reported as -1 (undef).
13442- };
13443-
13444- // For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
13445- // get the lane to move from the PFID, which is always from the
13446- // original vectors (V1 or V2).
13447- SDValue OpLHS = GeneratePerfectShuffle(
13448- LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
13449- EVT VT = OpLHS.getValueType();
13450- assert(RHSID < 8 && "Expected a lane index for RHSID!");
13451- unsigned ExtLane = 0;
13452- SDValue Input;
13453-
13454- // OP_MOVLANE are either D movs (if bit 0x4 is set) or S movs. D movs
13455- // convert into a higher type.
13456- if (RHSID & 0x4) {
13457- int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1; // Even lane of the destination pair, halved to a pair index.
13458- if (MaskElt == -1) // Even lane is undef: derive the pair index from the odd lane.
13459- MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
13460- assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
13461- ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2); // Pair indices 0-1 select V1, 2-3 select V2.
13462- Input = MaskElt < 2 ? V1 : V2;
13463- if (VT.getScalarSizeInBits() == 16) {
13464- Input = DAG.getBitcast(MVT::v2f32, Input);
13465- OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
13466- } else {
13467- assert(VT.getScalarSizeInBits() == 32 &&
13468- "Expected 16 or 32 bit shuffle elemements");
13469- Input = DAG.getBitcast(MVT::v2f64, Input);
13470- OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
13471- }
13472- } else {
13473- int MaskElt = getPFIDLane(ID, RHSID);
13474- assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
13475- ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4); // Mask values 0-3 select V1, 4-7 select V2.
13476- Input = MaskElt < 4 ? V1 : V2;
13477- // Be careful about creating illegal types. Use f16 instead of i16.
13478- if (VT == MVT::v4i16) {
13479- Input = DAG.getBitcast(MVT::v4f16, Input);
13480- OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
13481- }
13482- }
13483- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
13484- Input.getValueType().getVectorElementType(),
13485- Input, DAG.getVectorIdxConstant(ExtLane, dl));
13486- SDValue Ins =
13487- DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
13488- Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl)); // Low two bits of RHSID: destination lane.
13489- return DAG.getBitcast(VT, Ins); // Undo any bitcast applied above.
13490- }
13491-
13492- SDValue OpLHS, OpRHS;
13493- OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
13494- RHS, DAG, dl);
13495- OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
13496- RHS, DAG, dl);
13497- EVT VT = OpLHS.getValueType();
13498-
13499- switch (OpNum) {
13500- default:
13501- llvm_unreachable("Unknown shuffle opcode!");
13502- case OP_VREV:
13503- // VREV divides the vector in half and swaps within the half.
13504- if (VT.getVectorElementType() == MVT::i32 ||
13505- VT.getVectorElementType() == MVT::f32)
13506- return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
13507- // vrev <4 x i16> -> REV32
13508- if (VT.getVectorElementType() == MVT::i16 ||
13509- VT.getVectorElementType() == MVT::f16 ||
13510- VT.getVectorElementType() == MVT::bf16)
13511- return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
13512- // vrev <4 x i8> -> REV16
13513- assert(VT.getVectorElementType() == MVT::i8);
13514- return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
13515- case OP_VDUP0:
13516- case OP_VDUP1:
13517- case OP_VDUP2:
13518- case OP_VDUP3: {
13519- EVT EltTy = VT.getVectorElementType();
13520- unsigned Opcode;
13521- if (EltTy == MVT::i8)
13522- Opcode = AArch64ISD::DUPLANE8;
13523- else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
13524- Opcode = AArch64ISD::DUPLANE16;
13525- else if (EltTy == MVT::i32 || EltTy == MVT::f32)
13526- Opcode = AArch64ISD::DUPLANE32;
13527- else if (EltTy == MVT::i64 || EltTy == MVT::f64)
13528- Opcode = AArch64ISD::DUPLANE64;
13529- else
13530- llvm_unreachable("Invalid vector element type?");
13531-
13532- if (VT.getSizeInBits() == 64) // NOTE(review): presumably DUPLANE wants a 128-bit source -- confirm.
13533- OpLHS = WidenVector(OpLHS, DAG);
13534- SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64); // Lane index 0-3 taken from the opcode.
13535- return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
13536- }
13537- case OP_VEXT1:
13538- case OP_VEXT2:
13539- case OP_VEXT3: {
13540- unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); // Element count (1-3) scaled by getExtFactor.
13541- return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
13542- DAG.getConstant(Imm, dl, MVT::i32));
13543- }
13544- case OP_VUZPL:
13545- return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
13546- case OP_VUZPR:
13547- return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
13548- case OP_VZIPL:
13549- return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
13550- case OP_VZIPR:
13551- return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
13552- case OP_VTRNL:
13553- return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
13554- case OP_VTRNR:
13555- return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
13556- }
13557- }
13558-
1355913393static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
1356013394 SelectionDAG &DAG) {
1356113395 // Check to see if we can use the TBL instruction.
@@ -13979,8 +13813,95 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1397913813 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
1398013814 PFIndexes[2] * 9 + PFIndexes[3];
1398113815 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
13982- return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
13983- dl);
13816+
13817+ auto BuildRev = [&DAG, &dl](SDValue OpLHS) { // Emits the REV node chosen by OpLHS's scalar width.
13818+ EVT VT = OpLHS.getValueType();
13819+ unsigned Opcode = VT.getScalarSizeInBits() == 32 ? AArch64ISD::REV64
13820+ : VT.getScalarSizeInBits() == 16 ? AArch64ISD::REV32
13821+ : AArch64ISD::REV16; // NOTE(review): every width other than 32/16 lands here; presumably only 8-bit elements are expected -- confirm.
13822+ return DAG.getNode(Opcode, dl, VT, OpLHS);
13823+ };
13824+ auto BuildDup = [&DAG, &dl](SDValue OpLHS, unsigned Lane) { // Emits DUPLANE8/16/32/64 of lane Lane of OpLHS.
13825+ EVT VT = OpLHS.getValueType();
13826+ unsigned Opcode;
13827+ if (VT.getScalarSizeInBits() == 8) // The opcode is picked purely from the scalar bit width.
13828+ Opcode = AArch64ISD::DUPLANE8;
13829+ else if (VT.getScalarSizeInBits() == 16)
13830+ Opcode = AArch64ISD::DUPLANE16;
13831+ else if (VT.getScalarSizeInBits() == 32)
13832+ Opcode = AArch64ISD::DUPLANE32;
13833+ else if (VT.getScalarSizeInBits() == 64)
13834+ Opcode = AArch64ISD::DUPLANE64;
13835+ else
13836+ llvm_unreachable("Invalid vector element type?");
13837+
13838+ if (VT.getSizeInBits() == 64) // NOTE(review): presumably DUPLANE wants a 128-bit source -- confirm.
13839+ OpLHS = WidenVector(OpLHS, DAG);
13840+ return DAG.getNode(Opcode, dl, VT, OpLHS, // The result keeps the original (pre-widening) type VT.
13841+ DAG.getConstant(Lane, dl, MVT::i64));
13842+ };
13843+ auto BuildExt = [&DAG, &dl](SDValue OpLHS, SDValue OpRHS, unsigned Imm) { // Emits an EXT of OpLHS/OpRHS; Imm arrives unscaled.
13844+ EVT VT = OpLHS.getValueType();
13845+ Imm = Imm * getExtFactor(OpLHS); // Scale the incoming count by getExtFactor before use as the EXT immediate.
13846+ return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
13847+ DAG.getConstant(Imm, dl, MVT::i32));
13848+ };
13849+ auto BuildZipLike = [&DAG, &dl](unsigned OpNum, SDValue OpLHS, // Maps a UZP/ZIP/TRN perfect-shuffle opcode to its AArch64ISD node.
13850+ SDValue OpRHS) {
13851+ EVT VT = OpLHS.getValueType(); // The result has the type of the left operand.
13852+ switch (OpNum) {
13853+ default:
13854+ llvm_unreachable("Unexpected perfect shuffle opcode"); // No trailing newline, matching the other llvm_unreachable messages here.
13855+ case OP_VUZPL:
13856+ return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
13857+ case OP_VUZPR:
13858+ return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
13859+ case OP_VZIPL:
13860+ return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
13861+ case OP_VZIPR:
13862+ return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
13863+ case OP_VTRNL:
13864+ return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
13865+ case OP_VTRNR:
13866+ return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
13867+ }
13868+ };
13869+ auto BuildExtractInsert64 = [&DAG, &dl](SDValue ExtSrc, unsigned ExtLane, // Moves lane ExtLane of ExtSrc into lane InsLane of InsSrc on a double-width view.
13870+ SDValue InsSrc, unsigned InsLane) {
13871+ EVT VT = InsSrc.getValueType(); // Remembered so the result can be bitcast back at the end.
13872+ if (VT.getScalarSizeInBits() == 16) { // 16-bit elements: operate on v2f32.
13873+ ExtSrc = DAG.getBitcast(MVT::v2f32, ExtSrc);
13874+ InsSrc = DAG.getBitcast(MVT::v2f32, InsSrc);
13875+ } else if (VT.getScalarSizeInBits() == 32) { // 32-bit elements: operate on v2f64.
13876+ ExtSrc = DAG.getBitcast(MVT::v2f64, ExtSrc);
13877+ InsSrc = DAG.getBitcast(MVT::v2f64, InsSrc);
13878+ } // NOTE(review): other widths proceed without a bitcast and without an assert -- confirm only 16/32-bit elements reach here.
13879+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
13880+ ExtSrc.getValueType().getVectorElementType(),
13881+ ExtSrc, DAG.getVectorIdxConstant(ExtLane, dl));
13882+ SDValue Ins =
13883+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtSrc.getValueType(), InsSrc,
13884+ Ext, DAG.getVectorIdxConstant(InsLane, dl));
13885+ return DAG.getBitcast(VT, Ins); // Back to the caller's vector type.
13886+ };
13887+ auto BuildExtractInsert32 = [&DAG, &dl](SDValue ExtSrc, unsigned ExtLane, // Moves lane ExtLane of ExtSrc into lane InsLane of InsSrc at element granularity.
13888+ SDValue InsSrc, unsigned InsLane) {
13889+ EVT VT = InsSrc.getValueType(); // Remembered so the result can be bitcast back at the end.
13890+ if (VT.getScalarSizeInBits() == 16) { // Any 16-bit element type is handled as v4f16. NOTE(review): presumably to avoid an illegal i16 scalar -- confirm.
13891+ ExtSrc = DAG.getBitcast(MVT::v4f16, ExtSrc);
13892+ InsSrc = DAG.getBitcast(MVT::v4f16, InsSrc);
13893+ }
13894+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
13895+ ExtSrc.getValueType().getVectorElementType(),
13896+ ExtSrc, DAG.getVectorIdxConstant(ExtLane, dl));
13897+ SDValue Ins =
13898+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtSrc.getValueType(), InsSrc,
13899+ Ext, DAG.getVectorIdxConstant(InsLane, dl));
13900+ return DAG.getBitcast(VT, Ins); // Back to the caller's vector type.
13901+ };
13902+ return generatePerfectShuffle<SDValue, MVT>(
13903+ PFTableIndex, V1, V2, PFEntry, V1, V2, BuildExtractInsert64,
13904+ BuildExtractInsert32, BuildRev, BuildDup, BuildExt, BuildZipLike);
1398413905 }
1398513906
1398613907 // Check for a "select shuffle", generating a BSL to pick between lanes in
0 commit comments