@@ -13466,172 +13466,6 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
1346613466 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
1346713467}
1346813468
13469- /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
13470- /// the specified operations to build the shuffle. ID is the perfect-shuffle
13471- /// ID, V1 and V2 are the original shuffle inputs. PFEntry is the perfect-shuffle
13472- /// table entry and LHS/RHS are the immediate inputs for this stage of the
13473- /// shuffle. The function recurses on the LHSID/RHSID fields of PFEntry; OP_COPY entries end the recursion.
13474- static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
13475- SDValue V2, unsigned PFEntry, SDValue LHS,
13476- SDValue RHS, SelectionDAG &DAG,
13477- const SDLoc &dl) {
13478- unsigned OpNum = (PFEntry >> 26) & 0x0F; // Bits 26-29: the operation to emit.
13479- unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); // Bits 13-25: shuffle ID of the left operand.
13480- unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1); // Bits 0-12: shuffle ID of the right operand (a lane number for OP_MOVLANE).
13481-
13482- enum { // Operation codes that OpNum is compared against.
13483- OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
13484- OP_VREV,
13485- OP_VDUP0,
13486- OP_VDUP1,
13487- OP_VDUP2,
13488- OP_VDUP3,
13489- OP_VEXT1,
13490- OP_VEXT2,
13491- OP_VEXT3,
13492- OP_VUZPL, // VUZP, left result
13493- OP_VUZPR, // VUZP, right result
13494- OP_VZIPL, // VZIP, left result
13495- OP_VZIPR, // VZIP, right result
13496- OP_VTRNL, // VTRN, left result
13497- OP_VTRNR, // VTRN, right result
13498- OP_MOVLANE // Move lane. RHSID is the lane to move into
13499- };
13500-
13501- if (OpNum == OP_COPY) {
13502- if (LHSID == (1 * 9 + 2) * 9 + 3) // Base-9 encoding of the mask <0,1,2,3>.
13503- return LHS;
13504- assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!"); // Otherwise the ID must encode <4,5,6,7>.
13505- return RHS;
13506- }
13507-
13508- if (OpNum == OP_MOVLANE) {
13509- // Decompose a PerfectShuffle ID to get the Mask for lane Elt
13510- auto getPFIDLane = [](unsigned ID, int Elt) -> int {
13511- assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
13512- Elt = 3 - Elt; // Lane 0 is the most significant base-9 digit of ID.
13513- while (Elt > 0) {
13514- ID /= 9;
13515- Elt--;
13516- }
13517- return (ID % 9 == 8) ? -1 : ID % 9; // Digit 8 is reported as -1 (undef lane).
13518- };
13519-
13520- // For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
13521- // get the lane to move from the PFID, which is always from the
13522- // original vectors (V1 or V2).
13523- SDValue OpLHS = GeneratePerfectShuffle(
13524- LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
13525- EVT VT = OpLHS.getValueType();
13526- assert(RHSID < 8 && "Expected a lane index for RHSID!");
13527- unsigned ExtLane = 0;
13528- SDValue Input;
13529-
13530- // OP_MOVLANE are either D movs (if bit 0x4 is set) or S movs. D movs
13531- // convert into a higher type.
13532- if (RHSID & 0x4) {
13533- int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1; // Mask entry of the pair's first lane (0 or 2), halved to index the double-width type.
13534- if (MaskElt == -1)
13535- MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1; // First lane undef: derive the index from the pair's second lane instead.
13536- assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
13537- ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2); // Wide indices 0-1 select from V1, 2-3 from V2.
13538- Input = MaskElt < 2 ? V1 : V2;
13539- if (VT.getScalarSizeInBits() == 16) {
13540- Input = DAG.getBitcast(MVT::v2f32, Input);
13541- OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
13542- } else {
13543- assert(VT.getScalarSizeInBits() == 32 &&
13544- "Expected 16 or 32 bit shuffle elemements");
13545- Input = DAG.getBitcast(MVT::v2f64, Input);
13546- OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
13547- }
13548- } else {
13549- int MaskElt = getPFIDLane(ID, RHSID);
13550- assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
13551- ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4); // Mask values 0-3 select from V1, 4-7 from V2.
13552- Input = MaskElt < 4 ? V1 : V2;
13553- // Be careful about creating illegal types. Use f16 instead of i16.
13554- if (VT == MVT::v4i16) {
13555- Input = DAG.getBitcast(MVT::v4f16, Input);
13556- OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
13557- }
13558- }
13559- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, // Read lane ExtLane of the chosen input.
13560- Input.getValueType().getVectorElementType(),
13561- Input, DAG.getVectorIdxConstant(ExtLane, dl));
13562- SDValue Ins = // Write that element into lane (RHSID & 0x3) of the partial result.
13563- DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
13564- Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl));
13565- return DAG.getBitcast(VT, Ins); // Cast back to the type the operand shuffle had.
13566- }
13567-
13568- SDValue OpLHS, OpRHS; // Every remaining opcode first builds both operand shuffles recursively.
13569- OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
13570- RHS, DAG, dl);
13571- OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
13572- RHS, DAG, dl);
13573- EVT VT = OpLHS.getValueType();
13574-
13575- switch (OpNum) {
13576- default:
13577- llvm_unreachable("Unknown shuffle opcode!");
13578- case OP_VREV:
13579- // VREV divides the vector in half and swaps within the half.
13580- if (VT.getVectorElementType() == MVT::i32 ||
13581- VT.getVectorElementType() == MVT::f32)
13582- return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
13583- // vrev <4 x i16> -> REV32
13584- if (VT.getVectorElementType() == MVT::i16 ||
13585- VT.getVectorElementType() == MVT::f16 ||
13586- VT.getVectorElementType() == MVT::bf16)
13587- return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
13588- // vrev <4 x i8> -> REV16
13589- assert(VT.getVectorElementType() == MVT::i8);
13590- return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
13591- case OP_VDUP0:
13592- case OP_VDUP1:
13593- case OP_VDUP2:
13594- case OP_VDUP3: {
13595- EVT EltTy = VT.getVectorElementType();
13596- unsigned Opcode; // DUPLANE variant matching the element width.
13597- if (EltTy == MVT::i8)
13598- Opcode = AArch64ISD::DUPLANE8;
13599- else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
13600- Opcode = AArch64ISD::DUPLANE16;
13601- else if (EltTy == MVT::i32 || EltTy == MVT::f32)
13602- Opcode = AArch64ISD::DUPLANE32;
13603- else if (EltTy == MVT::i64 || EltTy == MVT::f64)
13604- Opcode = AArch64ISD::DUPLANE64;
13605- else
13606- llvm_unreachable("Invalid vector element type?");
13607-
13608- if (VT.getSizeInBits() == 64) // 64-bit operands go through WidenVector before the DUPLANE node is built.
13609- OpLHS = WidenVector(OpLHS, DAG);
13610- SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64); // Lane number 0-3, taken from the opcode.
13611- return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
13612- }
13613- case OP_VEXT1:
13614- case OP_VEXT2:
13615- case OP_VEXT3: {
13616- unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); // Element count (1-3) scaled by getExtFactor(OpLHS).
13617- return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
13618- DAG.getConstant(Imm, dl, MVT::i32));
13619- }
13620- case OP_VUZPL:
13621- return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
13622- case OP_VUZPR:
13623- return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
13624- case OP_VZIPL:
13625- return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
13626- case OP_VZIPR:
13627- return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
13628- case OP_VTRNL:
13629- return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
13630- case OP_VTRNR:
13631- return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
13632- }
13633- }
13634-
1363513469static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
1363613470 SelectionDAG &DAG) {
1363713471 // Check to see if we can use the TBL instruction.
@@ -14055,8 +13889,95 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1405513889 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
1405613890 PFIndexes[2] * 9 + PFIndexes[3];
1405713891 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
14058- return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
14059- dl);
13892+
13893+ auto BuildRev = [&DAG, &dl](SDValue OpLHS) { // Callback: wraps OpLHS in a REV64/REV32/REV16 node picked by element width.
13894+ EVT VT = OpLHS.getValueType();
13895+ unsigned Opcode = VT.getScalarSizeInBits() == 32 ? AArch64ISD::REV64 // 32-bit elements.
13896+ : VT.getScalarSizeInBits() == 16 ? AArch64ISD::REV32 // 16-bit elements.
13897+ : AArch64ISD::REV16; // Every other width. NOTE(review): presumably only 8-bit elements reach here - confirm.
13898+ return DAG.getNode(Opcode, dl, VT, OpLHS);
13899+ };
13900+ auto BuildDup = [&DAG, &dl](SDValue OpLHS, unsigned Lane) { // Callback: builds a DUPLANE node of OpLHS with Lane as its lane operand.
13901+ EVT VT = OpLHS.getValueType();
13902+ unsigned Opcode; // DUPLANE variant matching the element width in bits.
13903+ if (VT.getScalarSizeInBits() == 8)
13904+ Opcode = AArch64ISD::DUPLANE8;
13905+ else if (VT.getScalarSizeInBits() == 16)
13906+ Opcode = AArch64ISD::DUPLANE16;
13907+ else if (VT.getScalarSizeInBits() == 32)
13908+ Opcode = AArch64ISD::DUPLANE32;
13909+ else if (VT.getScalarSizeInBits() == 64)
13910+ Opcode = AArch64ISD::DUPLANE64;
13911+ else
13912+ llvm_unreachable("Invalid vector element type?");
13913+
13914+ if (VT.getSizeInBits() == 64) // 64-bit operands go through WidenVector first; the result keeps the original VT.
13915+ OpLHS = WidenVector(OpLHS, DAG);
13916+ return DAG.getNode(Opcode, dl, VT, OpLHS,
13917+ DAG.getConstant(Lane, dl, MVT::i64));
13918+ };
13919+ auto BuildExt = [&DAG, &dl](SDValue OpLHS, SDValue OpRHS, unsigned Imm) { // Callback: builds an EXT node over OpLHS and OpRHS.
13920+ EVT VT = OpLHS.getValueType();
13921+ Imm = Imm * getExtFactor(OpLHS); // Scale the incoming Imm by getExtFactor(OpLHS) to form the EXT immediate.
13922+ return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
13923+ DAG.getConstant(Imm, dl, MVT::i32));
13924+ };
13925+ auto BuildZipLike = [&DAG, &dl](unsigned OpNum, SDValue OpLHS, // Callback: maps a UZP/ZIP/TRN perfect-shuffle opcode to the matching two-operand node.
13926+ SDValue OpRHS) {
13927+ EVT VT = OpLHS.getValueType(); // The result takes the left operand's type.
13928+ switch (OpNum) {
13929+ default: // Any opcode outside the six cases below is treated as unreachable.
13930+ llvm_unreachable("Unexpected perfect shuffle opcode\n");
13931+ case OP_VUZPL:
13932+ return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
13933+ case OP_VUZPR:
13934+ return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
13935+ case OP_VZIPL:
13936+ return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
13937+ case OP_VZIPR:
13938+ return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
13939+ case OP_VTRNL:
13940+ return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
13941+ case OP_VTRNR:
13942+ return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
13943+ }
13944+ };
13945+ auto BuildExtractInsert64 = [&DAG, &dl](SDValue ExtSrc, unsigned ExtLane, // Callback: copies lane ExtLane of ExtSrc into lane InsLane of InsSrc using double-width lanes.
13946+ SDValue InsSrc, unsigned InsLane) {
13947+ EVT VT = InsSrc.getValueType(); // Remembered so the result can be cast back at the end.
13948+ if (VT.getScalarSizeInBits() == 16) { // 16-bit elements: view both vectors as v2f32.
13949+ ExtSrc = DAG.getBitcast(MVT::v2f32, ExtSrc);
13950+ InsSrc = DAG.getBitcast(MVT::v2f32, InsSrc);
13951+ } else if (VT.getScalarSizeInBits() == 32) { // 32-bit elements: view both vectors as v2f64.
13952+ ExtSrc = DAG.getBitcast(MVT::v2f64, ExtSrc);
13953+ InsSrc = DAG.getBitcast(MVT::v2f64, InsSrc);
13954+ } // NOTE(review): other element widths fall through without a bitcast - confirm none are expected here.
13955+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
13956+ ExtSrc.getValueType().getVectorElementType(),
13957+ ExtSrc, DAG.getVectorIdxConstant(ExtLane, dl));
13958+ SDValue Ins = // The insert's result type is taken from ExtSrc, not InsSrc.
13959+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtSrc.getValueType(), InsSrc,
13960+ Ext, DAG.getVectorIdxConstant(InsLane, dl));
13961+ return DAG.getBitcast(VT, Ins); // Cast back to InsSrc's original type.
13962+ };
13963+ auto BuildExtractInsert32 = [&DAG, &dl](SDValue ExtSrc, unsigned ExtLane, // Callback: copies lane ExtLane of ExtSrc into lane InsLane of InsSrc at element granularity.
13964+ SDValue InsSrc, unsigned InsLane) {
13965+ EVT VT = InsSrc.getValueType(); // Remembered so the result can be cast back at the end.
13966+ if (VT.getScalarSizeInBits() == 16) { // 16-bit elements: view both vectors as v4f16 before the extract/insert.
13967+ ExtSrc = DAG.getBitcast(MVT::v4f16, ExtSrc);
13968+ InsSrc = DAG.getBitcast(MVT::v4f16, InsSrc);
13969+ }
13970+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
13971+ ExtSrc.getValueType().getVectorElementType(),
13972+ ExtSrc, DAG.getVectorIdxConstant(ExtLane, dl));
13973+ SDValue Ins = // The insert's result type is taken from ExtSrc, not InsSrc.
13974+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtSrc.getValueType(), InsSrc,
13975+ Ext, DAG.getVectorIdxConstant(InsLane, dl));
13976+ return DAG.getBitcast(VT, Ins); // Cast back to InsSrc's original type.
13977+ };
13978+ return generatePerfectShuffle<SDValue, MVT>( // Hand the table entry, the inputs and the node-building callbacks to the generatePerfectShuffle template.
13979+ PFTableIndex, V1, V2, PFEntry, V1, V2, BuildExtractInsert64, // V1/V2 are passed both as the original inputs and as the initial LHS/RHS.
13980+ BuildExtractInsert32, BuildRev, BuildDup, BuildExt, BuildZipLike);
1406013981 }
1406113982
1406213983 // Check for a "select shuffle", generating a BSL to pick between lanes in
0 commit comments