@@ -6098,6 +6098,157 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
60986098 DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
60996099}
61006100
/// Lower the lane intrinsics (amdgcn_readfirstlane / amdgcn_readlane /
/// amdgcn_writelane), which natively operate on 32-bit values, for result
/// types of any other bit width.
///
/// \p N is an INTRINSIC_WO_CHAIN node whose operand 0 is the intrinsic ID.
/// Returns SDValue() when the node is already legal (32-bit result) or when
/// the width cannot be handled here (not a multiple of 32 after the
/// sub-32-bit case), leaving it to default handling.
static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
                           SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned ValSize = VT.getSizeInBits();
  unsigned IID = N->getConstantOperandVal(0);
  SDLoc SL(N);
  // Integer type of the same total width as VT; used to round-trip
  // floating-point values through integer bitcasts below.
  MVT IntVT = MVT::getIntegerVT(ValSize);

  // Rebuild the intrinsic node with the given source operands and result
  // type, pushing only the operands this particular intrinsic takes:
  //   readfirstlane(src0), readlane(src0, src1), writelane(src0, src1, src2).
  auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,
                                          SDValue Src2, MVT ValT) -> SDValue {
    SmallVector<SDValue, 8> Operands;
    Operands.push_back(DAG.getTargetConstant(IID, SL, MVT::i32));
    switch (IID) {
    case Intrinsic::amdgcn_readfirstlane:
      Operands.push_back(Src0);
      break;
    case Intrinsic::amdgcn_readlane:
      Operands.push_back(Src0);
      Operands.push_back(Src1);
      break;
    case Intrinsic::amdgcn_writelane:
      Operands.push_back(Src0);
      Operands.push_back(Src1);
      Operands.push_back(Src2);
      break;
    }

    // If the original node carried a convergence-control glue operand,
    // re-glue the underlying convergence token to the new node so the
    // rewritten intrinsic stays inside the same convergence region.
    if (SDNode *GL = N->getGluedNode()) {
      assert(GL->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
      GL = GL->getOperand(0).getNode();
      Operands.push_back(DAG.getNode(ISD::CONVERGENCECTRL_GLUE, SL, MVT::Glue,
                                     SDValue(GL, 0)));
    }

    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, ValT, Operands);
  };

  // Gather the value/lane operands present for this intrinsic. Src1 is the
  // lane index (readlane/writelane); Src2 is writelane's pass-through value.
  SDValue Src0 = N->getOperand(1);
  SDValue Src1, Src2;
  if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) {
    Src1 = N->getOperand(2);
    if (IID == Intrinsic::amdgcn_writelane)
      Src2 = N->getOperand(3);
  }

  if (ValSize == 32) {
    // Already legal
    return SDValue();
  }

  if (ValSize < 32) {
    // Sub-32-bit value: widen the value operands to i32 (bitcasting
    // floating-point values to same-width integers first, since anyext is an
    // integer operation), perform the 32-bit lane op, then narrow back and
    // bitcast to the original type if needed.
    bool IsFloat = VT.isFloatingPoint();
    Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0,
                                SL, MVT::i32);
    if (Src2.getNode()) {
      Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2,
                                  SL, MVT::i32);
    }
    SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
    SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
    return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc;
  }

  // Wider types are handled by splitting into 32-bit pieces; give up when the
  // total width is not a multiple of 32.
  if (ValSize % 32 != 0)
    return SDValue();

  // Scalarize a vector-typed lane op: emit one per-element lane op (splatting
  // any scalar operands, extracting from vector ones) and recombine the
  // results with a BUILD_VECTOR. A fresh CONVERGENCECTRL_GLUE node is created
  // for each scalar op since glue values cannot be shared between nodes.
  auto unrollLaneOp = [&DAG, &SL](SDNode *N) -> SDValue {
    EVT VT = N->getValueType(0);
    unsigned NE = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    SmallVector<SDValue, 8> Scalars;
    unsigned NumOperands = N->getNumOperands();
    SmallVector<SDValue, 4> Operands(NumOperands);
    SDNode *GL = N->getGluedNode();

    // only handle convergencectrl_glue
    assert(!GL || GL->getOpcode() == ISD::CONVERGENCECTRL_GLUE);

    for (unsigned i = 0; i != NE; ++i) {
      // Copy all non-glue operands, extracting element i from any vector ones.
      for (unsigned j = 0, e = GL ? NumOperands - 1 : NumOperands; j != e;
           ++j) {
        SDValue Operand = N->getOperand(j);
        EVT OperandVT = Operand.getValueType();
        if (OperandVT.isVector()) {
          // A vector operand; extract a single element.
          EVT OperandEltVT = OperandVT.getVectorElementType();
          Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, OperandEltVT,
                                    Operand, DAG.getVectorIdxConstant(i, SL));
        } else {
          // A scalar operand; just use it as is.
          Operands[j] = Operand;
        }
      }

      // Re-glue the same convergence token to this scalar op.
      if (GL)
        Operands[NumOperands - 1] =
            DAG.getNode(ISD::CONVERGENCECTRL_GLUE, SL, MVT::Glue,
                        SDValue(GL->getOperand(0).getNode(), 0));

      Scalars.push_back(DAG.getNode(N->getOpcode(), SL, EltVT, Operands));
    }

    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NE);
    return DAG.getBuildVector(VecVT, SL, Scalars);
  };

  if (VT.isVector()) {
    switch (MVT::SimpleValueType EltTy =
                VT.getVectorElementType().getSimpleVT().SimpleTy) {
    case MVT::i32:
    case MVT::f32: {
      // 32-bit elements: build the vector-typed op and unroll it into
      // per-element 32-bit lane ops.
      SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VT.getSimpleVT());
      return unrollLaneOp(LaneOp.getNode());
    }
    case MVT::i16:
    case MVT::f16:
    case MVT::bf16: {
      // 16-bit elements: process two elements (one 32-bit register) at a
      // time via EXTRACT_SUBVECTOR, then reassemble with CONCAT_VECTORS.
      // Note: only reached when ValSize % 32 == 0, i.e. an even element
      // count.
      MVT SubVecVT = MVT::getVectorVT(EltTy, 2);
      SmallVector<SDValue, 4> Pieces;
      for (unsigned i = 0, EltIdx = 0; i < ValSize / 32; i++) {
        SDValue Src0SubVec =
            DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src0,
                        DAG.getConstant(EltIdx, SL, MVT::i32));

        SDValue Src2SubVec;
        if (Src2)
          Src2SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src2,
                                   DAG.getConstant(EltIdx, SL, MVT::i32));

        // The lane index Src1 is scalar and shared by every piece.
        Pieces.push_back(createLaneOp(Src0SubVec, Src1, Src2SubVec, SubVecVT));
        EltIdx += 2;
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, Pieces);
    }
    default:
      // Handle all other cases by bitcasting to i32 vectors
      break;
    }
  }

  // Generic fallback: view the value as a vector of i32 words, lane-op each
  // word, and bitcast the rebuilt vector back to the original type.
  MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);
  Src0 = DAG.getBitcast(VecVT, Src0);

  if (Src2)
    Src2 = DAG.getBitcast(VecVT, Src2);

  SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
  SDValue UnrolledLaneOp = unrollLaneOp(LaneOp.getNode());
  return DAG.getBitcast(VT, UnrolledLaneOp);
}
6251+
61016252void SITargetLowering::ReplaceNodeResults(SDNode *N,
61026253 SmallVectorImpl<SDValue> &Results,
61036254 SelectionDAG &DAG) const {
@@ -8564,6 +8715,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
85648715 }
85658716 case Intrinsic::amdgcn_addrspacecast_nonnull:
85668717 return lowerADDRSPACECAST(Op, DAG);
8718+ case Intrinsic::amdgcn_readlane:
8719+ case Intrinsic::amdgcn_readfirstlane:
8720+ case Intrinsic::amdgcn_writelane:
8721+ return lowerLaneOp(*this, Op.getNode(), DAG);
85678722 default:
85688723 if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
85698724 AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
0 commit comments