@@ -11763,6 +11763,64 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1176311763 return Op;
1176411764}
1176511765
11766+ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11767+ SelectionDAG &DAG) const { // Lowers a v1024i1 load into lxvp loads assembled into a single DMR value.
11768+ SDLoc dl(Op);
11769+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11770+ SDValue LoadChain = LN->getChain();
11771+ SDValue BasePtr = LN->getBasePtr();
11772+ EVT VT = Op.getValueType();
11773+
11774+ // Type v1024i1 is used for Dense Math dmr registers.
11775+ assert(VT == MVT::v1024i1 && "Unsupported type.");
11776+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11777+ "Dense Math support required.");
11778+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11779+
11780+ SmallVector<SDValue, 4> Loads;
11781+ SmallVector<SDValue, 4> LoadChains;
11782+ SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
11783+ SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr}; // Operands of each lxvp node: chain, intrinsic ID, address.
11784+ MachineMemOperand *MMO = LN->getMemOperand();
11785+ unsigned NumVecs = VT.getSizeInBits() / 256; // Number of 256-bit pieces; the code below indexes Loads[0..3].
11786+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11787+ MachineMemOperand *NewMMO =
11788+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32); // Memory operand for this piece: offset Idx * 32, size 32.
11789+ if (Idx > 0) { // Every load after the first reads 32 bytes past the previous address.
11790+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11791+ DAG.getConstant(32, dl, BasePtr.getValueType()));
11792+ LoadOps[2] = BasePtr;
11793+ }
11794+ SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11795+ DAG.getVTList(MVT::v256i1, MVT::Other),
11796+ LoadOps, MVT::v256i1, NewMMO);
11797+ LoadChains.push_back(Ld.getValue(1)); // Result 1 is the output chain of the load.
11798+ Loads.push_back(Ld);
11799+ }
11800+
11801+ if (Subtarget.isLittleEndian()) { // On little-endian subtargets the pieces are consumed in reverse order.
11802+ std::reverse(Loads.begin(), Loads.end());
11803+ std::reverse(LoadChains.begin(), LoadChains.end());
11804+ }
11805+
11806+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); // Join the chains of all the loads.
11807+ SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0],
11808+ Loads[1]),
11809+ 0); // v512i1 value built from the first two pieces.
11810+ SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11811+ SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
11812+ Loads[2], Loads[3]),
11813+ 0); // v512i1 value built from the last two pieces.
11814+ SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11815+ SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11816+ const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub}; // REG_SEQUENCE operands: register class, then (value, subregister index) pairs.
11817+ SDValue Value =
11818+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
11819+
11820+ SDValue RetOps[] = {Value, TF}; // Returned as a merged pair: the v1024i1 value and the joined chain.
11821+ return DAG.getMergeValues(RetOps, dl);
11822+ }
11823+
1176611824SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1176711825 SelectionDAG &DAG) const {
1176811826 SDLoc dl(Op);
@@ -11771,12 +11829,11 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1177111829 SDValue BasePtr = LN->getBasePtr();
1177211830 EVT VT = Op.getValueType();
1177311831
11774- if (VT != MVT::v256i1 && VT != MVT::v512i1 && VT != MVT::v1024i1)
11775- return Op;
11832+ if (VT == MVT::v1024i1)
11833+ return LowerDMFVectorLoad(Op, DAG);
1177611834
11777- // Used for dense math registers.
11778- assert((VT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
11779- "Type unsupported for this processor");
11835+ if (VT != MVT::v256i1 && VT != MVT::v512i1)
11836+ return Op;
1178011837
1178111838 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
1178211839 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
@@ -11805,57 +11862,91 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1180511862 std::reverse(LoadChains.begin(), LoadChains.end());
1180611863 }
1180711864 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11808- SDValue Value;
11809- if (VT == MVT::v1024i1) {
11810- SmallVector<SDValue, 4> Pairs;
11811- SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32);
11812- SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32);
11813- SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32);
11814- NumVecs >>= 1;
11815- for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11816- const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx,
11817- Loads[Idx * 2 + 1], Vsx1Idx};
11818- Pairs.push_back(SDValue(
11819- DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0));
11820- }
11821- SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1,
11822- Pairs[0], Pairs[1]),
11823- 0);
11824- SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11825- SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
11826- Pairs[2], Pairs[3]),
11827- 0);
11828- SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11829- SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11830- const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11831- Value = SDValue(
11832- DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
11833- } else {
11834- Value =
11835- DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11836- dl, VT, Loads);
11837- }
11865+ SDValue Value =
11866+ DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11867+ dl, VT, Loads);
1183811868 SDValue RetOps[] = {Value, TF};
1183911869 return DAG.getMergeValues(RetOps, dl);
1184011870}
1184111871
11872+ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
11873+ SelectionDAG &DAG) const { // Lowers a v1024i1 store into stxvp stores of the v256i1 pieces of the DMR value.
11874+
11875+ SDLoc dl(Op);
11876+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11877+ SDValue StoreChain = SN->getChain();
11878+ SDValue BasePtr = SN->getBasePtr();
11879+ SmallVector<SDValue, 4> Values;
11880+ SmallVector<SDValue, 4> Stores;
11881+ EVT VT = SN->getValue().getValueType();
11882+
11883+ // Type v1024i1 is used for Dense Math dmr registers.
11884+ assert(VT == MVT::v1024i1 && "Unsupported type.");
11885+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11886+ "Dense Math support required.");
11887+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11888+
11889+ SDValue Lo(
11890+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11891+ Op.getOperand(1), // NOTE(review): operand 1 is presumably the stored value (SN->getValue()) - confirm.
11892+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
11893+ 0);
11894+ SDValue Hi(
11895+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11896+ Op.getOperand(1),
11897+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
11898+ 0);
11899+ EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; // Each extract node below produces two v256i1 results.
11900+ MachineSDNode *ExtNode =
11901+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
11902+ Values.push_back(SDValue(ExtNode, 0));
11903+ Values.push_back(SDValue(ExtNode, 1));
11904+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
11905+ Values.push_back(SDValue(ExtNode, 0));
11906+ Values.push_back(SDValue(ExtNode, 1));
11907+
11908+ if (Subtarget.isLittleEndian()) // On little-endian subtargets the pieces are stored in reverse order.
11909+ std::reverse(Values.begin(), Values.end());
11910+
11911+ SDVTList Tys = DAG.getVTList(MVT::Other); // Each store node produces only a chain.
11912+ SmallVector<SDValue, 4> Ops{
11913+ StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
11914+ Values[0], BasePtr}; // Operands of each stxvp node: chain, intrinsic ID, value, address.
11915+ MachineMemOperand *MMO = SN->getMemOperand();
11916+ unsigned NumVecs = VT.getSizeInBits() / 256; // Number of 256-bit pieces to store.
11917+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11918+ MachineMemOperand *NewMMO =
11919+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32); // Memory operand for this piece: offset Idx * 32, size 32.
11920+ if (Idx > 0) { // Every store after the first writes 32 bytes past the previous address.
11921+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11922+ DAG.getConstant(32, dl, BasePtr.getValueType()));
11923+ Ops[3] = BasePtr;
11924+ }
11925+ Ops[2] = Values[Idx];
11926+ SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11927+ MVT::v256i1, NewMMO); // Ops[0] is never updated, so every store uses the incoming StoreChain.
11928+ Stores.push_back(St);
11929+ }
11930+
11931+ SDValue TF = DAG.getTokenFactor(dl, Stores); // Join the chains of all the stores; this is the lowered result.
11932+ return TF;
11933+ }
11934+
1184211935SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
1184311936 SelectionDAG &DAG) const {
1184411937 SDLoc dl(Op);
1184511938 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1184611939 SDValue StoreChain = SN->getChain();
1184711940 SDValue BasePtr = SN->getBasePtr();
1184811941 SDValue Value = SN->getValue();
11942+ SDValue Value2 = SN->getValue();
1184911943 EVT StoreVT = Value.getValueType();
11850- SmallVector<SDValue, 4> ValueVec;
1185111944
11852- if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1 &&
11853- StoreVT != MVT::v1024i1)
11854- return Op;
11945+ if (StoreVT == MVT::v1024i1)
11946+ return LowerDMFVectorStore(Op, DAG);
1185511947
11856- // Used for dense math registers.
11857- assert((StoreVT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
11858- "Type unsupported for this processor");
11948+ if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11949+ return Op;
1185911950
1186011951 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
1186111952 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
@@ -11873,43 +11964,20 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
1187311964 MachineSDNode *ExtNode = DAG.getMachineNode(
1187411965 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
1187511966
11876- ValueVec.push_back(SDValue(ExtNode, 0));
11877- ValueVec.push_back(SDValue(ExtNode, 1));
11967+ Value = SDValue(ExtNode, 0);
11968+ Value2 = SDValue(ExtNode, 1);
1187811969 } else
1187911970 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
1188011971 NumVecs = 4;
11881-
11882- } else if (StoreVT == MVT::v1024i1) {
11883- SDValue Lo(DAG.getMachineNode(
11884- TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11885- Op.getOperand(1),
11886- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
11887- 0);
11888- SDValue Hi(DAG.getMachineNode(
11889- TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11890- Op.getOperand(1),
11891- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
11892- 0);
11893- EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11894- MachineSDNode *ExtNode =
11895- DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
11896- ValueVec.push_back(SDValue(ExtNode, 0));
11897- ValueVec.push_back(SDValue(ExtNode, 1));
11898- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
11899- ValueVec.push_back(SDValue(ExtNode, 0));
11900- ValueVec.push_back(SDValue(ExtNode, 1));
11901- NumVecs = 8;
1190211972 }
1190311973 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
1190411974 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
1190511975 SDValue Elt;
1190611976 if (Subtarget.isISAFuture()) {
1190711977 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11908- unsigned Pairx =
11909- Subtarget.isLittleEndian() ? (NumVecs - Idx - 1) / 2 : Idx / 2;
11910- Elt = DAG.getNode(
11911- PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, ValueVec[Pairx],
11912- DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11978+ Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11979+ Idx > 1 ? Value2 : Value,
11980+ DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
1191311981 } else
1191411982 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
1191511983 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
0 commit comments