diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index bdc1ac7c7da58..f204988afe20c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1355,10 +1355,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::STORE, MVT::v256i1, Custom);
     }
     if (Subtarget.hasMMA()) {
-      if (Subtarget.isISAFuture())
+      if (Subtarget.isISAFuture()) {
         addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
-      else
+        addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
+        setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
+        setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
+      } else {
         addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
+      }
       setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
       setOperationAction(ISD::STORE, MVT::v512i1, Custom);
       setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
@@ -11758,6 +11762,73 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   return Op;
 }
 
+// Lower a load of v1024i1 (a Dense Math dmr register) into four lxvp
+// intrinsic loads of v256i1 at byte offsets 0, 32, 64 and 96 from the base
+// pointer. The loaded pairs are reversed on little-endian subtargets,
+// combined pairwise by DMXXINSTFDMR512 / DMXXINSTFDMR512_HI into two v512i1
+// halves, and joined by a REG_SEQUENCE over sub_wacc_lo / sub_wacc_hi.
+// Returns the merged {v1024i1 value, TokenFactor of the load chains}.
+SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+  SDValue LoadChain = LN->getChain();
+  SDValue BasePtr = LN->getBasePtr();
+  EVT VT = Op.getValueType();
+
+  // Type v1024i1 is used for Dense Math dmr registers.
+  assert(VT == MVT::v1024i1 && "Unsupported type.");
+  assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
+         "Dense Math support required.");
+  assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
+
+  SmallVector<SDValue, 4> Loads;
+  SmallVector<SDValue, 4> LoadChains;
+  SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
+  SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
+  MachineMemOperand *MMO = LN->getMemOperand();
+  unsigned NumVecs = VT.getSizeInBits() / 256;
+  // Each lxvp reads one 32-byte vector pair; BasePtr advances by 32 per step.
+  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+    MachineMemOperand *NewMMO =
+        DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
+    if (Idx > 0) {
+      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                            DAG.getConstant(32, dl, BasePtr.getValueType()));
+      LoadOps[2] = BasePtr;
+    }
+    SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+                                         DAG.getVTList(MVT::v256i1, MVT::Other),
+                                         LoadOps, MVT::v256i1, NewMMO);
+    LoadChains.push_back(Ld.getValue(1));
+    Loads.push_back(Ld);
+  }
+
+  // On little-endian subtargets the pairs are inserted in reverse order.
+  if (Subtarget.isLittleEndian()) {
+    std::reverse(Loads.begin(), Loads.end());
+    std::reverse(LoadChains.begin(), LoadChains.end());
+  }
+
+  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+  // Build the two 512-bit halves, then tie them into one DMRRC register.
+  SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0],
+                                Loads[1]),
+             0);
+  SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+  SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
+                                Loads[2], Loads[3]),
+             0);
+  SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+  SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+  const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+  SDValue Value =
+      SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
+
+  SDValue RetOps[] = {Value, TF};
+  return DAG.getMergeValues(RetOps, dl);
+}
+
 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -11766,6 +11837,9 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
   SDValue BasePtr = LN->getBasePtr();
   EVT VT = Op.getValueType();
 
+  if (VT == MVT::v1024i1)
+    return LowerDMFVectorLoad(Op, DAG);
+
   if (VT != MVT::v256i1 && VT != MVT::v512i1)
     return Op;
 
@@ -11803,6 +11877,78 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
   return DAG.getMergeValues(RetOps, dl);
 }
 
+// Lower a store of v1024i1 (a Dense Math dmr register). The sub_wacc_lo and
+// sub_wacc_hi halves are extracted as v512i1, each is split into two v256i1
+// values by DMXXEXTFDMR512 / DMXXEXTFDMR512_HI, the four values are reversed
+// on little-endian subtargets, and each one is stored with an stxvp intrinsic
+// at byte offsets 0, 32, 64 and 96 from the base pointer.
+// Returns a TokenFactor of the four store chains.
+SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
+                                               SelectionDAG &DAG) const {
+
+  SDLoc dl(Op);
+  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+  SDValue StoreChain = SN->getChain();
+  SDValue BasePtr = SN->getBasePtr();
+  SmallVector<SDValue, 4> Values;
+  SmallVector<SDValue, 4> Stores;
+  EVT VT = SN->getValue().getValueType();
+
+  // Type v1024i1 is used for Dense Math dmr registers.
+  assert(VT == MVT::v1024i1 && "Unsupported type.");
+  assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
+         "Dense Math support required.");
+  assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
+
+  // Split the stored value into its two 512-bit halves.
+  SDValue Lo(
+      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+                         Op.getOperand(1),
+                         DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+      0);
+  SDValue Hi(
+      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+                         Op.getOperand(1),
+                         DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+      0);
+  // Each half yields two 256-bit vector pairs.
+  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
+  MachineSDNode *ExtNode =
+      DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
+  Values.push_back(SDValue(ExtNode, 0));
+  Values.push_back(SDValue(ExtNode, 1));
+  ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
+  Values.push_back(SDValue(ExtNode, 0));
+  Values.push_back(SDValue(ExtNode, 1));
+
+  if (Subtarget.isLittleEndian())
+    std::reverse(Values.begin(), Values.end());
+
+  SDVTList Tys = DAG.getVTList(MVT::Other);
+  SmallVector<SDValue, 4> Ops{
+      StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
+      Values[0], BasePtr};
+  MachineMemOperand *MMO = SN->getMemOperand();
+  unsigned NumVecs = VT.getSizeInBits() / 256;
+  // One stxvp per pair; BasePtr advances by 32 bytes per step.
+  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+    MachineMemOperand *NewMMO =
+        DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
+    if (Idx > 0) {
+      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                            DAG.getConstant(32, dl, BasePtr.getValueType()));
+      Ops[3] = BasePtr;
+    }
+    Ops[2] = Values[Idx];
+    SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+                                         MVT::v256i1, NewMMO);
+    Stores.push_back(St);
+  }
+
+  SDValue TF = DAG.getTokenFactor(dl, Stores);
+  return TF;
+}
+
 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -11813,6 +11959,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
   SDValue Value2 = SN->getValue();
   EVT StoreVT = Value.getValueType();
 
+  if (StoreVT == MVT::v1024i1)
+    return LowerDMFVectorStore(Op, DAG);
+
   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
     return Op;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 514329bbe92d7..1f22aa16a89be 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1344,6 +1344,8 @@ namespace llvm {
 
     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                             CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/test/CodeGen/PowerPC/v1024ls.ll b/llvm/test/CodeGen/PowerPC/v1024ls.ll
new file mode 100644
index 0000000000000..c7f6911f9ddbc
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v1024ls.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @v1024ls(ptr nocapture readonly %vqp, ptr nocapture %resp) {
+; CHECK-LABEL: v1024ls:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r4)
+; CHECK-NEXT:    stxvp vsp36, 64(r4)
+; CHECK-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r4)
+; CHECK-NEXT:    stxvp vsp36, 0(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: v1024ls:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <1024 x i1>, ptr %vqp, align 64
+  store <1024 x i1> %0, ptr %resp, align 64
+  ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()