-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[PowerPC] custom lower v1024i1 load/store #126969
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1363,6 +1363,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, | |
| setOperationAction(ISD::STORE, MVT::v512i1, Custom); | ||
| setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); | ||
| } | ||
| if (Subtarget.isISAFuture()) { | ||
| setOperationAction(ISD::LOAD, MVT::v1024i1, Custom); | ||
| setOperationAction(ISD::STORE, MVT::v1024i1, Custom); | ||
| addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass); | ||
| } | ||
|
|
||
| if (Subtarget.has64BitSupport()) | ||
| setOperationAction(ISD::PREFETCH, MVT::Other, Legal); | ||
|
|
@@ -11766,9 +11771,13 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, | |
| SDValue BasePtr = LN->getBasePtr(); | ||
| EVT VT = Op.getValueType(); | ||
|
|
||
| if (VT != MVT::v256i1 && VT != MVT::v512i1) | ||
| if (VT != MVT::v256i1 && VT != MVT::v512i1 && VT != MVT::v1024i1) | ||
| return Op; | ||
|
|
||
| // Used for dense math registers. | ||
| assert((VT != MVT::v1024i1 || Subtarget.isISAFuture()) && | ||
| "Type unsupported for this processor"); | ||
|
|
||
| // Type v256i1 is used for pairs and v512i1 is used for accumulators. | ||
| // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in | ||
| // 2 or 4 vsx registers. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe this doc needs to be updated, since it now also generates loads for v1024i1 types?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Code removed. |
||
|
|
@@ -11796,9 +11805,36 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, | |
| std::reverse(LoadChains.begin(), LoadChains.end()); | ||
| } | ||
| SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); | ||
| SDValue Value = | ||
| DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD, | ||
| dl, VT, Loads); | ||
| SDValue Value; | ||
| if (VT == MVT::v1024i1) { | ||
| SmallVector<SDValue, 4> Pairs; | ||
| SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32); | ||
| SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32); | ||
| SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32); | ||
| NumVecs >>= 1; | ||
| for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { | ||
| const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx, | ||
| Loads[Idx * 2 + 1], Vsx1Idx}; | ||
| Pairs.push_back(SDValue( | ||
| DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0)); | ||
| } | ||
| SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, | ||
| Pairs[0], Pairs[1]), | ||
| 0); | ||
| SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32); | ||
|
||
| SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1, | ||
| Pairs[2], Pairs[3]), | ||
| 0); | ||
| SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32); | ||
| SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32); | ||
| const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub}; | ||
| Value = SDValue( | ||
| DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0); | ||
|
||
| } else { | ||
| Value = | ||
| DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD, | ||
| dl, VT, Loads); | ||
| } | ||
| SDValue RetOps[] = {Value, TF}; | ||
| return DAG.getMergeValues(RetOps, dl); | ||
| } | ||
|
|
@@ -11810,12 +11846,17 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, | |
| SDValue StoreChain = SN->getChain(); | ||
| SDValue BasePtr = SN->getBasePtr(); | ||
| SDValue Value = SN->getValue(); | ||
| SDValue Value2 = SN->getValue(); | ||
| EVT StoreVT = Value.getValueType(); | ||
| SmallVector<SDValue, 4> ValueVec; | ||
|
|
||
| if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1) | ||
| if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1 && | ||
| StoreVT != MVT::v1024i1) | ||
| return Op; | ||
|
|
||
| // Used for dense math registers. | ||
| assert((StoreVT != MVT::v1024i1 || Subtarget.isISAFuture()) && | ||
| "Type unsupported for this processor"); | ||
|
|
||
| // Type v256i1 is used for pairs and v512i1 is used for accumulators. | ||
| // Here we create 2 or 4 v16i8 stores to store the pair or accumulator | ||
| // underlying registers individually. | ||
|
|
@@ -11832,20 +11873,43 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, | |
| MachineSDNode *ExtNode = DAG.getMachineNode( | ||
| PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1)); | ||
|
|
||
| Value = SDValue(ExtNode, 0); | ||
| Value2 = SDValue(ExtNode, 1); | ||
| ValueVec.push_back(SDValue(ExtNode, 0)); | ||
| ValueVec.push_back(SDValue(ExtNode, 1)); | ||
| } else | ||
| Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value); | ||
| NumVecs = 4; | ||
|
|
||
| } else if (StoreVT == MVT::v1024i1) { | ||
| SDValue Lo(DAG.getMachineNode( | ||
| TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, | ||
| Op.getOperand(1), | ||
| DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), | ||
| 0); | ||
| SDValue Hi(DAG.getMachineNode( | ||
| TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, | ||
| Op.getOperand(1), | ||
| DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), | ||
| 0); | ||
| EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; | ||
| MachineSDNode *ExtNode = | ||
| DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo); | ||
| ValueVec.push_back(SDValue(ExtNode, 0)); | ||
| ValueVec.push_back(SDValue(ExtNode, 1)); | ||
| ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi); | ||
| ValueVec.push_back(SDValue(ExtNode, 0)); | ||
| ValueVec.push_back(SDValue(ExtNode, 1)); | ||
| NumVecs = 8; | ||
| } | ||
| for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { | ||
| unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx; | ||
| SDValue Elt; | ||
| if (Subtarget.isISAFuture()) { | ||
| VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2); | ||
| Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, | ||
| Idx > 1 ? Value2 : Value, | ||
| DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout()))); | ||
| unsigned Pairx = | ||
| Subtarget.isLittleEndian() ? (NumVecs - Idx - 1) / 2 : Idx / 2; | ||
|
||
| Elt = DAG.getNode( | ||
| PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, ValueVec[Pairx], | ||
| DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout()))); | ||
| } else | ||
| Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value, | ||
| DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout()))); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,10 +31,10 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) { | |
| ; CHECK-NEXT: vmr v3, v2 | ||
| ; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r3) | ||
| ; CHECK-NEXT: stxv v5, 32(r3) | ||
| ; CHECK-NEXT: stxv v2, 16(r3) | ||
| ; CHECK-NEXT: stxv v3, 0(r3) | ||
| ; CHECK-NEXT: stxv v2, 48(r3) | ||
| ; CHECK-NEXT: stxv v3, 32(r3) | ||
| ; CHECK-NEXT: stxv v4, 16(r3) | ||
| ; CHECK-NEXT: stxv v5, 0(r3) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: ass_acc: | ||
|
|
@@ -55,7 +55,7 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) { | |
| ; CHECK-O0-NEXT: vmr v3, v4 | ||
| ; CHECK-O0-NEXT: vmr v2, v4 | ||
| ; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
|
||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs0, 48(r3) | ||
| ; CHECK-O0-NEXT: xxlor vs0, v5, v5 | ||
|
|
@@ -121,10 +121,10 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-NEXT: lxv v4, 48(r3) | ||
| ; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r7) | ||
| ; CHECK-NEXT: stxv v5, 32(r7) | ||
| ; CHECK-NEXT: stxv v2, 16(r7) | ||
| ; CHECK-NEXT: stxv v3, 0(r7) | ||
| ; CHECK-NEXT: stxv v2, 48(r7) | ||
| ; CHECK-NEXT: stxv v3, 32(r7) | ||
| ; CHECK-NEXT: stxv v4, 16(r7) | ||
| ; CHECK-NEXT: stxv v5, 0(r7) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: ld_st_xxmtacc: | ||
|
|
@@ -154,7 +154,7 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-O0-NEXT: lxv vs0, 48(r3) | ||
| ; CHECK-O0-NEXT: xxlor v2, vs0, vs0 | ||
| ; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs0, 48(r7) | ||
| ; CHECK-O0-NEXT: xxlor vs0, v5, v5 | ||
|
|
@@ -236,10 +236,10 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0 | ||
| ; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r7) | ||
| ; CHECK-NEXT: stxv v5, 32(r7) | ||
| ; CHECK-NEXT: stxv v2, 16(r7) | ||
| ; CHECK-NEXT: stxv v3, 0(r7) | ||
| ; CHECK-NEXT: stxv v2, 48(r7) | ||
| ; CHECK-NEXT: stxv v3, 32(r7) | ||
| ; CHECK-NEXT: stxv v4, 16(r7) | ||
| ; CHECK-NEXT: stxv v5, 0(r7) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: ld_op_st_xxmtacc: | ||
|
|
@@ -271,7 +271,7 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-O0-NEXT: xxlor v4, vs0, vs0 | ||
| ; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0 | ||
| ; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs0, 48(r7) | ||
| ; CHECK-O0-NEXT: xxlor vs0, v5, v5 | ||
|
|
@@ -356,14 +356,14 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-NEXT: lxv v4, 48(r3) | ||
| ; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r3) | ||
| ; CHECK-NEXT: stxv v5, 32(r3) | ||
| ; CHECK-NEXT: stxv v2, 16(r3) | ||
| ; CHECK-NEXT: stxv v3, 0(r3) | ||
| ; CHECK-NEXT: stxv v4, 48(r7) | ||
| ; CHECK-NEXT: stxv v5, 32(r7) | ||
| ; CHECK-NEXT: stxv v2, 16(r7) | ||
| ; CHECK-NEXT: stxv v3, 0(r7) | ||
| ; CHECK-NEXT: stxv v2, 48(r3) | ||
| ; CHECK-NEXT: stxv v3, 32(r3) | ||
| ; CHECK-NEXT: stxv v4, 16(r3) | ||
| ; CHECK-NEXT: stxv v5, 0(r3) | ||
| ; CHECK-NEXT: stxv v2, 48(r7) | ||
| ; CHECK-NEXT: stxv v3, 32(r7) | ||
| ; CHECK-NEXT: stxv v4, 16(r7) | ||
| ; CHECK-NEXT: stxv v5, 0(r7) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: ld_st_xxmfacc: | ||
|
|
@@ -397,7 +397,7 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-O0-NEXT: lxv vs0, 48(r3) | ||
| ; CHECK-O0-NEXT: xxlor v2, vs0, vs0 | ||
| ; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-O0-NEXT: xxlor vs3, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs3, 48(r3) | ||
| ; CHECK-O0-NEXT: xxlor vs2, v5, v5 | ||
|
|
@@ -496,10 +496,10 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0 | ||
| ; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r7) | ||
| ; CHECK-NEXT: stxv v5, 32(r7) | ||
| ; CHECK-NEXT: stxv v2, 16(r7) | ||
| ; CHECK-NEXT: stxv v3, 0(r7) | ||
| ; CHECK-NEXT: stxv v2, 48(r7) | ||
| ; CHECK-NEXT: stxv v3, 32(r7) | ||
| ; CHECK-NEXT: stxv v4, 16(r7) | ||
| ; CHECK-NEXT: stxv v5, 0(r7) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: ld_op_st_xxmfacc: | ||
|
|
@@ -531,7 +531,7 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { | |
| ; CHECK-O0-NEXT: xxlor v4, vs0, vs0 | ||
| ; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0 | ||
| ; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs0, 48(r7) | ||
| ; CHECK-O0-NEXT: xxlor vs0, v5, v5 | ||
|
|
@@ -621,10 +621,10 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2) | |
| ; CHECK-NEXT: xvf64gerpp wacc0, vsp34, v5 | ||
| ; CHECK-NEXT: xvf64gerpp wacc0, vsp36, v4 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r3) | ||
| ; CHECK-NEXT: stxv v5, 32(r3) | ||
| ; CHECK-NEXT: stxv v2, 16(r3) | ||
| ; CHECK-NEXT: stxv v3, 0(r3) | ||
| ; CHECK-NEXT: stxv v2, 48(r3) | ||
| ; CHECK-NEXT: stxv v3, 32(r3) | ||
| ; CHECK-NEXT: stxv v4, 16(r3) | ||
| ; CHECK-NEXT: stxv v5, 0(r3) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: cmplx_xxmacc: | ||
|
|
@@ -673,7 +673,7 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2) | |
| ; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp32, vs0 | ||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp34, vs0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs0, 48(r3) | ||
| ; CHECK-O0-NEXT: xxlor vs0, v5, v5 | ||
|
|
@@ -783,10 +783,10 @@ define void @int_xxsetaccz(ptr %ptr) { | |
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: xxsetaccz wacc0 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r3) | ||
| ; CHECK-NEXT: stxv v5, 32(r3) | ||
| ; CHECK-NEXT: stxv v2, 16(r3) | ||
| ; CHECK-NEXT: stxv v3, 0(r3) | ||
| ; CHECK-NEXT: stxv v2, 48(r3) | ||
| ; CHECK-NEXT: stxv v3, 32(r3) | ||
| ; CHECK-NEXT: stxv v4, 16(r3) | ||
| ; CHECK-NEXT: stxv v5, 0(r3) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: int_xxsetaccz: | ||
|
|
@@ -802,7 +802,7 @@ define void @int_xxsetaccz(ptr %ptr) { | |
| ; CHECK-O0-LABEL: int_xxsetaccz: | ||
| ; CHECK-O0: # %bb.0: # %entry | ||
| ; CHECK-O0-NEXT: xxsetaccz wacc0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs0, 48(r3) | ||
| ; CHECK-O0-NEXT: xxlor vs0, v5, v5 | ||
|
|
@@ -946,14 +946,14 @@ define void @testcse(ptr %res, <16 x i8> %vc) { | |
| ; CHECK-NEXT: xxsetaccz wacc0 | ||
| ; CHECK-NEXT: xvf32gerpp wacc0, v2, v2 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r3) | ||
| ; CHECK-NEXT: stxv v5, 32(r3) | ||
| ; CHECK-NEXT: stxv v2, 16(r3) | ||
| ; CHECK-NEXT: stxv v3, 0(r3) | ||
| ; CHECK-NEXT: stxv v4, 112(r3) | ||
| ; CHECK-NEXT: stxv v5, 96(r3) | ||
| ; CHECK-NEXT: stxv v2, 80(r3) | ||
| ; CHECK-NEXT: stxv v3, 64(r3) | ||
| ; CHECK-NEXT: stxv v2, 48(r3) | ||
| ; CHECK-NEXT: stxv v3, 32(r3) | ||
| ; CHECK-NEXT: stxv v4, 16(r3) | ||
| ; CHECK-NEXT: stxv v5, 0(r3) | ||
| ; CHECK-NEXT: stxv v2, 112(r3) | ||
| ; CHECK-NEXT: stxv v3, 96(r3) | ||
| ; CHECK-NEXT: stxv v4, 80(r3) | ||
| ; CHECK-NEXT: stxv v5, 64(r3) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: testcse: | ||
|
|
@@ -975,7 +975,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) { | |
| ; CHECK-O0: # %bb.0: # %entry | ||
| ; CHECK-O0-NEXT: xxsetaccz wacc0 | ||
| ; CHECK-O0-NEXT: xvf32gerpp wacc0, v2, v2 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
| ; CHECK-O0-NEXT: xxlor vs3, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs3, 48(r3) | ||
| ; CHECK-O0-NEXT: xxlor vs2, v5, v5 | ||
|
|
@@ -1065,10 +1065,10 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p | |
| ; CHECK-NEXT: plxvp vsp36, 8(r4), 0 | ||
| ; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 | ||
| ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-NEXT: stxv v4, 48(r7) | ||
| ; CHECK-NEXT: stxv v5, 32(r7) | ||
| ; CHECK-NEXT: stxv v2, 16(r7) | ||
| ; CHECK-NEXT: stxv v3, 0(r7) | ||
| ; CHECK-NEXT: stxv v2, 48(r7) | ||
| ; CHECK-NEXT: stxv v3, 32(r7) | ||
| ; CHECK-NEXT: stxv v4, 16(r7) | ||
| ; CHECK-NEXT: stxv v5, 0(r7) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: test_ldst_1: | ||
|
|
@@ -1104,7 +1104,7 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p | |
| ; CHECK-O0-NEXT: plxvp vsp34, 8(r4), 0 | ||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: pmxvf64gernn wacc0, vsp34, vs0, 0, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 | ||
| ; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0 | ||
|
||
| ; CHECK-O0-NEXT: xxlor vs0, v4, v4 | ||
| ; CHECK-O0-NEXT: stxv vs0, 48(r7) | ||
| ; CHECK-O0-NEXT: xxlor vs0, v5, v5 | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be within the `if (Subtarget.hasMMA())` block on line 1357? I am basing this on the fact that we need `hasMMA()` for this support.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
According to PPC.cpp, the future CPU should include all of the features of Power11, and Power11 includes all of the features of Power10, for which the mma feature is set to true, so I think isISAFuture implies hasMMA.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All the checks of isISAFuture should I think be interpreted as placeholders. At some point a concrete target processor needs to be added and at that point all the checks for future should be updated to check specific features rather than just future - "future + 1" would also have those features but a check for future would fail.
Also for that reason I think the single condition is more correct, since we should be asking about one feature and not a cpu name in addition.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, Power11 implies hasMMA. I was thinking more of issues where users manually turn features on or off for a specific CPU. This patch uses `lxvp`/`stxvp`, so if the user explicitly turns off `mma` or `paired-vector-memops`, we shouldn't be generating this code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that you have the guards in the custom lowering functions though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the checks are inconsistent. I could make them consistent. The final switching code is TBD because the target features are TBD.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For paired support not being there, I think hitting the assert is better. It would be a case of manually forcing it off since ISA level supports it.