Skip to content

Commit f4ca19e

Browse files
committed
separate 1024 code
1 parent b3b8416 commit f4ca19e

File tree

5 files changed

+213
-161
lines changed

5 files changed

+213
-161
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 138 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -11763,6 +11763,64 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1176311763
return Op;
1176411764
}
1176511765

11766+
// Lowers a load of type v1024i1 (asserted below) into NumVecs v256i1 loads
// issued through the ppc_vsx_lxvp intrinsic at successive 32-byte offsets
// from the load's base pointer. The loaded values are inserted into two
// v512i1 halves (DMXXINSTFDMR512 and DMXXINSTFDMR512_HI), which are then
// combined into a v1024i1 value by a REG_SEQUENCE on the DMRRC register class.
// Returns the merged pair {assembled value, TokenFactor of the load chains}.
SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11767+
SelectionDAG &DAG) const {
11768+
SDLoc dl(Op);
11769+
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11770+
SDValue LoadChain = LN->getChain();
11771+
SDValue BasePtr = LN->getBasePtr();
11772+
EVT VT = Op.getValueType();
11773+
11774+
// Type v1024i1 is used for Dense Math dmr registers.
11775+
assert(VT == MVT::v1024i1 && "Unsupported type.");
11776+
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11777+
"Dense Math support required.");
11778+
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11779+
11780+
SmallVector<SDValue, 4> Loads;
11781+
SmallVector<SDValue, 4> LoadChains;
11782+
SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
11783+
// Operand list shared by every intrinsic load. The chain and intrinsic id
// never change; only the address slot (LoadOps[2]) is rewritten in the loop.
SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11784+
MachineMemOperand *MMO = LN->getMemOperand();
11785+
// Number of 256-bit pieces: 4 when VT is v1024i1.
unsigned NumVecs = VT.getSizeInBits() / 256;
11786+
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11787+
// Memory operand for this piece, derived from the original one
// (arguments: offset Idx * 32, size 32).
MachineMemOperand *NewMMO =
11788+
DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
11789+
// BasePtr is advanced cumulatively by 32 on every iteration after the first.
if (Idx > 0) {
11790+
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11791+
DAG.getConstant(32, dl, BasePtr.getValueType()));
11792+
LoadOps[2] = BasePtr;
11793+
}
11794+
SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11795+
DAG.getVTList(MVT::v256i1, MVT::Other),
11796+
LoadOps, MVT::v256i1, NewMMO);
11797+
// Result 1 of the intrinsic node is its output chain; result 0 is the value.
LoadChains.push_back(Ld.getValue(1));
11798+
Loads.push_back(Ld);
11799+
}
11800+
11801+
// On little-endian subtargets the loads and their chains are put in reverse
// order before being assembled into the result.
if (Subtarget.isLittleEndian()) {
11802+
std::reverse(Loads.begin(), Loads.end());
11803+
std::reverse(LoadChains.begin(), LoadChains.end());
11804+
}
11805+
11806+
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11807+
// Indexing Loads[0..3] below relies on NumVecs being 4.
SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0],
11808+
Loads[1]),
11809+
0);
11810+
SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11811+
SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
11812+
Loads[2], Loads[3]),
11813+
0);
11814+
SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11815+
SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11816+
// REG_SEQUENCE operands: register class id, then (value, subregister index)
// pairs placing Lo at sub_wacc_lo and Hi at sub_wacc_hi.
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11817+
SDValue Value =
11818+
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
11819+
11820+
// Hand back both the assembled value and the combined chain.
SDValue RetOps[] = {Value, TF};
11821+
return DAG.getMergeValues(RetOps, dl);
11822+
}
11823+
1176611824
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1176711825
SelectionDAG &DAG) const {
1176811826
SDLoc dl(Op);
@@ -11771,12 +11829,11 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1177111829
SDValue BasePtr = LN->getBasePtr();
1177211830
EVT VT = Op.getValueType();
1177311831

11774-
if (VT != MVT::v256i1 && VT != MVT::v512i1 && VT != MVT::v1024i1)
11775-
return Op;
11832+
if (VT == MVT::v1024i1)
11833+
return LowerDMFVectorLoad(Op, DAG);
1177611834

11777-
// Used for dense math registers.
11778-
assert((VT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
11779-
"Type unsupported for this processor");
11835+
if (VT != MVT::v256i1 && VT != MVT::v512i1)
11836+
return Op;
1178011837

1178111838
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
1178211839
// Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
@@ -11805,57 +11862,91 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1180511862
std::reverse(LoadChains.begin(), LoadChains.end());
1180611863
}
1180711864
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11808-
SDValue Value;
11809-
if (VT == MVT::v1024i1) {
11810-
SmallVector<SDValue, 4> Pairs;
11811-
SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32);
11812-
SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32);
11813-
SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32);
11814-
NumVecs >>= 1;
11815-
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11816-
const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx,
11817-
Loads[Idx * 2 + 1], Vsx1Idx};
11818-
Pairs.push_back(SDValue(
11819-
DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0));
11820-
}
11821-
SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1,
11822-
Pairs[0], Pairs[1]),
11823-
0);
11824-
SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11825-
SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
11826-
Pairs[2], Pairs[3]),
11827-
0);
11828-
SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11829-
SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11830-
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11831-
Value = SDValue(
11832-
DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
11833-
} else {
11834-
Value =
11835-
DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11836-
dl, VT, Loads);
11837-
}
11865+
SDValue Value =
11866+
DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11867+
dl, VT, Loads);
1183811868
SDValue RetOps[] = {Value, TF};
1183911869
return DAG.getMergeValues(RetOps, dl);
1184011870
}
1184111871

11872+
// Lowers a store of a v1024i1 value (asserted below). The value is split into
// its sub_wacc_lo and sub_wacc_hi v512i1 halves with EXTRACT_SUBREG, each half
// is expanded into two v256i1 results (DMXXEXTFDMR512 / DMXXEXTFDMR512_HI),
// and the four results are written with ppc_vsx_stxvp intrinsic stores at
// successive 32-byte offsets from the store's base pointer.
// Returns a TokenFactor over the generated stores.
SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
11873+
SelectionDAG &DAG) const {
11874+
11875+
SDLoc dl(Op);
11876+
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11877+
SDValue StoreChain = SN->getChain();
11878+
SDValue BasePtr = SN->getBasePtr();
11879+
SmallVector<SDValue, 4> Values;
11880+
SmallVector<SDValue, 4> Stores;
11881+
EVT VT = SN->getValue().getValueType();
11882+
11883+
// Type v1024i1 is used for Dense Math dmr registers.
11884+
assert(VT == MVT::v1024i1 && "Unsupported type.");
11885+
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11886+
"Dense Math support required.");
11887+
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11888+
11889+
// NOTE(review): Op.getOperand(1) is presumably the stored value, i.e. the
// same node as SN->getValue() -- confirm.
SDValue Lo(
11890+
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11891+
Op.getOperand(1),
11892+
DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
11893+
0);
11894+
SDValue Hi(
11895+
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11896+
Op.getOperand(1),
11897+
DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
11898+
0);
11899+
// Each extract node produces two v256i1 results; Values ends up holding
// {Lo result 0, Lo result 1, Hi result 0, Hi result 1}.
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11900+
MachineSDNode *ExtNode =
11901+
DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
11902+
Values.push_back(SDValue(ExtNode, 0));
11903+
Values.push_back(SDValue(ExtNode, 1));
11904+
ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
11905+
Values.push_back(SDValue(ExtNode, 0));
11906+
Values.push_back(SDValue(ExtNode, 1));
11907+
11908+
// On little-endian subtargets the four pieces are stored in reverse order.
if (Subtarget.isLittleEndian())
11909+
std::reverse(Values.begin(), Values.end());
11910+
11911+
SDVTList Tys = DAG.getVTList(MVT::Other);
11912+
// Operand list reused for every store: {chain, intrinsic id, value, address}.
// Ops[2] is reassigned on every iteration, so Values[0] here is only an
// initial placeholder; Ops[3] keeps the original BasePtr for Idx == 0.
SmallVector<SDValue, 4> Ops{
11913+
StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
11914+
Values[0], BasePtr};
11915+
MachineMemOperand *MMO = SN->getMemOperand();
11916+
// Number of 256-bit pieces: 4 when VT is v1024i1.
unsigned NumVecs = VT.getSizeInBits() / 256;
11917+
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11918+
// Memory operand for this piece, derived from the original one
// (arguments: offset Idx * 32, size 32).
MachineMemOperand *NewMMO =
11919+
DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
11920+
// BasePtr is advanced cumulatively by 32 on every iteration after the first.
if (Idx > 0) {
11921+
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11922+
DAG.getConstant(32, dl, BasePtr.getValueType()));
11923+
Ops[3] = BasePtr;
11924+
}
11925+
Ops[2] = Values[Idx];
11926+
SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11927+
MVT::v256i1, NewMMO);
11928+
Stores.push_back(St);
11929+
}
11930+
11931+
// Join the individual store chains into the single chain that is returned.
SDValue TF = DAG.getTokenFactor(dl, Stores);
11932+
return TF;
11933+
}
11934+
1184211935
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
1184311936
SelectionDAG &DAG) const {
1184411937
SDLoc dl(Op);
1184511938
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1184611939
SDValue StoreChain = SN->getChain();
1184711940
SDValue BasePtr = SN->getBasePtr();
1184811941
SDValue Value = SN->getValue();
11942+
SDValue Value2 = SN->getValue();
1184911943
EVT StoreVT = Value.getValueType();
11850-
SmallVector<SDValue, 4> ValueVec;
1185111944

11852-
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1 &&
11853-
StoreVT != MVT::v1024i1)
11854-
return Op;
11945+
if (StoreVT == MVT::v1024i1)
11946+
return LowerDMFVectorStore(Op, DAG);
1185511947

11856-
// Used for dense math registers.
11857-
assert((StoreVT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
11858-
"Type unsupported for this processor");
11948+
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11949+
return Op;
1185911950

1186011951
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
1186111952
// Here we create 2 or 4 v16i8 stores to store the pair or accumulator
@@ -11873,43 +11964,20 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
1187311964
MachineSDNode *ExtNode = DAG.getMachineNode(
1187411965
PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
1187511966

11876-
ValueVec.push_back(SDValue(ExtNode, 0));
11877-
ValueVec.push_back(SDValue(ExtNode, 1));
11967+
Value = SDValue(ExtNode, 0);
11968+
Value2 = SDValue(ExtNode, 1);
1187811969
} else
1187911970
Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
1188011971
NumVecs = 4;
11881-
11882-
} else if (StoreVT == MVT::v1024i1) {
11883-
SDValue Lo(DAG.getMachineNode(
11884-
TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11885-
Op.getOperand(1),
11886-
DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
11887-
0);
11888-
SDValue Hi(DAG.getMachineNode(
11889-
TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11890-
Op.getOperand(1),
11891-
DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
11892-
0);
11893-
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11894-
MachineSDNode *ExtNode =
11895-
DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
11896-
ValueVec.push_back(SDValue(ExtNode, 0));
11897-
ValueVec.push_back(SDValue(ExtNode, 1));
11898-
ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
11899-
ValueVec.push_back(SDValue(ExtNode, 0));
11900-
ValueVec.push_back(SDValue(ExtNode, 1));
11901-
NumVecs = 8;
1190211972
}
1190311973
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
1190411974
unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
1190511975
SDValue Elt;
1190611976
if (Subtarget.isISAFuture()) {
1190711977
VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11908-
unsigned Pairx =
11909-
Subtarget.isLittleEndian() ? (NumVecs - Idx - 1) / 2 : Idx / 2;
11910-
Elt = DAG.getNode(
11911-
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, ValueVec[Pairx],
11912-
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11978+
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11979+
Idx > 1 ? Value2 : Value,
11980+
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
1191311981
} else
1191411982
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
1191511983
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,6 +1344,8 @@ namespace llvm {
13441344

13451345
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
13461346
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
1347+
SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
1348+
SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
13471349

13481350
SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
13491351
CallingConv::ID CallConv, bool isVarArg,

llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
4646
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
4747
; CHECK-NEXT: xvf16ger2pp wacc0, v28, v30
4848
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
49-
; CHECK-NEXT: stxv v2, 48(r30)
50-
; CHECK-NEXT: stxv v3, 32(r30)
51-
; CHECK-NEXT: stxv v4, 16(r30)
52-
; CHECK-NEXT: stxv v5, 0(r30)
49+
; CHECK-NEXT: stxv v4, 48(r30)
50+
; CHECK-NEXT: stxv v5, 32(r30)
51+
; CHECK-NEXT: stxv v2, 16(r30)
52+
; CHECK-NEXT: stxv v3, 0(r30)
5353
; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
5454
; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
5555
; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload

0 commit comments

Comments
 (0)