Skip to content

Commit f3470bf

Browse files
committed
[AIE2P] Spill to register instead of stack
VEC/ACC/FIFO registers can be copied to each other, which can be used as an alternative to a costly stack spill.
1 parent 5b35a87 commit f3470bf

File tree

4 files changed

+104
-5
lines changed

4 files changed

+104
-5
lines changed

llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,6 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
554554
MCRegister SrcReg, bool KillSrc) const {
555555
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
556556
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
557-
558557
if (AIE2P::mMvSclSrcRegClass.contains(SrcReg) &&
559558
AIE2P::mMvSclDstRegClass.contains(DstReg)) {
560559
// Build MultiSlotPseudo in preference
@@ -753,8 +752,16 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
753752
getKillRegState(KillSrc));
754753
} else if ((AIE2P::ePSRFLdFRegClass.contains(SrcReg)) &&
755754
(AIE2P::ePSRFLdFRegClass.contains(DstReg))) {
756-
copyThroughSubRegs(MBB, MBBI, DL, DstReg, SrcReg, KillSrc);
755+
// copyThroughSubRegs(MBB, MBBI, DL, DstReg, SrcReg, KillSrc);
756+
copyPhysReg(MBB, MBBI, DL, TRI.getSubReg(DstReg, AIE2P::sub_ptr),
757+
TRI.getSubReg(SrcReg, AIE2P::sub_ptr), KillSrc);
758+
copyPhysReg(MBB, MBBI, DL, TRI.getSubReg(DstReg, AIE2P::sub_fifo),
759+
TRI.getSubReg(SrcReg, AIE2P::sub_fifo), KillSrc);
760+
copyPhysReg(MBB, MBBI, DL, TRI.getSubReg(DstReg, AIE2P::sub_avail),
761+
TRI.getSubReg(SrcReg, AIE2P::sub_avail), KillSrc);
757762
} else {
763+
MBBI->dump();
764+
LLVM_DEBUG(MBBI->dump());
758765
llvm_unreachable("unhandled case in copyPhysReg");
759766
}
760767
}
@@ -906,6 +913,14 @@ void AIE2PInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
906913
Opcode = AIE2P::VST_E_SPILL;
907914
} else if (regClassMatches(AIE2P::VEC576RegClass, RC, SrcReg)) {
908915
Opcode = AIE2P::VST_EX_SPILL;
916+
} else if (&AIE2P::spill_acc1024_to_compositeRegClass == RC) {
917+
Opcode = AIE2P::VST_CM_SPILL;
918+
} else if (&AIE2P::spill_acc512_to_compositeRegClass == RC) {
919+
Opcode = AIE2P::VST_dmx_sts_bm_spill;
920+
} else if (&AIE2P::spill_vec1024_to_compositeRegClass == RC) {
921+
Opcode = AIE2P::VST_Y_SPILL;
922+
} else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) {
923+
Opcode = AIE2P::VST_dmx_sts_x_spill;
909924
} else if (regClassMatches(AIE2P::eSRegClass, RC, SrcReg) ||
910925
regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, SrcReg)) {
911926
// Can't spill these directly. Need to bounce through a GPR.
@@ -962,6 +977,7 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
962977
} else if (regClassMatches(AIE2P::VEC256RegClass, RC, DstReg)) {
963978
Opcode = AIE2P::VLDA_dmw_lda_w_spill;
964979
} else if (regClassMatches(AIE2P::mBMsRegClass, RC, DstReg)) {
980+
I->dump();
965981
Opcode = AIE2P::VLDA_dmx_lda_bm_spill;
966982
} else if (regClassMatches(AIE2P::mFifoHLRegRegClass, RC, DstReg)) {
967983
Opcode = AIE2P::VLDA_dmx_lda_fifohl_spill;
@@ -970,6 +986,7 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
970986
} else if (regClassMatches(AIE2P::ACC2048RegClass, RC, DstReg)) {
971987
Opcode = AIE2P::VLDA_DM_SPILL;
972988
} else if (regClassMatches(AIE2P::ACC1024RegClass, RC, DstReg)) {
989+
I->dump();
973990
Opcode = AIE2P::VLDA_CM_SPILL;
974991
} else if (regClassMatches(AIE2P::FIFO1024RegClass, RC, DstReg)) {
975992
Opcode = AIE2P::VLDA_FIFO_SPILL;
@@ -987,6 +1004,15 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
9871004
Opcode = AIE2P::VLDA_E_SPILL;
9881005
} else if (regClassMatches(AIE2P::VEC576RegClass, RC, DstReg)) {
9891006
Opcode = AIE2P::VLDA_EX_SPILL;
1007+
} else if (&AIE2P::spill_acc1024_to_compositeRegClass == RC) {
1008+
Opcode = AIE2P::VLDA_CM_SPILL;
1009+
} else if (&AIE2P::spill_acc512_to_compositeRegClass == RC) {
1010+
// I->dump();
1011+
Opcode = AIE2P::VLDA_dmx_lda_bm_spill;
1012+
} else if (&AIE2P::spill_vec1024_to_compositeRegClass == RC) {
1013+
Opcode = AIE2P::VLDA_Y_SPILL;
1014+
} else if (&AIE2P::spill_vec512_to_compositeRegClass == RC) {
1015+
Opcode = AIE2P::VLDA_dmx_lda_x_spill;
9901016
} else if (regClassMatches(AIE2P::eSRegClass, RC, DstReg) ||
9911017
regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, DstReg)) {
9921018
// Can't spill these directly. Need to bounce through a GPR.
@@ -999,6 +1025,7 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
9991025
.addReg(Reg, getKillRegState(true));
10001026
return;
10011027
} else {
1028+
I->dump();
10021029
llvm_unreachable(
10031030
"Can't load this register from stack slot: is it virtual?");
10041031
}

llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ cl::opt<bool> EnableCoalescingForWideCopy(
4343

4444
extern llvm::cl::opt<unsigned> ReservedGPRs;
4545

46+
static llvm::cl::opt<bool>
47+
SpillAccToVecOrAcc("aie2p-spill-accumulator-to-vec-or-acc", cl::Hidden,
48+
cl::init(true),
49+
cl::desc("Allow spilling accumulator registers to "
50+
"vector or accumulator registers"));
51+
4652
AIE2PRegisterInfo::AIE2PRegisterInfo(unsigned HwMode)
4753
: AIE2PGenRegisterInfo(AIE2P::sp, /*DwarfFlavour*/ 0, /*EHFlavor*/ 0,
4854
/*PC*/ 0, HwMode) {}
@@ -482,9 +488,34 @@ AIE2PRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
482488

483489
if (AIE2P::eSRegClass.hasSubClassEq(RC))
484490
return &AIE2P::spill_eS_to_eRRegClass;
491+
if (SpillAccToVecOrAcc && RC == &AIE2P::ACC1024RegClass)
492+
return &AIE2P::spill_acc1024_to_compositeRegClass;
493+
if (SpillAccToVecOrAcc && RC == &AIE2P::ACC512RegClass)
494+
return &AIE2P::spill_acc512_to_compositeRegClass;
495+
if (SpillAccToVecOrAcc && RC == &AIE2P::VEC1024RegClass)
496+
return &AIE2P::spill_vec1024_to_compositeRegClass;
497+
if (SpillAccToVecOrAcc && RC == &AIE2P::VEC512RegClass)
498+
return &AIE2P::spill_vec512_to_compositeRegClass;
485499
return RC;
486500
}
487501

502+
const TargetRegisterClass *
503+
AIE2PRegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
504+
unsigned Idx) const {
505+
if ((RC == &AIE2P::spill_vec512_to_compositeRegClass ||
506+
RC == &AIE2P::spill_acc512_to_compositeRegClass) &&
507+
(Idx == AIE2P::sub_256_lo || Idx == AIE2P::sub_256_hi)) {
508+
return &AIE2P::VEC512RegClass;
509+
}
510+
if ((RC == &AIE2P::spill_vec1024_to_compositeRegClass ||
511+
RC == &AIE2P::spill_acc1024_to_compositeRegClass) &&
512+
(Idx == AIE2P::sub_512_hi_256_lo || Idx == AIE2P::sub_512_hi_256_hi)) {
513+
return &AIE2P::VEC1024RegClass;
514+
}
515+
// Forward to TableGen's default version.
516+
return AIE2PGenRegisterInfo::getSubClassWithSubReg(RC, Idx);
517+
}
518+
488519
const std::set<int> &AIE2PRegisterInfo::getSubRegSplit(int RegClassId) const {
489520
static const std::set<int> NoSplit = {AIE2P::NoSubRegister};
490521
static const std::set<int> Mod2DSplit = {AIE2P::sub_mod, AIE2P::sub_dim_size,
@@ -618,6 +649,42 @@ bool AIE2PRegisterInfo::shouldCoalesce(
618649

619650
const unsigned SrcSize = getRegSizeInBits(*SrcRC);
620651
const unsigned DstSize = getRegSizeInBits(*DstRC);
652+
653+
// if (SrcSize == 256 && (AIE2P::ACC2048RegClass.hasSubClassEq(DstRC) ||
654+
// AIE2P::ACC1024RegClass.hasSubClassEq(DstRC) ||
655+
// AIE2P::ACC512RegClass.hasSubClassEq(DstRC) ||
656+
// &AIE2P::spill_vec512_to_compositeRegClass == DstRC))
657+
// {
658+
// return false;
659+
// }
660+
// if (DstSize == 256 && (AIE2P::ACC2048RegClass.hasSubClassEq(SrcRC) ||
661+
// AIE2P::ACC1024RegClass.hasSubClassEq(SrcRC) ||
662+
// AIE2P::ACC512RegClass.hasSubClassEq(SrcRC) ||
663+
// &AIE2P::spill_vec512_to_compositeRegClass == SrcRC))
664+
// {
665+
// return false;
666+
// }
667+
// if ((&AIE2P::spill_vec512_to_compositeRegClass == DstRC &&
668+
// (AIE2P::ACC2048RegClass.hasSubClassEq(SrcRC) ||
669+
// AIE2P::ACC1024RegClass.hasSubClassEq(SrcRC) ||
670+
// AIE2P::ACC512RegClass.hasSubClassEq(SrcRC)))) {
671+
// return false;
672+
// }
673+
// if ((&AIE2P::spill_vec512_to_compositeRegClass == SrcRC &&
674+
// (AIE2P::ACC2048RegClass.hasSubClassEq(DstRC) ||
675+
// AIE2P::ACC1024RegClass.hasSubClassEq(DstRC) ||
676+
// AIE2P::ACC512RegClass.hasSubClassEq(DstRC)))) {
677+
// return false;
678+
// }
679+
// if (((&AIE2P::spill_vec512_to_compositeRegClass == SrcRC ||
680+
// &AIE2P::spill_vec1024_to_compositeRegClass == SrcRC ||
681+
// &AIE2P::spill_vec512_to_compositeRegClass == DstRC ||
682+
// &AIE2P::spill_vec1024_to_compositeRegClass == DstRC) &&
683+
// (AIE2P::ACC2048RegClass.hasSubClassEq(NewRC) ||
684+
// AIE2P::ACC1024RegClass.hasSubClassEq(NewRC) ||
685+
// AIE2P::ACC512RegClass.hasSubClassEq(NewRC)))) {
686+
// return false;
687+
// }
621688
MachineFunction *MF = MI->getMF();
622689
const AIEBaseInstrInfo *TII =
623690
static_cast<const AIEBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());

llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ struct AIE2PRegisterInfo : public AIE2PGenRegisterInfo {
7777
getLargestLegalSuperClass(const TargetRegisterClass *RC,
7878
const MachineFunction &MF) const override;
7979
const TargetRegisterClass *
80+
getSubClassWithSubReg(const TargetRegisterClass *RC,
81+
unsigned Idx) const override;
82+
const TargetRegisterClass *
8083
getGPRRegClass(const MachineFunction &MF) const override;
8184

8285
unsigned getVectorRegBankID() const override;

llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ def sub_256_hi : SubRegIndex<256, 256>; // sub_512_lo_256_hi
3030
def sub_512_lo : SubRegIndex<512, 0>;
3131
def sub_512_hi : SubRegIndex<512, 512>;
3232

33-
def sub_512_acc_lo : SubRegIndex<512, 0>;
34-
def sub_512_acc_hi : SubRegIndex<512, 512>;
35-
3633
def sub_1024_lo : SubRegIndex<1024, 0>;
3734
def sub_1024_hi : SubRegIndex<1024, 1024>;
3835

@@ -993,4 +990,9 @@ def spill_eDN_to_eR : AIE2PScalarRegisterClass<(add eDN, eR)>;
993990
def spill_eDJ_to_eR : AIE2PScalarRegisterClass<(add eDJ, eR, eDN)>;
994991
def spill_eDC_to_eR : AIE2PScalarRegisterClass<(add eDC, eR)>;
995992

993+
def spill_vec512_to_composite : AIE2PVector512RegisterClass<(add mXm, mBMm)>;
994+
def spill_vec1024_to_composite : AIE2PVector1024RegisterClass<(add eY, mCMm)>;
995+
def spill_acc512_to_composite : AIE2PVector512RegisterClass<(add mBMm, mXm)>;
996+
def spill_acc1024_to_composite : AIE2PVector1024RegisterClass<(add mCMm, eY)>;
997+
996998
} // End AIE2P Namespace

0 commit comments

Comments
 (0)