Skip to content

Commit c1748ab

Browse files
committed
Piece 1
1 parent cfc6147 commit c1748ab

File tree

5 files changed

+233
-75
lines changed

5 files changed

+233
-75
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
533533

534534
// Remove dead registers or mask bits.
535535
SmallSet<Register, 8> SeenRegs;
536+
//errs() << "Advance before next: "; CurrMI->dump();
536537
for (auto &MO : CurrMI->operands()) {
537538
if (!MO.isReg() || !MO.getReg().isVirtual())
538539
continue;
@@ -549,8 +550,10 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
549550
if (!S.liveAt(SI)) {
550551
if (It == LiveRegs.end()) {
551552
It = LiveRegs.find(MO.getReg());
552-
if (It == LiveRegs.end())
553+
if (It == LiveRegs.end()) {
554+
errs() << "BadReg: " << printReg(MO.getReg()) << "\n";
553555
llvm_unreachable("register isn't live");
556+
}
554557
}
555558
auto PrevMask = It->second;
556559
It->second &= ~S.LaneMask;
@@ -561,8 +564,10 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
561564
LiveRegs.erase(It);
562565
} else if (!LI.liveAt(SI)) {
563566
auto It = LiveRegs.find(MO.getReg());
564-
if (It == LiveRegs.end())
567+
if (It == LiveRegs.end()) {
568+
errs() << "BadReg: " << printReg(MO.getReg()) << "\n";
565569
llvm_unreachable("register isn't live");
570+
}
566571
CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI);
567572
LiveRegs.erase(It);
568573
}

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,6 @@ void GCNScheduleDAGMILive::runSchedStages() {
988988
->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
989989
Stage->getRegionIdx()));
990990
}
991-
992991
ScheduleDAGMILive::schedule();
993992
Stage->finalizeGCNRegion();
994993
}
@@ -1818,7 +1817,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18181817
auto NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
18191818
unsigned I = OptIt->getFirst();
18201819
unsigned &Excess = OptIt->getSecond();
1821-
if (NumRegs >= Excess)
1820+
if (NumRegs >= Excess)
18221821
OptRegions.erase(I);
18231822
else
18241823
Excess -= NumRegs;
@@ -1854,41 +1853,32 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18541853
if (!UseMI || DefMI.getParent() == UseMI->getParent())
18551854
continue;
18561855

1857-
// Do not rematerialize an instruction if it uses or is used by an
1858-
// instruction that we have designated for rematerialization.
1859-
// FIXME: Allow for rematerialization chains: this requires 1. updating
1860-
// remat points to account for uses that are rematerialized, and 2. either
1861-
// rematerializing the candidates in careful ordering, or deferring the
1862-
// MBB RP walk until the entire chain has been rematerialized.
1863-
if (Rematerializations.contains(UseMI) ||
1864-
llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
1865-
return MO.isReg() && RematRegs.contains(MO.getReg());
1866-
}))
1867-
continue;
1868-
18691856
// Do not rematerialize an instruction if it uses registers that aren't
18701857
// available at its use. This ensures that we are not extending any live
18711858
// range while rematerializing.
18721859
SlotIndex DefIdx = DAG.LIS->getInstructionIndex(DefMI);
18731860
SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
1861+
18741862
if (!allUsesAvailableAt(&DefMI, DefIdx, UseIdx))
18751863
continue;
18761864

18771865
REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
1878-
RematInstruction &Remat =
1879-
Rematerializations.try_emplace(&DefMI, I, UseMI).first->second;
1866+
RematInstruction &Temp = *Remats.insert({&DefMI, I, UseMI});
18801867

18811868
bool RematUseful = false;
18821869
if (auto It = OptRegions.find(I); It != OptRegions.end()) {
18831870
// Optimistically consider that moving the instruction out of its
18841871
// defining region will reduce RP in the latter; this assumes that
18851872
// maximum RP in the region is reached somewhere between the defining
18861873
// instruction and the end of the region.
1874+
// Since we only remat instructions with one use, we can assume that we
1875+
// are adding a new remat instead of merely updating the remat position.
18871876
REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
18881877
RematUseful = true;
18891878
LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
1890-
if (ReduceRPInRegion(It, Mask))
1891-
return true;
1879+
if (ReduceRPInRegion(It, Mask)) {
1880+
return Remats.resolveInsertPos(&DAG.MRI, DAG.LIS);
1881+
}
18921882
}
18931883

18941884
for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
@@ -1897,7 +1887,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18971887
auto It = DAG.LiveIns[LIRegion].find(Reg);
18981888
if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
18991889
continue;
1900-
Remat.LiveInRegions.insert(LIRegion);
1890+
Temp.LiveInRegions.insert(LIRegion);
19011891

19021892
// Account for the reduction in RP due to the rematerialization in an
19031893
// optimizable region in which the defined register is a live-in. This
@@ -1909,14 +1899,13 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
19091899
REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
19101900
RematUseful = true;
19111901
if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg]))
1912-
return true;
1902+
return Remats.resolveInsertPos(&DAG.MRI, DAG.LIS);
19131903
}
19141904
}
19151905

19161906
// If the instruction is not a live-in or live-out in any optimizable
19171907
// region then there is no point in rematerializing it.
19181908
if (!RematUseful) {
1919-
Rematerializations.pop_back();
19201909
REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
19211910
} else {
19221911
RematRegs.insert(Reg);
@@ -1927,11 +1916,11 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
19271916
if (IncreaseOccupancy) {
19281917
// We were trying to increase occupancy but failed, abort the stage.
19291918
REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
1930-
Rematerializations.clear();
1919+
Remats.clear();
19311920
return false;
19321921
}
19331922
REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
1934-
return !Rematerializations.empty();
1923+
return !Remats.empty() && Remats.resolveInsertPos(&DAG.MRI, DAG.LIS);
19351924
}
19361925

19371926
void PreRARematStage::rematerialize() {
@@ -1943,19 +1932,23 @@ void PreRARematStage::rematerialize() {
19431932
DenseSet<unsigned> RecomputeRP;
19441933
SlotIndexes *Slots = DAG.LIS->getSlotIndexes();
19451934

1935+
// Remat the instructions that depend on other remats last
1936+
Remats.sort(&DAG.MRI);
1937+
19461938
// Rematerialize all instructions.
1947-
for (auto &[DefMI, Remat] : Rematerializations) {
1948-
MachineBasicBlock::iterator InsertPos(Remat.UseMI);
1939+
for (auto &Remat : Remats) {
1940+
MachineInstr *DefMI = Remat.DefMI;
1941+
MachineBasicBlock::iterator InsertPos = Remat.InsertPos;
19491942
Register Reg = DefMI->getOperand(0).getReg();
19501943
unsigned SubReg = DefMI->getOperand(0).getSubReg();
1951-
19521944
// Rematerialize DefMI to its use block.
19531945
TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, SubReg, *DefMI,
19541946
*DAG.TRI);
19551947
Remat.RematMI = &*std::prev(InsertPos);
19561948
Remat.RematMI->getOperand(0).setSubReg(SubReg);
19571949
DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);
19581950

1951+
19591952
// Update region boundaries in regions we sank from (remove defining MI)
19601953
// and to (insert MI rematerialized in use block). Only then we can erase
19611954
// the original MI.
@@ -2054,6 +2047,20 @@ void PreRARematStage::rematerialize() {
20542047
UpdateLiveRange(SubRange);
20552048
}
20562049
}
2050+
for (auto &Remat : reverse(Remats)) {
2051+
if (Remat.HasDependency) {
2052+
for (auto &ROp : Remat.RematMI->operands()) {
2053+
if (!ROp.isReg() || !ROp.getReg() || !ROp.readsReg())
2054+
continue;
2055+
auto UseReg = ROp.getReg();
2056+
if (!UseReg.isVirtual())
2057+
continue;
2058+
2059+
DAG.LIS->removeInterval(UseReg);
2060+
DAG.LIS->createAndComputeVirtRegInterval(UseReg);
2061+
}
2062+
}
2063+
}
20572064

20582065
// All regions impacted by at least one rematerialization must be rescheduled.
20592066
// Maximum pressure must also be recomputed for all regions where it changed
@@ -2066,6 +2073,7 @@ void PreRARematStage::rematerialize() {
20662073
continue;
20672074

20682075
GCNRegPressure RP;
2076+
20692077
if (IsEmptyRegion) {
20702078
RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
20712079
} else {
@@ -2140,10 +2148,11 @@ void PreRARematStage::finalizeGCNSchedStage() {
21402148
static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
21412149

21422150
// Rollback the rematerializations.
2143-
for (const auto &[_, Remat] : Rematerializations) {
2151+
for (auto &Remat : Remats) {
21442152
MachineInstr &RematMI = *Remat.RematMI;
2145-
MachineBasicBlock::iterator InsertPos(DAG.Regions[Remat.DefRegion].second);
21462153
MachineBasicBlock *MBB = getRegionMBB(MF, DAG.Regions[Remat.DefRegion]);
2154+
MachineBasicBlock::iterator InsertPos(MBB->end());
2155+
21472156
Register Reg = RematMI.getOperand(0).getReg();
21482157
unsigned SubReg = RematMI.getOperand(0).getSubReg();
21492158

@@ -2156,6 +2165,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
21562165
DAG.LIS->InsertMachineInstrInMaps(*NewMI);
21572166

21582167
// Erase rematerialized MI.
2168+
DAG.updateRegionBoundaries(DAG.Regions, RematMI, nullptr);
21592169
RematMI.eraseFromParent();
21602170
DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
21612171

@@ -2166,6 +2176,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
21662176
// Re-add the register as a live-in in all regions it used to be one in.
21672177
for (unsigned LIRegion : Remat.LiveInRegions)
21682178
DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
2179+
21692180
}
21702181

21712182
// Reset RP in all impacted regions.

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 146 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,9 @@ class PreRARematStage : public GCNSchedStage {
451451
private:
452452
/// Useful information about a rematerializable instruction.
453453
struct RematInstruction {
454+
/// The rematerializable instruction itself. Its single use (UseMI) is
455+
/// located in a different block.
456+
MachineInstr *DefMI;
454457
/// Single use of the rematerializable instruction's defined register,
455458
/// located in a different block.
456459
MachineInstr *UseMI;
@@ -463,12 +466,151 @@ class PreRARematStage : public GCNSchedStage {
463466
/// Region containing the rematerializable instruction.
464467
unsigned DefRegion;
465468

466-
RematInstruction(unsigned DefRegion, MachineInstr *UseMI)
467-
: UseMI(UseMI), DefRegion(DefRegion) {}
469+
/// The position at which to insert the remat.
470+
MachineBasicBlock::iterator InsertPos;
471+
472+
/// The position at which we should rollback
473+
MachineBasicBlock::iterator RollbackPos;
474+
475+
bool HasDependency = false;
476+
477+
RematInstruction(MachineInstr *DefMI, unsigned DefRegion, MachineInstr *UseMI)
478+
: DefMI(DefMI), UseMI(UseMI), DefRegion(DefRegion), InsertPos(UseMI) {}
479+
480+
RematInstruction(MachineInstr *DefMI, unsigned DefRegion, MachineInstr *UseMI, MachineBasicBlock::iterator InsertPos)
481+
: DefMI(DefMI), UseMI(UseMI), DefRegion(DefRegion), InsertPos(InsertPos) {}
482+
};
483+
484+
class RematInstructions {
485+
private:
486+
SmallVector<RematInstruction, 8> Rematerializations;
487+
488+
public:
489+
using iterator = typename SmallVectorImpl<RematInstruction>::iterator;
490+
using const_iterator = typename SmallVectorImpl<RematInstruction>::const_iterator;
491+
using reverse_iterator = typename SmallVectorImpl<RematInstruction>::reverse_iterator;
492+
using const_reverse_iterator = typename SmallVectorImpl<RematInstruction>::const_reverse_iterator;
493+
494+
iterator begin() { return Rematerializations.begin(); }
495+
const_iterator begin() const { return Rematerializations.begin(); }
496+
iterator end() { return Rematerializations.end(); }
497+
const_iterator end() const { return Rematerializations.end(); }
498+
499+
reverse_iterator rbegin() { return Rematerializations.rbegin(); }
500+
const_reverse_iterator rbegin() const { return Rematerializations.rbegin(); }
501+
reverse_iterator rend() { return Rematerializations.rend(); }
502+
const_reverse_iterator rend() const { return Rematerializations.rend(); }
503+
504+
unsigned size() {return Rematerializations.size();}
505+
506+
RematInstruction *insert(const RematInstruction &R) {
507+
Rematerializations.push_back(R);
508+
return &Rematerializations[size() - 1];
509+
}
510+
511+
RematInstruction *insert(MachineInstr *DefMI, unsigned DefRegion, MachineInstr *UseMI) {
512+
Rematerializations.push_back({DefMI, DefRegion, UseMI});
513+
return &Rematerializations[size() - 1];
514+
}
515+
516+
bool erase(const RematInstruction &R) {
517+
auto Match = find_if(Rematerializations, [&R](const RematInstruction &Other){
518+
return R.DefMI == Other.DefMI && R.UseMI == Other.UseMI;
519+
});
520+
if (Match == Rematerializations.end())
521+
return false;
522+
return !Rematerializations.erase(Match);
523+
}
524+
525+
void clear() {
526+
Rematerializations.clear();
527+
}
528+
529+
bool empty() {
530+
return Rematerializations.empty();
531+
}
532+
533+
// We may be rematerializing an instruction used by another instruction we are rematerializing. Be
534+
// sure that we insert the user remats after the def remats. The user remats and def remats will have the same InsertPos;
535+
// by inserting the users last, they will occur after the defs. Thus, we must sort the remats so
536+
// the users occur after the defs.
537+
void sort(const MachineRegisterInfo *MRI) {
538+
std::sort(Rematerializations.begin(), Rematerializations.end(), [MRI](RematInstruction &A, RematInstruction &B) {
539+
if (A.HasDependency && B.HasDependency) {
540+
for (auto BOp : B.DefMI->operands()) {
541+
if (!BOp.isReg() || !BOp.getReg() || !BOp.readsReg())
542+
continue;
543+
auto UseReg = BOp.getReg();
544+
if (!UseReg.isVirtual())
545+
continue;
546+
MachineInstr *DefInst = &*MRI->def_instr_begin(UseReg);
547+
if (DefInst == A.DefMI)
548+
return true;
549+
}
550+
return false;
551+
}
552+
553+
return !A.HasDependency;
554+
});
555+
}
556+
557+
bool resolveInsertPos(const MachineRegisterInfo *MRI,
558+
const LiveIntervals *LIS) {
559+
// We may have added remat candidates which are used by other remat
560+
// candidates -- be sure that we have correct insert points for this
561+
bool FixedPoint = false;
562+
unsigned IterCount = 0;
563+
while (!FixedPoint && IterCount < 5) {
564+
++IterCount;
565+
FixedPoint = true;
566+
for (auto &Remat : Rematerializations) {
567+
MachineInstr *RematInst = Remat.DefMI;
568+
569+
for (auto MO : RematInst->operands()) {
570+
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
571+
continue;
572+
auto UseReg = MO.getReg();
573+
if (!UseReg.isVirtual())
574+
continue;
575+
for (MachineInstr &DefInst : MRI->def_instructions(UseReg)) {
576+
577+
578+
auto Match = find_if(Rematerializations, [&DefInst](const RematInstruction &R) {
579+
return R.DefMI == &DefInst;
580+
});
581+
582+
if (Match == Rematerializations.end())
583+
continue;
584+
585+
RematInstruction &TheMatch = *const_cast<RematInstruction *>(&*Match);
586+
Remat.HasDependency = true;
587+
588+
// Since the remats 1. only have one use, and 2. the operands of the remat are live at the remat point
589+
// we do not need further analysis to check whether changing the TheMatch.InsertPos will affect the condition
590+
// of allUsesAvailableAt.
591+
// The questionable case is when we are moving a remat pt to a later MBB. For such a condition to occur, we must have
592+
// instructions A, B and C where we have previously set the remat point of A to B, but B is being remat to C so we
593+
// would like to update the remat point of A to C. We will remat B to C iff the use operands (i.e. A) are live at C.
594+
// since there is only 1 use of A, this can occur iff A is defined outside a loop and is live-in / live-out. Thus, B
595+
// must be in the body of a loop. Moreover, since we know A can be remat at B, then the use operands of A must also be defined
596+
// outside a loop and are live-in / live-out. Since these operands are live throughout the body of the loop, we are safe to
597+
// remat A to C without further checking.
598+
// TODO: handle multi-use case.
599+
if (TheMatch.InsertPos == Remat.InsertPos)
600+
continue;
601+
FixedPoint = false;
602+
TheMatch.InsertPos = Remat.InsertPos;
603+
}
604+
}
605+
606+
}
607+
}
608+
return FixedPoint;
609+
}
468610
};
469611

470-
/// Collects instructions to rematerialize.
471-
MapVector<MachineInstr *, RematInstruction> Rematerializations;
612+
RematInstructions Remats;
613+
472614
/// Collect regions whose live-ins or register pressure will change due to
473615
/// rematerializations.
474616
DenseMap<unsigned, GCNRegPressure> ImpactedRegions;

0 commit comments

Comments
 (0)