Skip to content

Commit d8a7254

Browse files
jrbyrnes authored and kerbowa committed
Auto select good flags
Rebase: 5708851
1 parent c8d27ce commit d8a7254

File tree

8 files changed

+47
-49
lines changed

8 files changed

+47
-49
lines changed

llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class RegAllocPriorityAdvisor {
4646
SlotIndexes *const Indexes;
4747
const bool RegClassPriorityTrumpsGlobalness;
4848
const bool ReverseLocalAssignment;
49+
const bool ForceLocalAssignment;
4950
};
5051

5152
class DefaultPriorityAdvisor : public RegAllocPriorityAdvisor {

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,8 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
10201020
/// (3) Bottom-up allocation is no longer guaranteed to optimally color.
10211021
virtual bool reverseLocalAssignment() const { return false; }
10221022

1023+
/// Target hook that RAGreedy::run() falls back to when the
/// force-local-assignment option was not given on the command line.
/// The default implementation returns false.
virtual bool forceLocalAssignment() const { return false; }
1024+
10231025
/// Allow the target to override the cost of using a callee-saved register for
10241026
/// the first time. Default value of 0 means we will use a callee-saved
10251027
/// register if it is available.

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,11 @@ static cl::opt<bool> GreedyReverseLocalAssignment(
132132
"shorter local live ranges will tend to be allocated first"),
133133
cl::Hidden);
134134

135-
static cl::opt<bool> ForceLocalAssignment(
135+
static cl::opt<bool> GreedyForceLocalAssignment(
136136
"force-local-assignment",
137137
cl::desc("Force allocation order of local live ranges, such that "
138138
"shorter local live ranges will tend to be allocated first"),
139-
cl::init(false), cl::Hidden);
139+
cl::Hidden);
140140

141141
static cl::opt<unsigned> SplitThresholdForRegWithHint(
142142
"split-threshold-for-reg-with-hint",
@@ -2898,6 +2898,10 @@ bool RAGreedy::run(MachineFunction &mf) {
28982898
? GreedyReverseLocalAssignment
28992899
: TRI->reverseLocalAssignment();
29002900

2901+
ForceLocalAssignment = GreedyForceLocalAssignment.getNumOccurrences()
2902+
? GreedyForceLocalAssignment
2903+
: TRI->forceLocalAssignment();
2904+
29012905
ExtraInfo.emplace();
29022906

29032907
EvictAdvisor = EvictProvider->getAdvisor(*MF, *this, MBFI, Loops);

llvm/lib/CodeGen/RegAllocGreedy.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public RegAllocBase,
153153
return RegClassPriorityTrumpsGlobalness;
154154
}
155155
// Accessor for the ReverseLocalAssignment member; the
// RegAllocPriorityAdvisor constructor reads it to initialise its own copy.
bool getReverseLocalAssignment() const { return ReverseLocalAssignment; }
156+
// Accessor for the ForceLocalAssignment member. RAGreedy::run() sets that
// member from the force-local-assignment option when it was given on the
// command line, and from TRI->forceLocalAssignment() otherwise; the
// RegAllocPriorityAdvisor constructor reads it to initialise its own copy.
bool getForceLocalAssignment() const { return ForceLocalAssignment; }
156157
// end (interface to priority advisers)
157158

158159
private:
@@ -286,6 +287,8 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public RegAllocBase,
286287

287288
bool ReverseLocalAssignment = false;
288289

290+
bool ForceLocalAssignment = false;
291+
289292
public:
290293
RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F = nullptr);
291294

llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,4 +215,5 @@ RegAllocPriorityAdvisor::RegAllocPriorityAdvisor(const MachineFunction &MF,
215215
RegClassInfo(RA.getRegClassInfo()), Indexes(Indexes),
216216
RegClassPriorityTrumpsGlobalness(
217217
RA.getRegClassPriorityTrumpsGlobalness()),
218-
ReverseLocalAssignment(RA.getReverseLocalAssignment()) {}
218+
ReverseLocalAssignment(RA.getReverseLocalAssignment()),
219+
ForceLocalAssignment(RA.getForceLocalAssignment()) {}

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ static cl::opt<bool>
7070
RematLiveThru("amdgpu-remat-livethru", cl::Hidden,
7171
cl::desc("Rematerialize the LiveThru registers for the first "
7272
"loop found in the code"),
73-
cl::init(true));
73+
cl::init(false));
7474

7575
static cl::opt<bool> RematLiveIn(
7676
"amdgpu-remat-into", cl::Hidden,
@@ -516,8 +516,10 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
516516
if (IsXDL) {
517517
// FIXME: Hack since XDL is only actually occupying for 24 cycles with 8
518518
// pass MFMA.
519-
if (Cycles > 2)
520-
Cycles -= 2;
519+
if (Cycles > 2) {
520+
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
521+
Cycles -= ST.hasGFX950Insts() ? 2 : 1;
522+
}
521523
XDLProcRes.reset();
522524
XDLProcRes.reserve(Cycles);
523525
} else if (IsALU) {
@@ -686,6 +688,19 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
686688
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
687689
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
688690
if (!DisableRemat) SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
691+
692+
CI.clear();
693+
CI.compute(*C->MF);
694+
695+
unsigned CycleCount = 0;
696+
for (auto C : CI.toplevel_cycles()) {
697+
++CycleCount;
698+
}
699+
if (CycleCount >= 2) {
700+
GCNTrackers = true;
701+
RematLiveThru = true;
702+
}
703+
689704
GCNTrackers = GCNTrackers & !IsLegacyScheduler;
690705
}
691706

@@ -1642,6 +1657,9 @@ bool PreRARematStage::initGCNSchedStage() {
16421657
// need to be fixed if there is another pass after this pass.
16431658
assert(!S.hasNextStage());
16441659

1660+
SIRegisterInfo *SRI = const_cast<SIRegisterInfo *>(
1661+
static_cast<const SIRegisterInfo *>(DAG.TRI));
1662+
SRI->setLocalAssignment(true);
16451663
CI.clear();
16461664
CI.compute(MF);
16471665
PDT.recalculate(MF);
@@ -2834,8 +2852,10 @@ SUnit *GCNPostSchedStrategy::pickNode(bool &IsTopNode) {
28342852
if (IsXDL) {
28352853
// FIXME: Hack since XDL is only actually occupying for 24 cycles with 8
28362854
// pass MFMA.
2837-
if (Cycles > 2)
2838-
Cycles -= 2;
2855+
if (Cycles > 2) {
2856+
const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
2857+
Cycles -= ST.hasGFX950Insts() ? 2 : 1;
2858+
}
28392859
XDLProcRes.reset();
28402860
XDLProcRes.reserve(Cycles);
28412861
} else if (IsALU) {

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ class GCNSchedStrategy : public GenericScheduler {
101101
// GCN RP Tracker for bottom-up scheduling
102102
mutable GCNUpwardRPTracker UpwardTracker;
103103

104+
MachineCycleInfo CI;
105+
104106
public:
105107
// schedule() have seen register pressure over the critical limits and had to
106108
// track register pressure for actual scheduling heuristics.
@@ -672,24 +674,18 @@ class RematCandidates {
672674

673675
std::set<RematCandidate> Cache;
674676

675-
// errs() << "HoistToDominator\n";
676677
for (auto RematInfo : RematMap) {
677-
// errs() << "\nRemat Inst: "; RematInfo.first->dump();
678678
std::set<unsigned> HighRPs;
679679
SmallVector<MachineBasicBlock *> MBBs;
680680
for (auto R : RematInfo.second) {
681681
for (auto HRP : R.HighRPRegions) {
682682
HighRPs.insert(HRP);
683683
}
684684
MBBs.push_back(R.InsertPt->getParent());
685-
// errs() << "Has remat point in: " <<
686-
// printMBBReference(*R.InsertPt->getParent()) << "\n";
687685
}
688686

689687
auto DomBlock = PDT->findNearestCommonDominator(iterator_range(MBBs));
690688
if (DomBlock && isReachableFrom(TargetBlock, DomBlock)) {
691-
// errs() << "Found dom block: " << printMBBReference(*DomBlock) <<
692-
// "\n";
693689
RematCandidate New(RematInfo.first, CI.getCycleDepth(DomBlock), HighRPs,
694690
DomBlock->begin());
695691
Cache.insert(New);
@@ -700,37 +696,13 @@ class RematCandidates {
700696
}
701697
}
702698

703-
// errs() << "Condensed: " << Entries.size() << " into: " << Cache.size() <<
704-
// "\n";
705699
Entries.clear();
706700
Entries = Cache;
707701
return true;
708702
}
709703

710704
bool update(RematCandidate &RNew, const LiveIntervals *LIS) {
711-
// errs() << "Update: "; RNew.Def->dump();
712-
////errs() << "Calling update for cand: ";
713-
// RNew.Def->dump();
714-
////errs() << "With Regions: ";
715-
// for (auto Regi : RNew.HighRPRegions) {
716-
// //errs() << Regi;
717-
// }
718-
////errs() << "\n";
719705
auto Match = find_if(Entries, [RNew](const RematCandidate &R) {
720-
if (R.Def == RNew.Def) {
721-
////errs() << "equal defs for cand match: \n";
722-
723-
// R.Def->dump();
724-
////errs() << "With Regions: ";
725-
// for (auto Regi : R.HighRPRegions) {
726-
// //errs() << Regi;
727-
// }
728-
////errs() << "\n";
729-
730-
////errs() << "RNew parent: " << RNew.InsertPt->getParent()->getName()
731-
///<< "\n"; /errs() << "R parent: " <<
732-
///R.InsertPt->getParent()->getName() << "\n";
733-
}
734706
return R.Def == RNew.Def &&
735707
RNew.InsertPt->getParent() == R.InsertPt->getParent();
736708
});
@@ -766,32 +738,21 @@ class RematCandidates {
766738

767739
void resolveSameBlockUses(const MachineRegisterInfo *MRI,
768740
const LiveIntervals *LIS) {
769-
// errs() << "\nResolve Same Block uses";
770741
// We may have added remat candidates which are used by other remat
771742
// candidates -- be sure that we have correct insert points for this
772743
bool FixedPoint = false;
773744
while (!FixedPoint) {
774-
// errs() << "Fixed Point iter\n";
775-
// //errs() << "Doling fixed point\n";
776745
FixedPoint = true;
777746
for (auto &RematEntry : Entries) {
778747

779748
MachineInstr *RematInst = RematEntry.Def;
780-
// errs() << "R: "; RematInst->dump();
781-
// errs() << "For Regions: ";
782-
// errs() << "\n";
783749
MachineBasicBlock::iterator RematPt = RematEntry.InsertPt;
784-
// for (auto RematInst : RematEntry.second) {
785-
// //errs() << "Have Remat Inst: "; RematInst.first->dump();
786-
// //errs() << "With Insert Point: " <<
787-
// DAG.LIS->getInstructionIndex(*RematInst.second) << "\n";
788750
for (auto MO : RematInst->operands()) {
789751
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
790752
continue;
791753
auto UseReg = MO.getReg();
792754
if (!UseReg.isVirtual())
793755
continue;
794-
// //errs() << "Found UseReg: " << printReg(UseReg) << "\n";
795756
for (MachineInstr &DefInst : MRI->def_instructions(UseReg)) {
796757

797758
auto Match =

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
4444
bool isWave32;
4545
BitVector RegPressureIgnoredUnits;
4646

47+
bool ForceLocalAssignment = false;
48+
4749
/// Sub reg indexes for getRegSplitParts.
4850
/// First index represents subreg size from 1 to 32 Half DWORDS.
4951
/// The inner vector is sorted by bit offset.
@@ -111,6 +113,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
111113
return 100;
112114
}
113115

116+
/// Returns the ForceLocalAssignment member, which is false until changed
/// through setLocalAssignment().
bool forceLocalAssignment() const override { return ForceLocalAssignment; }
117+
118+
/// Stores \p Flag as the value subsequently returned by
/// forceLocalAssignment().
/// NOTE(review): PreRARematStage::initGCNSchedStage() calls this after
/// const_cast-ing the scheduler's TRI pointer -- confirm that mutating the
/// register info object there is intended.
void setLocalAssignment(bool Flag) { ForceLocalAssignment = Flag; }
119+
114120
// When building a block VGPR load, we only really transfer a subset of the
115121
// registers in the block, based on a mask. Liveness analysis is not aware of
116122
// the mask, so it might consider that any register in the block is available

0 commit comments

Comments
 (0)