Skip to content

Commit c113760

Browse files
committed
Review comments
Change-Id: I99db02cea2777024b4948a55d6a298c384f40534
1 parent f5edfbb commit c113760

File tree

3 files changed

+74
-81
lines changed

3 files changed

+74
-81
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ struct GCNRegPressure {
9090
DynamicVGPRBlockSize));
9191
}
9292

93-
unsigned getVGPRSpills(const GCNSubtarget &ST, MachineFunction &MF) {
93+
unsigned getVGPRSpills(MachineFunction &MF) {
94+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
9495
if (!ST.hasGFX90AInsts())
9596
return 0;
9697

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 62 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,22 +1041,20 @@ bool GCNSchedStage::initGCNSchedStage() {
10411041
return true;
10421042
}
10431043

1044-
SlotIndex
1045-
RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
1046-
LiveIntervals *LIS,
1047-
SmallVectorImpl<SlotIndex> &DefIdxs) {
1044+
void RewriteScheduleStage::findReachingDefs(
1045+
MachineOperand &UseMO, LiveIntervals *LIS,
1046+
SmallVectorImpl<SlotIndex> &DefIdxs) {
10481047
assert(UseMO.isReg());
10491048
MachineInstr *UseMI = UseMO.getParent();
10501049
LiveInterval &UseLI = LIS->getInterval(UseMO.getReg());
1051-
auto VNInfo = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));
1050+
VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));
10521051

1053-
SlotIndex DefMBBStart =
1054-
LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNInfo->def));
1052+
SlotIndex DefMBBStart = LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNI->def));
10551053

10561054
// If the def is in the block, then it must be the only reaching def.
1057-
if (DefMBBStart != VNInfo->def) {
1058-
DefIdxs.push_back(VNInfo->def);
1059-
return VNInfo->def;
1055+
if (DefMBBStart != VNI->def) {
1056+
DefIdxs.push_back(VNI->def);
1057+
return;
10601058
}
10611059

10621060
SmallPtrSet<MachineBasicBlock *, 8> Visited;
@@ -1074,15 +1072,15 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
10741072
MachineBasicBlock *CurrMBB = Worklist.pop_back_val();
10751073

10761074
SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB);
1077-
auto VNInfo = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot());
1075+
VNInfo *VNI = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot());
10781076

1079-
MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNInfo->def);
1077+
MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNI->def);
10801078
SlotIndex DefMBBStart = LIS->getMBBStartIdx(DefMBB);
10811079

10821080
// If there is a def in this block, then add it to the list. This is the
10831081
// reaching def of this path.
1084-
if (DefMBBStart != VNInfo->def) {
1085-
DefIdxs.push_back(VNInfo->def);
1082+
if (DefMBBStart != VNI->def) {
1083+
DefIdxs.push_back(VNI->def);
10861084
continue;
10871085
}
10881086

@@ -1091,8 +1089,6 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
10911089
Worklist.push_back(PredMBB);
10921090
}
10931091
}
1094-
1095-
return VNInfo->def;
10961092
}
10971093

10981094
void RewriteScheduleStage::findReachingUses(
@@ -1106,9 +1102,9 @@ void RewriteScheduleStage::findReachingUses(
11061102

11071103
// If we find a use that contains this DefMI in its reachingDefs, then it is
11081104
// a reaching use.
1109-
if (find_if(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1105+
if (any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
11101106
return SlotIndex::isSameInstr(RDIdx, DefIdx);
1111-
}) != ReachingDefIndexes.end())
1107+
}))
11121108
ReachingUses.push_back(&UseMO);
11131109
}
11141110
}
@@ -1769,27 +1765,29 @@ bool RewriteScheduleStage::initHeuristics(
17691765
// Prepare for the heuristics
17701766
for (auto &MBB : MF) {
17711767
for (auto &MI : MBB) {
1772-
if (isRewriteCandidate(&MI)) {
1773-
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
1774-
if (ReplacementOp == -1)
1775-
continue;
1768+
if (!isRewriteCandidate(&MI))
1769+
continue;
17761770

1777-
RewriteCands.push_back({&MI, MI.getOpcode()});
1778-
MI.setDesc(TII->get(ReplacementOp));
1771+
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
1772+
if (ReplacementOp == -1)
1773+
continue;
17791774

1780-
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1781-
if (Src2->isReg()) {
1782-
SmallVector<SlotIndex, 8> Src2ReachingDefs;
1783-
findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
1775+
RewriteCands.push_back({&MI, MI.getOpcode()});
1776+
MI.setDesc(TII->get(ReplacementOp));
17841777

1785-
// For any definition of the src2 register which is non-MFMA, we
1786-
// insert a copy.
1787-
for (SlotIndex RDIdx : Src2ReachingDefs) {
1788-
MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
1789-
if (!TII->isMAI(*RD))
1790-
CopyForDef.insert(RD);
1791-
}
1778+
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1779+
if (Src2->isReg()) {
1780+
SmallVector<SlotIndex, 8> Src2ReachingDefs;
1781+
findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
1782+
1783+
// For any definition of the src2 register which is non-MFMA, we
1784+
// insert a copy.
1785+
for (SlotIndex RDIdx : Src2ReachingDefs) {
1786+
MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
1787+
if (!TII->isMAI(*RD))
1788+
CopyForDef.insert(RD);
17921789
}
1790+
}
17931791

17941792
MachineOperand &Dst = MI.getOperand(0);
17951793
SmallVector<MachineOperand *, 8> DstReachingUses;
@@ -1827,36 +1825,39 @@ bool RewriteScheduleStage::initHeuristics(
18271825
DAG.MRI.setRegClass(Dst.getReg(), AGPRRC);
18281826
if (Src2->isReg())
18291827
DAG.MRI.setRegClass(Src2->getReg(), AGPRRC);
1830-
}
18311828
}
18321829
}
18331830

18341831
return true;
18351832
}
18361833

18371834
int64_t RewriteScheduleStage::getRewriteCost(
1838-
std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
1839-
DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
1840-
SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
1835+
const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
1836+
const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
1837+
const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
1838+
MachineBranchProbabilityInfo MBPI;
1839+
MachineBlockFrequencyInfo MBFI;
1840+
18411841
MBFI.calculate(MF, MBPI, *DAG.MLI);
18421842
int64_t BestSpillCost = 0;
18431843
int64_t Cost = 0;
18441844

1845+
uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
1846+
18451847
for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {
18461848
if (!RegionsWithExcessArchVGPR[Region])
18471849
continue;
18481850

1849-
auto PressureBefore = DAG.Pressure[Region];
1850-
unsigned SpillCostBefore = PressureBefore.getVGPRSpills(ST, MF);
1851+
GCNRegPressure &PressureBefore = DAG.Pressure[Region];
1852+
unsigned SpillCostBefore = PressureBefore.getVGPRSpills(MF);
18511853

18521854
// For the cases we care about (i.e. ArchVGPR usage is greater than the
18531855
// addressable limit), rewriting alone should bring pressure to manageable
18541856
// level. If we find any such region, then the rewrite is potentially
18551857
// beneficial.
1856-
auto PressureAfter = DAG.getRealRegPressure(Region);
1857-
unsigned SpillCostAfter = PressureAfter.getVGPRSpills(ST, MF);
1858+
GCNRegPressure PressureAfter = DAG.getRealRegPressure(Region);
1859+
unsigned SpillCostAfter = PressureAfter.getVGPRSpills(MF);
18581860

1859-
uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
18601861
uint64_t BlockFreq =
18611862
MBFI.getBlockFreq(DAG.Regions[Region].first->getParent())
18621863
.getFrequency();
@@ -1893,8 +1894,6 @@ int64_t RewriteScheduleStage::getRewriteCost(
18931894

18941895
unsigned CopyCost = 0;
18951896

1896-
uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
1897-
18981897
// For each CopyForDef, increase the cost by the register size while
18991898
// accounting for block frequency.
19001899
for (auto *DefMI : CopyForDef) {
@@ -1910,12 +1909,11 @@ int64_t RewriteScheduleStage::getRewriteCost(
19101909
}
19111910

19121911
// Account for CopyForUse copies in each block that the register is used.
1913-
for (auto &UseEntry : CopyForUse) {
1912+
for (auto &[UseBlock, UseRegs] : CopyForUse) {
19141913
uint64_t UseFreq =
1915-
EntryFreq ? MBFI.getBlockFreq(UseEntry.first).getFrequency() / EntryFreq
1916-
: 1;
1914+
EntryFreq ? MBFI.getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1;
19171915

1918-
for (auto UseReg : UseEntry.second) {
1916+
for (auto UseReg : UseRegs) {
19191917
unsigned RegSize =
19201918
DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg));
19211919
unsigned NumRegs = std::max(RegSize / 32, (unsigned)1);
@@ -1927,9 +1925,7 @@ int64_t RewriteScheduleStage::getRewriteCost(
19271925

19281926
// Reset to the vgpr form. We must do rewriting after copy-insertion, as some
19291927
// defs of the register may require VGPR.
1930-
for (auto RI : RewriteCands) {
1931-
MachineInstr *MI = RI.first;
1932-
1928+
for (auto &[MI, OriginalOpcode] : RewriteCands) {
19331929
assert(TII->isMAI(*MI));
19341930
const TargetRegisterClass *AGPRRC =
19351931
DAG.MRI.getRegClass(MI->getOperand(0).getReg());
@@ -1938,18 +1934,17 @@ int64_t RewriteScheduleStage::getRewriteCost(
19381934
MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
19391935
assert(Src2);
19401936

1941-
if (Src2->isReg()) {
1937+
if (Src2->isReg())
19421938
DAG.MRI.setRegClass(Src2->getReg(), VGPRRC);
1943-
}
19441939
DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC);
1945-
MI->setDesc(TII->get(RI.second));
1940+
MI->setDesc(TII->get(OriginalOpcode));
19461941
}
19471942

19481943
return Cost;
19491944
}
19501945

19511946
bool RewriteScheduleStage::rewrite(
1952-
std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
1947+
const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
19531948
DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
19541949
DenseMap<MachineInstr *, unsigned> LastMIToRegion;
19551950

@@ -1983,7 +1978,7 @@ bool RewriteScheduleStage::rewrite(
19831978
// want to replace the register it is using with the result of the copy, we
19841979
// must handle case 3. In the third case, we simply insert a copy after each
19851980
// of the reaching defs to connect to the copy of the reaching uses of the dst
1986-
// reg. This allows us to avoid inserting copies next to the' MFMAs.
1981+
// reg. This allows us to avoid inserting copies next to the MFMAs.
19871982
//
19881983
// While inserting the copies, we maintain a map of operands which will use
19891984
// different regs (i.e. the result of the copies). For example, a case 1 src2
@@ -1994,14 +1989,14 @@ bool RewriteScheduleStage::rewrite(
19941989
// queries.
19951990
//
19961991
// While inserting the copies, we also maintain a list of registers which we
1997-
// will want to reclassify as AGPR. After doing the copy isnertion and the
1992+
// will want to reclassify as AGPR. After doing the copy insertion and the
19981993
// register replacement, we can finally do the reclassification. This uses the
19991994
// redef map, as the registers we are interested in reclassifying may be
20001995
// replaced by the result of a copy. We must do this after the copy analysis
20011996
// and placement as we must have an accurate redef map -- otherwise we may end
20021997
// up creating illegal instructions.
20031998

2004-
// The original registers of the MFMA that need to be reclassified as AGPR
1999+
// The original registers of the MFMA that need to be reclassified as AGPR.
20052000
std::set<Register> RewriteRegs;
20062001
// The map of an original register in the MFMA to a new register (result of a
20072002
// copy) that it should be replaced with.
@@ -2015,16 +2010,15 @@ bool RewriteScheduleStage::rewrite(
20152010
DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
20162011
ReachingUseTracker;
20172012

2018-
for (auto &RI : RewriteCands) {
2019-
MachineInstr &MI = *RI.first;
2013+
for (auto &[MI, OriginalOpcode] : RewriteCands) {
20202014

2021-
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
2015+
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
20222016
if (ReplacementOp == -1)
20232017
continue;
2024-
MI.setDesc(TII->get(ReplacementOp));
2018+
MI->setDesc(TII->get(ReplacementOp));
20252019

20262020
// Case 1: insert copies for the reaching defs of the Src2Reg.
2027-
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
2021+
MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
20282022

20292023
if (Src2->isReg()) {
20302024
Register Src2Reg = Src2->getReg();
@@ -2094,7 +2088,7 @@ bool RewriteScheduleStage::rewrite(
20942088
// Case 2 and Case 3: insert copies before the reaching uses of the dsts,
20952089
// and after the reaching defs of the reaching uses of the dsts.
20962090

2097-
MachineOperand *Dst = &MI.getOperand(0);
2091+
MachineOperand *Dst = &MI->getOperand(0);
20982092
Register DstReg = Dst->getReg();
20992093
if (!DstReg.isVirtual())
21002094
return false;
@@ -2105,7 +2099,7 @@ bool RewriteScheduleStage::rewrite(
21052099
SmallVector<MachineOperand *, 8> DstReachingUseCopies;
21062100
SmallVector<MachineInstr *, 8> DstUseDefsReplace;
21072101

2108-
findReachingUses(&MI, DAG.LIS, DstReachingUses);
2102+
findReachingUses(MI, DAG.LIS, DstReachingUses);
21092103

21102104
for (MachineOperand *RUOp : DstReachingUses) {
21112105
if (TII->isMAI(*RUOp->getParent()))
@@ -2169,7 +2163,7 @@ bool RewriteScheduleStage::rewrite(
21692163
MachineBasicBlock *RUBlock = RU->getParent()->getParent();
21702164
// Just keep track of the reaching use of this register by block. After we
21712165
// have scanned all the MFMAs we can find optimal insert pts.
2172-
if (RUBlock != MI.getParent()) {
2166+
if (RUBlock != MI->getParent()) {
21732167
ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU);
21742168
continue;
21752169
}

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -413,9 +413,6 @@ class RewriteScheduleStage : public GCNSchedStage {
413413
// spilling.
414414
BitVector RegionsWithExcessArchVGPR;
415415

416-
MachineBranchProbabilityInfo MBPI;
417-
MachineBlockFrequencyInfo MBFI;
418-
419416
const SIInstrInfo *TII;
420417
const SIRegisterInfo *SRI;
421418

@@ -431,23 +428,24 @@ class RewriteScheduleStage : public GCNSchedStage {
431428
SmallPtrSetImpl<MachineInstr *> &CopyForDef);
432429

433430
/// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
434-
/// in initHueristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
431+
/// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
435432
/// costs, and \p RewriteCands to undo rewriting.
436-
int64_t
437-
getRewriteCost(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
438-
DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
439-
SmallPtrSetImpl<MachineInstr *> &CopyForDef);
433+
int64_t getRewriteCost(
434+
const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
435+
const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
436+
const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
440437

441438
/// Do the final rewrite on \p RewriteCands and insert any needed copies.
442-
bool rewrite(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
439+
bool
440+
rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
443441

444442
/// \returns true if this MI is a rewrite candidate.
445443
bool isRewriteCandidate(MachineInstr *MI) const;
446444

447445
/// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
448-
/// DefIdx
449-
SlotIndex findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
450-
SmallVectorImpl<SlotIndex> &DefIdxs);
446+
/// DefIdxs
447+
void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
448+
SmallVectorImpl<SlotIndex> &DefIdxs);
451449

452450
/// Finds all the reaching uses of \p DefMI and stores the use operands in \p
453451
/// ReachingUses

0 commit comments

Comments
 (0)