Skip to content

Commit 9b52c18

Browse files
jrbyrnes authored and
Tony Linthicum committed
Review comments
Change-Id: I99db02cea2777024b4948a55d6a298c384f40534
1 parent 2870071 commit 9b52c18

File tree

3 files changed

+74
-81
lines changed

3 files changed

+74
-81
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,8 @@ struct GCNRegPressure {
102102
DynamicVGPRBlockSize));
103103
}
104104

105-
unsigned getVGPRSpills(const GCNSubtarget &ST, MachineFunction &MF) {
105+
unsigned getVGPRSpills(MachineFunction &MF) {
106+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
106107
if (!ST.hasGFX90AInsts())
107108
return 0;
108109

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 62 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -1223,22 +1223,20 @@ bool GCNSchedStage::initGCNSchedStage() {
12231223
return true;
12241224
}
12251225

1226-
SlotIndex
1227-
RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
1228-
LiveIntervals *LIS,
1229-
SmallVectorImpl<SlotIndex> &DefIdxs) {
1226+
void RewriteScheduleStage::findReachingDefs(
1227+
MachineOperand &UseMO, LiveIntervals *LIS,
1228+
SmallVectorImpl<SlotIndex> &DefIdxs) {
12301229
assert(UseMO.isReg());
12311230
MachineInstr *UseMI = UseMO.getParent();
12321231
LiveInterval &UseLI = LIS->getInterval(UseMO.getReg());
1233-
auto VNInfo = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));
1232+
VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));
12341233

1235-
SlotIndex DefMBBStart =
1236-
LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNInfo->def));
1234+
SlotIndex DefMBBStart = LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNI->def));
12371235

12381236
// If the def is in the block, then it must be the only reaching def.
1239-
if (DefMBBStart != VNInfo->def) {
1240-
DefIdxs.push_back(VNInfo->def);
1241-
return VNInfo->def;
1237+
if (DefMBBStart != VNI->def) {
1238+
DefIdxs.push_back(VNI->def);
1239+
return;
12421240
}
12431241

12441242
SmallPtrSet<MachineBasicBlock *, 8> Visited;
@@ -1256,15 +1254,15 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
12561254
MachineBasicBlock *CurrMBB = Worklist.pop_back_val();
12571255

12581256
SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB);
1259-
auto VNInfo = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot());
1257+
VNInfo *VNI = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot());
12601258

1261-
MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNInfo->def);
1259+
MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNI->def);
12621260
SlotIndex DefMBBStart = LIS->getMBBStartIdx(DefMBB);
12631261

12641262
// If there is a def in this block, then add it to the list. This is the
12651263
// reaching def of this path.
1266-
if (DefMBBStart != VNInfo->def) {
1267-
DefIdxs.push_back(VNInfo->def);
1264+
if (DefMBBStart != VNI->def) {
1265+
DefIdxs.push_back(VNI->def);
12681266
continue;
12691267
}
12701268

@@ -1273,8 +1271,6 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
12731271
Worklist.push_back(PredMBB);
12741272
}
12751273
}
1276-
1277-
return VNInfo->def;
12781274
}
12791275

12801276
void RewriteScheduleStage::findReachingUses(
@@ -1288,9 +1284,9 @@ void RewriteScheduleStage::findReachingUses(
12881284

12891285
// If we find a use that contains this DefMI in its reachingDefs, then it is
12901286
// a reaching use.
1291-
if (find_if(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1287+
if (any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
12921288
return SlotIndex::isSameInstr(RDIdx, DefIdx);
1293-
}) != ReachingDefIndexes.end())
1289+
}))
12941290
ReachingUses.push_back(&UseMO);
12951291
}
12961292
}
@@ -1966,27 +1962,29 @@ bool RewriteScheduleStage::initHeuristics(
19661962
// Prepare for the heuristics
19671963
for (auto &MBB : MF) {
19681964
for (auto &MI : MBB) {
1969-
if (isRewriteCandidate(&MI)) {
1970-
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
1971-
if (ReplacementOp == -1)
1972-
continue;
1965+
if (!isRewriteCandidate(&MI))
1966+
continue;
19731967

1974-
RewriteCands.push_back({&MI, MI.getOpcode()});
1975-
MI.setDesc(TII->get(ReplacementOp));
1968+
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
1969+
if (ReplacementOp == -1)
1970+
continue;
19761971

1977-
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1978-
if (Src2->isReg()) {
1979-
SmallVector<SlotIndex, 8> Src2ReachingDefs;
1980-
findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
1972+
RewriteCands.push_back({&MI, MI.getOpcode()});
1973+
MI.setDesc(TII->get(ReplacementOp));
19811974

1982-
// For any definition of the src2 register which is non-MFMA, we
1983-
// insert a copy.
1984-
for (SlotIndex RDIdx : Src2ReachingDefs) {
1985-
MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
1986-
if (!TII->isMAI(*RD))
1987-
CopyForDef.insert(RD);
1988-
}
1975+
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1976+
if (Src2->isReg()) {
1977+
SmallVector<SlotIndex, 8> Src2ReachingDefs;
1978+
findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
1979+
1980+
// For any definition of the src2 register which is non-MFMA, we
1981+
// insert a copy.
1982+
for (SlotIndex RDIdx : Src2ReachingDefs) {
1983+
MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
1984+
if (!TII->isMAI(*RD))
1985+
CopyForDef.insert(RD);
19891986
}
1987+
}
19901988

19911989
MachineOperand &Dst = MI.getOperand(0);
19921990
SmallVector<MachineOperand *, 8> DstReachingUses;
@@ -2024,36 +2022,39 @@ bool RewriteScheduleStage::initHeuristics(
20242022
DAG.MRI.setRegClass(Dst.getReg(), AGPRRC);
20252023
if (Src2->isReg())
20262024
DAG.MRI.setRegClass(Src2->getReg(), AGPRRC);
2027-
}
20282025
}
20292026
}
20302027

20312028
return true;
20322029
}
20332030

20342031
int64_t RewriteScheduleStage::getRewriteCost(
2035-
std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2036-
DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2037-
SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2032+
const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2033+
const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2034+
const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2035+
MachineBranchProbabilityInfo MBPI;
2036+
MachineBlockFrequencyInfo MBFI;
2037+
20382038
MBFI.calculate(MF, MBPI, *DAG.MLI);
20392039
int64_t BestSpillCost = 0;
20402040
int64_t Cost = 0;
20412041

2042+
uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
2043+
20422044
for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {
20432045
if (!RegionsWithExcessArchVGPR[Region])
20442046
continue;
20452047

2046-
auto PressureBefore = DAG.Pressure[Region];
2047-
unsigned SpillCostBefore = PressureBefore.getVGPRSpills(ST, MF);
2048+
GCNRegPressure &PressureBefore = DAG.Pressure[Region];
2049+
unsigned SpillCostBefore = PressureBefore.getVGPRSpills(MF);
20482050

20492051
// For the cases we care about (i.e. ArchVGPR usage is greater than the
20502052
// addressable limit), rewriting alone should bring pressure to manageable
20512053
// level. If we find any such region, then the rewrite is potentially
20522054
// beneficial.
2053-
auto PressureAfter = DAG.getRealRegPressure(Region);
2054-
unsigned SpillCostAfter = PressureAfter.getVGPRSpills(ST, MF);
2055+
GCNRegPressure PressureAfter = DAG.getRealRegPressure(Region);
2056+
unsigned SpillCostAfter = PressureAfter.getVGPRSpills(MF);
20552057

2056-
uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
20572058
uint64_t BlockFreq =
20582059
MBFI.getBlockFreq(DAG.Regions[Region].first->getParent())
20592060
.getFrequency();
@@ -2090,8 +2091,6 @@ int64_t RewriteScheduleStage::getRewriteCost(
20902091

20912092
unsigned CopyCost = 0;
20922093

2093-
uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
2094-
20952094
// For each CopyForDef, increase the cost by the register size while
20962095
// accounting for block frequency.
20972096
for (auto *DefMI : CopyForDef) {
@@ -2107,12 +2106,11 @@ int64_t RewriteScheduleStage::getRewriteCost(
21072106
}
21082107

21092108
// Account for CopyForUse copies in each block that the register is used.
2110-
for (auto &UseEntry : CopyForUse) {
2109+
for (auto &[UseBlock, UseRegs] : CopyForUse) {
21112110
uint64_t UseFreq =
2112-
EntryFreq ? MBFI.getBlockFreq(UseEntry.first).getFrequency() / EntryFreq
2113-
: 1;
2111+
EntryFreq ? MBFI.getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1;
21142112

2115-
for (auto UseReg : UseEntry.second) {
2113+
for (auto UseReg : UseRegs) {
21162114
unsigned RegSize =
21172115
DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg));
21182116
unsigned NumRegs = std::max(RegSize / 32, (unsigned)1);
@@ -2124,9 +2122,7 @@ int64_t RewriteScheduleStage::getRewriteCost(
21242122

21252123
// Reset to the vgpr form. We must do rewriting after copy-insertion, as some
21262124
// defs of the register may require VGPR.
2127-
for (auto RI : RewriteCands) {
2128-
MachineInstr *MI = RI.first;
2129-
2125+
for (auto &[MI, OriginalOpcode] : RewriteCands) {
21302126
assert(TII->isMAI(*MI));
21312127
const TargetRegisterClass *AGPRRC =
21322128
DAG.MRI.getRegClass(MI->getOperand(0).getReg());
@@ -2135,18 +2131,17 @@ int64_t RewriteScheduleStage::getRewriteCost(
21352131
MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
21362132
assert(Src2);
21372133

2138-
if (Src2->isReg()) {
2134+
if (Src2->isReg())
21392135
DAG.MRI.setRegClass(Src2->getReg(), VGPRRC);
2140-
}
21412136
DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC);
2142-
MI->setDesc(TII->get(RI.second));
2137+
MI->setDesc(TII->get(OriginalOpcode));
21432138
}
21442139

21452140
return Cost;
21462141
}
21472142

21482143
bool RewriteScheduleStage::rewrite(
2149-
std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
2144+
const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
21502145
DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
21512146
DenseMap<MachineInstr *, unsigned> LastMIToRegion;
21522147

@@ -2180,7 +2175,7 @@ bool RewriteScheduleStage::rewrite(
21802175
// want to replace the register it is using with the result of the copy, we
21812176
// must handle case 3. In the third case, we simply insert a copy after each
21822177
// of the reaching defs to connect to the copy of the reaching uses of the dst
2183-
// reg. This allows us to avoid inserting copies next to the' MFMAs.
2178+
// reg. This allows us to avoid inserting copies next to the MFMAs.
21842179
//
21852180
// While inserting the copies, we maintain a map of operands which will use
21862181
// different regs (i.e. the result of the copies). For example, a case 1 src2
@@ -2191,14 +2186,14 @@ bool RewriteScheduleStage::rewrite(
21912186
// queries.
21922187
//
21932188
// While inserting the copies, we also maintain a list of registers which we
2194-
// will want to reclassify as AGPR. After doing the copy isnertion and the
2189+
// will want to reclassify as AGPR. After doing the copy insertion and the
21952190
// register replacement, we can finally do the reclassification. This uses the
21962191
// redef map, as the registers we are interested in reclassifying may be
21972192
// replaced by the result of a copy. We must do this after the copy analysis
21982193
// and placement as we must have an accurate redef map -- otherwise we may end
21992194
// up creating illegal instructions.
22002195

2201-
// The original registers of the MFMA that need to be reclassified as AGPR
2196+
// The original registers of the MFMA that need to be reclassified as AGPR.
22022197
std::set<Register> RewriteRegs;
22032198
// The map of an original register in the MFMA to a new register (result of a
22042199
// copy) that it should be replaced with.
@@ -2212,16 +2207,15 @@ bool RewriteScheduleStage::rewrite(
22122207
DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
22132208
ReachingUseTracker;
22142209

2215-
for (auto &RI : RewriteCands) {
2216-
MachineInstr &MI = *RI.first;
2210+
for (auto &[MI, OriginalOpcode] : RewriteCands) {
22172211

2218-
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
2212+
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
22192213
if (ReplacementOp == -1)
22202214
continue;
2221-
MI.setDesc(TII->get(ReplacementOp));
2215+
MI->setDesc(TII->get(ReplacementOp));
22222216

22232217
// Case 1: insert copies for the reaching defs of the Src2Reg.
2224-
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
2218+
MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
22252219

22262220
if (Src2->isReg()) {
22272221
Register Src2Reg = Src2->getReg();
@@ -2291,7 +2285,7 @@ bool RewriteScheduleStage::rewrite(
22912285
// Case 2 and Case 3: insert copies before the reaching uses of the dsts,
22922286
// and after the reaching defs of the reaching uses of the dsts.
22932287

2294-
MachineOperand *Dst = &MI.getOperand(0);
2288+
MachineOperand *Dst = &MI->getOperand(0);
22952289
Register DstReg = Dst->getReg();
22962290
if (!DstReg.isVirtual())
22972291
return false;
@@ -2302,7 +2296,7 @@ bool RewriteScheduleStage::rewrite(
23022296
SmallVector<MachineOperand *, 8> DstReachingUseCopies;
23032297
SmallVector<MachineInstr *, 8> DstUseDefsReplace;
23042298

2305-
findReachingUses(&MI, DAG.LIS, DstReachingUses);
2299+
findReachingUses(MI, DAG.LIS, DstReachingUses);
23062300

23072301
for (MachineOperand *RUOp : DstReachingUses) {
23082302
if (TII->isMAI(*RUOp->getParent()))
@@ -2366,7 +2360,7 @@ bool RewriteScheduleStage::rewrite(
23662360
MachineBasicBlock *RUBlock = RU->getParent()->getParent();
23672361
// Just keep track of the reaching use of this register by block. After we
23682362
// have scanned all the MFMAs we can find optimal insert pts.
2369-
if (RUBlock != MI.getParent()) {
2363+
if (RUBlock != MI->getParent()) {
23702364
ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU);
23712365
continue;
23722366
}

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -425,9 +425,6 @@ class RewriteScheduleStage : public GCNSchedStage {
425425
// spilling.
426426
BitVector RegionsWithExcessArchVGPR;
427427

428-
MachineBranchProbabilityInfo MBPI;
429-
MachineBlockFrequencyInfo MBFI;
430-
431428
const SIInstrInfo *TII;
432429
const SIRegisterInfo *SRI;
433430

@@ -443,23 +440,24 @@ class RewriteScheduleStage : public GCNSchedStage {
443440
SmallPtrSetImpl<MachineInstr *> &CopyForDef);
444441

445442
/// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
446-
/// in initHueristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
443+
/// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
447444
/// costs, and \p RewriteCands to undo rewriting.
448-
int64_t
449-
getRewriteCost(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
450-
DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
451-
SmallPtrSetImpl<MachineInstr *> &CopyForDef);
445+
int64_t getRewriteCost(
446+
const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
447+
const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
448+
const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
452449

453450
/// Do the final rewrite on \p RewriteCands and insert any needed copies.
454-
bool rewrite(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
451+
bool
452+
rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
455453

456454
/// \returns true if this MI is a rewrite candidate.
457455
bool isRewriteCandidate(MachineInstr *MI) const;
458456

459457
/// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
460-
/// DefIdx
461-
SlotIndex findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
462-
SmallVectorImpl<SlotIndex> &DefIdxs);
458+
/// DefIdxs
459+
void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
460+
SmallVectorImpl<SlotIndex> &DefIdxs);
463461

464462
/// Finds all the reaching uses of \p DefMI and stores the use operands in \p
465463
/// ReachingUses

0 commit comments

Comments
 (0)