Skip to content

Commit 8228743

Browse files
committed
Squash.
- Rebase. - Tests updated and passing. Minor tweaks and fixes: - '-prera-lat-red' option for experiments with latency reduction. - Don't reduce latency with a wide DAG that is acyclically latency limited. - Simplify and factor out check for IsStore. - Disable IsRedefining[] with PressureDiffs. - Add check for vreg uses in biasPhysRegExtra() - NFC fix for ShouldTrackPressure init. - Remove lengthy code in initializePressureSets() - there is a test. - Remove changes in SystemZElimCompare as that is not used anymore.
1 parent bd30247 commit 8228743

39 files changed

+1035
-1112
lines changed

llvm/include/llvm/CodeGen/MachineScheduler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1224,7 +1224,7 @@ class GenericSchedulerBase : public MachineSchedStrategy {
12241224
void traceCandidate(const SchedCandidate &Cand);
12251225
#endif
12261226

1227-
private:
1227+
protected:
12281228
bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
12291229
bool ComputeRemLatency, unsigned &RemLatency) const;
12301230
};

llvm/lib/Target/SystemZ/SystemZElimCompare.cpp

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
150150
return Ref;
151151
}
152152

153+
// Return true if this is a load and test which can be optimized the
154+
// same way as compare instruction.
155+
static bool isLoadAndTestAsCmp(MachineInstr &MI) {
156+
// If we during isel used a load-and-test as a compare with 0, the
157+
// def operand is dead.
158+
return (MI.getOpcode() == SystemZ::LTEBR ||
159+
MI.getOpcode() == SystemZ::LTDBR ||
160+
MI.getOpcode() == SystemZ::LTXBR) &&
161+
MI.getOperand(0).isDead();
162+
}
163+
164+
// Return the source register of Compare, which is the unknown value
165+
// being tested.
166+
static unsigned getCompareSourceReg(MachineInstr &Compare) {
167+
unsigned reg = 0;
168+
if (Compare.isCompare())
169+
reg = Compare.getOperand(0).getReg();
170+
else if (isLoadAndTestAsCmp(Compare))
171+
reg = Compare.getOperand(1).getReg();
172+
assert(reg);
173+
174+
return reg;
175+
}
176+
153177
// Compare compares the result of MI against zero. If MI is an addition
154178
// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
155179
// and convert the branch to a BRCT(G) or BRCTH. Return true on success.
@@ -182,7 +206,7 @@ bool SystemZElimCompare::convertToBRCT(
182206
// We already know that there are no references to the register between
183207
// MI and Compare. Make sure that there are also no references between
184208
// Compare and Branch.
185-
unsigned SrcReg = TII->getCompareSourceReg(Compare);
209+
unsigned SrcReg = getCompareSourceReg(Compare);
186210
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
187211
for (++MBBI; MBBI != MBBE; ++MBBI)
188212
if (getRegReferences(*MBBI, SrcReg))
@@ -229,7 +253,7 @@ bool SystemZElimCompare::convertToLoadAndTrap(
229253
// We already know that there are no references to the register between
230254
// MI and Compare. Make sure that there are also no references between
231255
// Compare and Branch.
232-
unsigned SrcReg = TII->getCompareSourceReg(Compare);
256+
unsigned SrcReg = getCompareSourceReg(Compare);
233257
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
234258
for (++MBBI; MBBI != MBBE; ++MBBI)
235259
if (getRegReferences(*MBBI, SrcReg))
@@ -470,17 +494,25 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
470494
return true;
471495
}
472496

497+
// Return true if Compare is a comparison against zero.
498+
static bool isCompareZero(MachineInstr &Compare) {
499+
if (isLoadAndTestAsCmp(Compare))
500+
return true;
501+
return Compare.getNumExplicitOperands() == 2 &&
502+
Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
503+
}
504+
473505
// Try to optimize cases where comparison instruction Compare is testing
474506
// a value against zero. Return true on success and if Compare should be
475507
// deleted as dead. CCUsers is the list of instructions that use the CC
476508
// value produced by Compare.
477509
bool SystemZElimCompare::optimizeCompareZero(
478510
MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
479-
if (!TII->isCompareZero(Compare))
511+
if (!isCompareZero(Compare))
480512
return false;
481513

482514
// Search back for CC results that are based on the first operand.
483-
unsigned SrcReg = TII->getCompareSourceReg(Compare);
515+
unsigned SrcReg = getCompareSourceReg(Compare);
484516
MachineBasicBlock &MBB = *Compare.getParent();
485517
Reference CCRefs;
486518
Reference SrcRefs;
@@ -669,7 +701,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
669701
MachineBasicBlock::iterator MBBI = MBB.end();
670702
while (MBBI != MBB.begin()) {
671703
MachineInstr &MI = *--MBBI;
672-
if (CompleteCCUsers && (MI.isCompare() || TII->isLoadAndTestAsCmp(MI)) &&
704+
if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) &&
673705
(optimizeCompareZero(MI, CCUsers) ||
674706
fuseCompareOperations(MI, CCUsers))) {
675707
++MBBI;

llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,28 +2156,6 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
21562156
return 0;
21572157
}
21582158

2159-
bool SystemZInstrInfo::isLoadAndTestAsCmp(const MachineInstr &MI) const {
2160-
// If we during isel used a load-and-test as a compare with 0, the
2161-
// def operand is dead.
2162-
return (MI.getOpcode() == SystemZ::LTEBR ||
2163-
MI.getOpcode() == SystemZ::LTDBR ||
2164-
MI.getOpcode() == SystemZ::LTXBR) &&
2165-
MI.getOperand(0).isDead();
2166-
}
2167-
2168-
bool SystemZInstrInfo::isCompareZero(const MachineInstr &Compare) const {
2169-
if (isLoadAndTestAsCmp(Compare))
2170-
return true;
2171-
return Compare.isCompare() && Compare.getNumExplicitOperands() == 2 &&
2172-
Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
2173-
}
2174-
2175-
Register
2176-
SystemZInstrInfo::getCompareSourceReg(const MachineInstr &Compare) const {
2177-
assert(isCompareZero(Compare) && "Expected a compare with 0.");
2178-
return Compare.getOperand(isLoadAndTestAsCmp(Compare) ? 1 : 0).getReg();
2179-
}
2180-
21812159
bool SystemZInstrInfo::
21822160
prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const {
21832161
assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&

llvm/lib/Target/SystemZ/SystemZInstrInfo.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -356,17 +356,6 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
356356
SystemZII::FusedCompareType Type,
357357
const MachineInstr *MI = nullptr) const;
358358

359-
// Return true if this is a load and test which can be optimized the
360-
// same way as compare instruction.
361-
bool isLoadAndTestAsCmp(const MachineInstr &MI) const;
362-
363-
// Return true if Compare is a comparison against zero.
364-
bool isCompareZero(const MachineInstr &Compare) const;
365-
366-
// Return the source register of Compare, which is the unknown value
367-
// being tested.
368-
Register getCompareSourceReg(const MachineInstr &Compare) const;
369-
370359
// Try to find all CC users of the compare instruction (MBBI) and update
371360
// all of them to maintain equivalent behavior after swapping the compare
372361
// operands. Return false if not all users can be conclusively found and

llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp

Lines changed: 80 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,28 @@ using namespace llvm;
2020
static cl::opt<unsigned> TinyRegionLim(
2121
"tiny-region-lim", cl::Hidden, cl::init(10),
2222
cl::desc("Run limited pre-ra scheduling on regions of this size or "
23-
"smaller. Mainly for testing."));
23+
"smaller."));
24+
25+
namespace SystemZSched {
26+
enum LatencyReduction { Always, Never, More, Heuristics, CycleBased };
27+
} // namespace SystemZSched
28+
29+
static cl::opt<SystemZSched::LatencyReduction> PreRALatRed(
30+
"prera-lat-red", cl::Hidden,
31+
cl::desc("Tuning of latency reduction during pre-ra mi-scheduling."),
32+
cl::init(SystemZSched::LatencyReduction::Heuristics),
33+
cl::values(
34+
clEnumValN(SystemZSched::LatencyReduction::Always, "always",
35+
"Reduce scheduled latency always."),
36+
clEnumValN(SystemZSched::LatencyReduction::Never, "never",
37+
"Don't reduce scheduled latency."),
38+
clEnumValN(SystemZSched::LatencyReduction::More, "more",
39+
"Reduce scheduled latency on most DAGs."),
40+
clEnumValN(SystemZSched::LatencyReduction::Heuristics, "heuristics",
41+
"Use heuristics for reduction of scheduled latency."),
42+
clEnumValN(SystemZSched::LatencyReduction::CycleBased, "cycle-based",
43+
"Use GenericSched cycle based decisions for reduction of "
44+
"scheduled latency.")));
2445

2546
// EXPERIMENTAL
2647
static cl::opt<bool>
@@ -86,51 +107,36 @@ void SystemZPreRASchedStrategy::initializePressureSets(
86107
// If %7 is already live, there would also be 'VR16Bit -1', which is the
87108
// interesting case.
88109
//
89-
// Rather than hard coding VR16Bit and GRX32Bit PressureSets, they are
90-
// inferred below as the intersections of various register class groups.
91-
//
92-
// TODO: Could TableGen emit these directly instead?
110+
// misched-prera-pdiffs.mir tests against any future change in the
111+
// PressureSets, so simply hard-code them here:
112+
93113
if (!WITHPDIFFS)
94114
return;
115+
PrioPressureSet = SystemZ::VR16Bit;
116+
GPRPressureSet = SystemZ::GRX32Bit;
117+
}
95118

96-
auto addPSets = [&TRI](std::set<unsigned> &S, const TargetRegisterClass *RC,
97-
std::set<unsigned> *Intersect = nullptr) {
98-
for (const int *PS = TRI->getRegClassPressureSets(RC); *PS != -1; ++PS)
99-
if (!Intersect || Intersect->count(*PS))
100-
S.insert(*PS);
101-
};
102-
103-
std::set<unsigned> SetA, SetB;
104-
addPSets(SetA, &SystemZ::VR16BitRegClass);
105-
addPSets(SetA, &SystemZ::VR32BitRegClass);
106-
addPSets(SetA, &SystemZ::VR64BitRegClass);
107-
addPSets(SetA, &SystemZ::VR128BitRegClass);
108-
assert(SetA.size() == 1 && "Expected one pressure set (VR16Bit).");
109-
110-
addPSets(SetB, &SystemZ::FP16BitRegClass, &SetA);
111-
addPSets(SetB, &SystemZ::FP32BitRegClass, &SetA);
112-
addPSets(SetB, &SystemZ::FP64BitRegClass, &SetA);
113-
addPSets(SetB, &SystemZ::VF128BitRegClass, &SetA);
114-
addPSets(SetB, &SystemZ::FP128BitRegClass, &SetA);
115-
assert(SetB.size() == 1 && *SetA.begin() == *SetB.begin() &&
116-
"Expected one pressure set (VR16Bit).");
117-
PrioPressureSet = *SetB.begin();
118-
119-
SetA.clear();
120-
SetB.clear();
121-
addPSets(SetA, &SystemZ::GRX32BitRegClass);
122-
addPSets(SetA, &SystemZ::GR64BitRegClass);
123-
addPSets(SetA, &SystemZ::ADDR64BitRegClass);
124-
addPSets(SetA, &SystemZ::GR128BitRegClass);
125-
addPSets(SetA, &SystemZ::ADDR128BitRegClass);
126-
assert(SetA.size() == 1 && "Expected one pressure set (GRX32Bit).");
127-
128-
addPSets(SetB, &SystemZ::GR32BitRegClass, &SetA);
129-
addPSets(SetB, &SystemZ::GRH32BitRegClass, &SetA);
130-
addPSets(SetB, &SystemZ::ADDR32BitRegClass, &SetA);
131-
assert(SetB.size() == 1 && *SetA.begin() == *SetB.begin() &&
132-
"Expected one pressure set (GRX32Bit).");
133-
GPRPressureSet = *SetB.begin();
119+
bool SystemZPreRASchedStrategy::shouldReduceLatency(SchedBoundary *Zone) const {
120+
if (PreRALatRed == SystemZSched::Always)
121+
return true;
122+
if (PreRALatRed == SystemZSched::Never)
123+
return false;
124+
125+
if (IsWideDAG)
126+
return false;
127+
128+
if (PreRALatRed == SystemZSched::More)
129+
return true;
130+
if (PreRALatRed == SystemZSched::Heuristics)
131+
return HasDataSequences || Rem.IsAcyclicLatencyLimited;
132+
133+
if (PreRALatRed == SystemZSched::CycleBased) {
134+
CandPolicy P;
135+
getRemLat(Zone);
136+
return GenericScheduler::shouldReduceLatency(P, *Zone, false, RemLat);
137+
}
138+
139+
llvm_unreachable("Unhandled option value.");
134140
}
135141

136142
unsigned SystemZPreRASchedStrategy::getRemLat(SchedBoundary *Zone) const {
@@ -139,6 +145,12 @@ unsigned SystemZPreRASchedStrategy::getRemLat(SchedBoundary *Zone) const {
139145
return RemLat;
140146
}
141147

148+
static bool isStoreOfVReg(const MachineInstr *MI) {
149+
return MI->mayStore() && !MI->mayLoad() && MI->getNumOperands() &&
150+
isVirtRegUse(MI->getOperand(0)) &&
151+
MI->getDesc().operands()[0].OperandType != MCOI::OPERAND_MEMORY;
152+
}
153+
142154
void SystemZPreRASchedStrategy::initializeStoresGroup() {
143155
StoresGroup.clear();
144156
FirstStoreInGroupScheduled = false;
@@ -149,18 +161,7 @@ void SystemZPreRASchedStrategy::initializeStoresGroup() {
149161
const MachineInstr *MI = SU->getInstr();
150162
if (!MI->getNumOperands() || MI->isCopy())
151163
continue;
152-
153-
bool HasVirtDef = false;
154-
bool HasVirtUse = false;
155-
for (unsigned I = 0; I < MI->getDesc().getNumOperands(); ++I) {
156-
const MachineOperand &MO = MI->getOperand(I);
157-
if (isVirtRegDef(MO) && !MO.isDead())
158-
HasVirtDef = true;
159-
else if (isVirtRegUse(MO) &&
160-
MI->getDesc().operands()[I].OperandType != MCOI::OPERAND_MEMORY)
161-
HasVirtUse = true;
162-
}
163-
bool IsStore = !HasVirtDef && HasVirtUse;
164+
bool IsStore = isStoreOfVReg(MI);
164165

165166
// Find a group of stores that all are at the bottom while avoiding
166167
// regions with any additional group of lesser depth.
@@ -198,8 +199,14 @@ static int biasPhysRegExtra(const SUnit *SU) {
198199
const MachineInstr *MI = SU->getInstr();
199200
if (MI->getNumOperands() && !MI->isCopy()) {
200201
const MachineOperand &DefMO = MI->getOperand(0);
201-
if (isPhysRegDef(DefMO))
202+
if (isPhysRegDef(DefMO)) {
203+
#ifndef NDEBUG
204+
for (const MachineOperand &MO : MI->all_uses())
205+
assert(!MO.getReg().isVirtual() &&
206+
"Did not expect a virtual register use operand.");
207+
#endif
202208
return 1;
209+
}
203210
}
204211

205212
return 0;
@@ -216,9 +223,8 @@ int SystemZPreRASchedStrategy::computeSULivenessScore(
216223

217224
const MachineOperand &MO0 = MI->getOperand(0);
218225
assert(!isPhysRegDef(MO0) && "Did not expect physreg def!");
219-
bool IsLoad =
220-
isRegDef(MO0) && !MO0.isDead() && !IsRedefining[SU->NodeNum];
221-
bool IsStore = (!isRegDef(MO0) || MO0.isDead());
226+
bool IsLoad = isRegDef(MO0) && !MO0.isDead() && !IsRedefining[SU->NodeNum];
227+
bool IsStore = isStoreOfVReg(MI);
222228
bool PreservesSchedLat = SU->getHeight() <= Zone->getScheduledLatency();
223229
const unsigned Cycles = 2;
224230
unsigned Margin = SchedModel->getIssueWidth() * (Cycles + SU->Latency - 1);
@@ -272,12 +278,10 @@ int SystemZPreRASchedStrategy::computeSULivenessScore(
272278
if (IsLoad) {
273279
bool PrioDefNoKill = PrioPressureChange == -RegWeight;
274280
bool GPRDefNoKill = GPRPressureChange == -RegWeight;
275-
UsesLivePrio =
276-
(PrioDefNoKill || (!PrioPressureChange && GPRDefNoKill));
281+
UsesLivePrio = (PrioDefNoKill || (!PrioPressureChange && GPRDefNoKill));
277282
UsesLiveAll = (PrioDefNoKill && !GPRPressureChange) ||
278283
(!PrioPressureChange && GPRDefNoKill);
279-
}
280-
else if (IsStore && FirstStoreInGroupScheduled && StoresGroup.count(SU)) {
284+
} else if (IsStore && FirstStoreInGroupScheduled && StoresGroup.count(SU)) {
281285
bool SrcKill = !DAG->getBotRPTracker().isRegLive(MO0.getReg());
282286
StoreKill =
283287
SrcKill && (PrioPressureChange == RegWeight ||
@@ -344,7 +348,7 @@ bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
344348
// Don't extend the scheduled latency in regions with many nodes in
345349
// simple data sequences, or for (single block loop) regions that are
346350
// acyclically (within a single loop iteration) latency limited.
347-
if ((HasDataSequences || Rem.IsAcyclicLatencyLimited) &&
351+
if (shouldReduceLatency(Zone) &&
348352
TryCand.SU->getHeight() != Cand.SU->getHeight() &&
349353
(std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
350354
Zone->getScheduledLatency())) {
@@ -359,7 +363,7 @@ bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
359363
}
360364
}
361365

362-
// Weak edges are for clustering and other constraints.
366+
// Weak edges help copy coalescing.
363367
if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
364368
Weak))
365369
return TryCand.Reason != NoCand;
@@ -378,9 +382,7 @@ void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
378382
unsigned NumRegionInstrs) {
379383
TinyRegion = NumRegionInstrs <= TinyRegionLim;
380384

381-
// RegionPolicy.ShouldTrackPressure = !TinyRegion;
382-
// Some exceptions are made, see initialize().
383-
RegionPolicy.ShouldTrackPressure = NumRegionInstrs > 6;
385+
RegionPolicy.ShouldTrackPressure = !TinyRegion;
384386

385387
// These heuristics have so far seemed to work better without adding a
386388
// top-down boundary.
@@ -407,7 +409,8 @@ void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
407409
unsigned DAGHeight = 0;
408410
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
409411
DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
410-
if ((HasDataSequences = DAG->SUnits.size() < 3 * std::max(DAGHeight, 1u))) {
412+
IsWideDAG = DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);
413+
if ((HasDataSequences = !IsWideDAG)) {
411414
unsigned CurrSequence = 0, NumSeqNodes = 0;
412415
auto countSequence = [&CurrSequence, &NumSeqNodes]() {
413416
NumSeqNodes += CurrSequence >= 2 ? CurrSequence : 0;
@@ -443,14 +446,15 @@ void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
443446

444447
// If MI uses the register it defines, record it one time here.
445448
IsRedefining = std::vector<bool>(DAG->SUnits.size(), false);
446-
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
447-
const MachineInstr *MI = DAG->SUnits[Idx].getInstr();
448-
if (MI->getNumOperands()) {
449-
const MachineOperand &DefMO = MI->getOperand(0);
450-
if (isVirtRegDef(DefMO))
451-
IsRedefining[Idx] = MI->readsVirtualRegister(DefMO.getReg());
449+
if (!WITHPDIFFS) // This is not needed if using PressureDiffs.
450+
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
451+
const MachineInstr *MI = DAG->SUnits[Idx].getInstr();
452+
if (MI->getNumOperands()) {
453+
const MachineOperand &DefMO = MI->getOperand(0);
454+
if (isVirtRegDef(DefMO))
455+
IsRedefining[Idx] = MI->readsVirtualRegister(DefMO.getReg());
456+
}
452457
}
453-
}
454458

455459
initializeStoresGroup();
456460
LLVM_DEBUG(if (!StoresGroup.empty()) dbgs()

0 commit comments

Comments
 (0)