@@ -20,7 +20,28 @@ using namespace llvm;
 static cl::opt<unsigned> TinyRegionLim(
     "tiny-region-lim", cl::Hidden, cl::init(10),
     cl::desc("Run limited pre-ra scheduling on regions of this size or "
-             "smaller. Mainly for testing."));
+             "smaller."));
+
+namespace SystemZSched {
+enum LatencyReduction { Always, Never, More, Heuristics, CycleBased };
+} // namespace SystemZSched
+
+static cl::opt<SystemZSched::LatencyReduction> PreRALatRed(
+    "prera-lat-red", cl::Hidden,
+    cl::desc("Tuning of latency reduction during pre-ra mi-scheduling."),
+    cl::init(SystemZSched::LatencyReduction::Heuristics),
+    cl::values(
+        clEnumValN(SystemZSched::LatencyReduction::Always, "always",
+                   "Reduce scheduled latency always."),
+        clEnumValN(SystemZSched::LatencyReduction::Never, "never",
+                   "Don't reduce scheduled latency."),
+        clEnumValN(SystemZSched::LatencyReduction::More, "more",
+                   "Reduce scheduled latency on most DAGs."),
+        clEnumValN(SystemZSched::LatencyReduction::Heuristics, "heuristics",
+                   "Use heuristics for reduction of scheduled latency."),
+        clEnumValN(SystemZSched::LatencyReduction::CycleBased, "cycle-based",
+                   "Use GenericSched cycle based decisions for reduction of "
+                   "scheduled latency.")));
 
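As a usage sketch: an enum-valued cl::opt is selected by its value name on the command line. The flag and value names below come from this patch; the triple and file names are placeholders.

    llc -mtriple=s390x-linux-gnu -prera-lat-red=cycle-based in.ll -o out.s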
 // EXPERIMENTAL
 static cl::opt<bool>
@@ -86,51 +107,36 @@ void SystemZPreRASchedStrategy::initializePressureSets(
   // If %7 is already live, there would also be 'VR16Bit -1', which is the
   // interesting case.
   //
-  // Rather than hard coding VR16Bit and GRX32Bit PressureSets, they are
-  // inferred below as the intersections of various register class groups.
-  //
-  // TODO: Could TableGen emit these directly instead?
+  // misched-prera-pdiffs.mir tests against any future change in the
+  // PressureSets, so simply hard-code them here:
+
   if (!WITHPDIFFS)
     return;
+  PrioPressureSet = SystemZ::VR16Bit;
+  GPRPressureSet = SystemZ::GRX32Bit;
+}
 
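For orientation, a PressureSet index such as SystemZ::VR16Bit can be inspected through TargetRegisterInfo; a minimal sketch, not part of the patch (assuming TRI and the current MachineFunction MF are in scope):

    // Hypothetical debug aid: name and limit of the hard-coded set.
    dbgs() << TRI->getRegPressureSetName(SystemZ::VR16Bit)
           << " limit=" << TRI->getRegPressureSetLimit(MF, SystemZ::VR16Bit)
           << "\n";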
-  auto addPSets = [&TRI](std::set<unsigned> &S, const TargetRegisterClass *RC,
-                         std::set<unsigned> *Intersect = nullptr) {
-    for (const int *PS = TRI->getRegClassPressureSets(RC); *PS != -1; ++PS)
-      if (!Intersect || Intersect->count(*PS))
-        S.insert(*PS);
-  };
-
-  std::set<unsigned> SetA, SetB;
-  addPSets(SetA, &SystemZ::VR16BitRegClass);
-  addPSets(SetA, &SystemZ::VR32BitRegClass);
-  addPSets(SetA, &SystemZ::VR64BitRegClass);
-  addPSets(SetA, &SystemZ::VR128BitRegClass);
-  assert(SetA.size() == 1 && "Expected one pressure set (VR16Bit).");
-
-  addPSets(SetB, &SystemZ::FP16BitRegClass, &SetA);
-  addPSets(SetB, &SystemZ::FP32BitRegClass, &SetA);
-  addPSets(SetB, &SystemZ::FP64BitRegClass, &SetA);
-  addPSets(SetB, &SystemZ::VF128BitRegClass, &SetA);
-  addPSets(SetB, &SystemZ::FP128BitRegClass, &SetA);
-  assert(SetB.size() == 1 && *SetA.begin() == *SetB.begin() &&
-         "Expected one pressure set (VR16Bit).");
-  PrioPressureSet = *SetB.begin();
-
-  SetA.clear();
-  SetB.clear();
-  addPSets(SetA, &SystemZ::GRX32BitRegClass);
-  addPSets(SetA, &SystemZ::GR64BitRegClass);
-  addPSets(SetA, &SystemZ::ADDR64BitRegClass);
-  addPSets(SetA, &SystemZ::GR128BitRegClass);
-  addPSets(SetA, &SystemZ::ADDR128BitRegClass);
-  assert(SetA.size() == 1 && "Expected one pressure set (GRX32Bit).");
-
-  addPSets(SetB, &SystemZ::GR32BitRegClass, &SetA);
-  addPSets(SetB, &SystemZ::GRH32BitRegClass, &SetA);
-  addPSets(SetB, &SystemZ::ADDR32BitRegClass, &SetA);
-  assert(SetB.size() == 1 && *SetA.begin() == *SetB.begin() &&
-         "Expected one pressure set (GRX32Bit).");
-  GPRPressureSet = *SetB.begin();
+bool SystemZPreRASchedStrategy::shouldReduceLatency(SchedBoundary *Zone) const {
+  if (PreRALatRed == SystemZSched::Always)
+    return true;
+  if (PreRALatRed == SystemZSched::Never)
+    return false;
+
+  if (IsWideDAG)
+    return false;
+
+  if (PreRALatRed == SystemZSched::More)
+    return true;
+  if (PreRALatRed == SystemZSched::Heuristics)
+    return HasDataSequences || Rem.IsAcyclicLatencyLimited;
+
+  if (PreRALatRed == SystemZSched::CycleBased) {
+    CandPolicy P;
+    getRemLat(Zone);
+    return GenericScheduler::shouldReduceLatency(P, *Zone, false, RemLat);
+  }
+
+  llvm_unreachable("Unhandled option value.");
 }
 
 unsigned SystemZPreRASchedStrategy::getRemLat(SchedBoundary *Zone) const {
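To summarize the ladder above: 'always' and 'never' take absolute precedence; wide DAGs (see IsWideDAG in initialize() further down) otherwise never get latency reduction; 'more' then enables it on all remaining DAGs; the default 'heuristics' enables it only when the region has long data sequences or is acyclically latency limited; and 'cycle-based' defers to GenericScheduler::shouldReduceLatency() using the remaining latency. For example, under the default setting a narrow DAG with HasDataSequences set gets latency reduction, while the same DAG under 'never' does not.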
@@ -139,6 +145,12 @@ unsigned SystemZPreRASchedStrategy::getRemLat(SchedBoundary *Zone) const {
   return RemLat;
 }
 
+static bool isStoreOfVReg(const MachineInstr *MI) {
+  return MI->mayStore() && !MI->mayLoad() && MI->getNumOperands() &&
+         isVirtRegUse(MI->getOperand(0)) &&
+         MI->getDesc().operands()[0].OperandType != MCOI::OPERAND_MEMORY;
+}
+
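The new helper recognizes a plain store of a virtual register: the instruction may store but not load, and its first operand is a virtual-register use carrying the stored value rather than part of the address (its operand type is not OPERAND_MEMORY). A hedged illustration in MIR-like form, assuming the usual SystemZ operand order of stored value first, then address:

    // isStoreOfVReg -> true:  STG %7:gr64bit, %stack.0, 0, $noreg
    // isStoreOfVReg -> false: %7:gr64bit = LG %stack.0, 0, $noreg  (a load)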
 void SystemZPreRASchedStrategy::initializeStoresGroup() {
   StoresGroup.clear();
   FirstStoreInGroupScheduled = false;
@@ -149,18 +161,7 @@ void SystemZPreRASchedStrategy::initializeStoresGroup() {
     const MachineInstr *MI = SU->getInstr();
     if (!MI->getNumOperands() || MI->isCopy())
       continue;
-
-    bool HasVirtDef = false;
-    bool HasVirtUse = false;
-    for (unsigned I = 0; I < MI->getDesc().getNumOperands(); ++I) {
-      const MachineOperand &MO = MI->getOperand(I);
-      if (isVirtRegDef(MO) && !MO.isDead())
-        HasVirtDef = true;
-      else if (isVirtRegUse(MO) &&
-               MI->getDesc().operands()[I].OperandType != MCOI::OPERAND_MEMORY)
-        HasVirtUse = true;
-    }
-    bool IsStore = !HasVirtDef && HasVirtUse;
+    bool IsStore = isStoreOfVReg(MI);
 
     // Find a group of stores that all are at the bottom while avoiding
     // regions with any additional group of lesser depth.
@@ -198,8 +199,14 @@ static int biasPhysRegExtra(const SUnit *SU) {
   const MachineInstr *MI = SU->getInstr();
   if (MI->getNumOperands() && !MI->isCopy()) {
     const MachineOperand &DefMO = MI->getOperand(0);
-    if (isPhysRegDef(DefMO))
+    if (isPhysRegDef(DefMO)) {
+#ifndef NDEBUG
+      for (const MachineOperand &MO : MI->all_uses())
+        assert(!MO.getReg().isVirtual() &&
+               "Did not expect a virtual register use operand.");
+#endif
       return 1;
+    }
   }
 
   return 0;
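The MI->all_uses() loop walks all explicit and implicit register use operands and exists only to validate the assumption behind returning 1; wrapping it in #ifndef NDEBUG keeps release builds free of the otherwise dead loop, since assert itself compiles away under NDEBUG.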
@@ -216,9 +223,8 @@ int SystemZPreRASchedStrategy::computeSULivenessScore(
 
   const MachineOperand &MO0 = MI->getOperand(0);
   assert(!isPhysRegDef(MO0) && "Did not expect physreg def!");
-  bool IsLoad =
-      isRegDef(MO0) && !MO0.isDead() && !IsRedefining[SU->NodeNum];
-  bool IsStore = (!isRegDef(MO0) || MO0.isDead());
+  bool IsLoad = isRegDef(MO0) && !MO0.isDead() && !IsRedefining[SU->NodeNum];
+  bool IsStore = isStoreOfVReg(MI);
   bool PreservesSchedLat = SU->getHeight() <= Zone->getScheduledLatency();
   const unsigned Cycles = 2;
   unsigned Margin = SchedModel->getIssueWidth() * (Cycles + SU->Latency - 1);
@@ -272,12 +278,10 @@ int SystemZPreRASchedStrategy::computeSULivenessScore(
   if (IsLoad) {
     bool PrioDefNoKill = PrioPressureChange == -RegWeight;
     bool GPRDefNoKill = GPRPressureChange == -RegWeight;
-    UsesLivePrio =
-        (PrioDefNoKill || (!PrioPressureChange && GPRDefNoKill));
+    UsesLivePrio = (PrioDefNoKill || (!PrioPressureChange && GPRDefNoKill));
     UsesLiveAll = (PrioDefNoKill && !GPRPressureChange) ||
                   (!PrioPressureChange && GPRDefNoKill);
-  }
-  else if (IsStore && FirstStoreInGroupScheduled && StoresGroup.count(SU)) {
+  } else if (IsStore && FirstStoreInGroupScheduled && StoresGroup.count(SU)) {
     bool SrcKill = !DAG->getBotRPTracker().isRegLive(MO0.getReg());
     StoreKill =
         SrcKill && (PrioPressureChange == RegWeight ||
@@ -344,7 +348,7 @@ bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
   // Don't extend the scheduled latency in regions with many nodes in
   // simple data sequences, or for (single block loop) regions that are
   // acyclically (within a single loop iteration) latency limited.
-  if ((HasDataSequences || Rem.IsAcyclicLatencyLimited) &&
+  if (shouldReduceLatency(Zone) &&
       TryCand.SU->getHeight() != Cand.SU->getHeight() &&
       (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
        Zone->getScheduledLatency())) {
@@ -359,7 +363,7 @@
     }
   }
 
-  // Weak edges are for clustering and other constraints.
+  // Weak edges help copy coalescing.
   if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
               Weak))
     return TryCand.Reason != NoCand;
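For background: a weak edge is a non-strict ordering preference between two SUnits, and honoring it here keeps a COPY adjacent to its source so the register allocator can coalesce the pair. Such an edge would be created in DAG-mutation code along these lines (a hedged sketch; SUa and SUb are placeholders):

    // Prefer scheduling SUb after SUa without forcing the order.
    DAG->addEdge(SUb, SDep(SUa, SDep::Weak));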
@@ -378,9 +382,7 @@ void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                            unsigned NumRegionInstrs) {
   TinyRegion = NumRegionInstrs <= TinyRegionLim;
 
-  // RegionPolicy.ShouldTrackPressure = !TinyRegion;
-  // Some exceptions are made, see initialize().
-  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > 6;
+  RegionPolicy.ShouldTrackPressure = !TinyRegion;
 
   // These heuristics have so far seemed to work better without adding a
   // top-down boundary.
@@ -407,7 +409,8 @@ void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
   unsigned DAGHeight = 0;
   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
     DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
-  if ((HasDataSequences = DAG->SUnits.size() < 3 * std::max(DAGHeight, 1u))) {
+  IsWideDAG = DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);
+  if ((HasDataSequences = !IsWideDAG)) {
     unsigned CurrSequence = 0, NumSeqNodes = 0;
     auto countSequence = [&CurrSequence, &NumSeqNodes]() {
       NumSeqNodes += CurrSequence >= 2 ? CurrSequence : 0;
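A worked example of the width test: a region with 30 SUnits and DAG height 8 satisfies 30 >= 3 * 8, so it is classified as wide (many independent chains) and shouldReduceLatency() above skips latency reduction for every tuning except 'always'. A region with 12 nodes and height 6 gives 12 < 18 and counts as narrow, so the data-sequence scan below runs.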
@@ -443,14 +446,15 @@ void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) {
 
   // If MI uses the register it defines, record it one time here.
   IsRedefining = std::vector<bool>(DAG->SUnits.size(), false);
-  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
-    const MachineInstr *MI = DAG->SUnits[Idx].getInstr();
-    if (MI->getNumOperands()) {
-      const MachineOperand &DefMO = MI->getOperand(0);
-      if (isVirtRegDef(DefMO))
-        IsRedefining[Idx] = MI->readsVirtualRegister(DefMO.getReg());
+  if (!WITHPDIFFS) // This is not needed if using PressureDiffs.
+    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+      const MachineInstr *MI = DAG->SUnits[Idx].getInstr();
+      if (MI->getNumOperands()) {
+        const MachineOperand &DefMO = MI->getOperand(0);
+        if (isVirtRegDef(DefMO))
+          IsRedefining[Idx] = MI->readsVirtualRegister(DefMO.getReg());
+      }
     }
-  }
 
   initializeStoresGroup();
   LLVM_DEBUG(if (!StoresGroup.empty()) dbgs()