@@ -214,11 +214,11 @@ int SystemZPreRASchedStrategy::computeSULivenessScore(
214214 if (!MI->getNumOperands () || MI->isCopy ())
215215 return 0 ;
216216
217- const MachineOperand &DefMO = MI->getOperand (0 );
218- assert (!isPhysRegDef (DefMO ) && " Did not expect physreg def!" );
217+ const MachineOperand &MO0 = MI->getOperand (0 );
218+ assert (!isPhysRegDef (MO0 ) && " Did not expect physreg def!" );
219219 bool IsLoad =
220- isRegDef (DefMO ) && !DefMO .isDead () && !IsRedefining[SU->NodeNum ];
221- bool IsStore = (!isRegDef (DefMO ) || DefMO .isDead ());
220+ isRegDef (MO0 ) && !MO0 .isDead () && !IsRedefining[SU->NodeNum ];
221+ bool IsStore = (!isRegDef (MO0 ) || MO0 .isDead ());
222222 bool PreservesSchedLat = SU->getHeight () <= Zone->getScheduledLatency ();
223223 const unsigned Cycles = 2 ;
224224 unsigned Margin = SchedModel->getIssueWidth () * (Cycles + SU->Latency - 1 );
@@ -252,10 +252,10 @@ int SystemZPreRASchedStrategy::computeSULivenessScore(
252252 // Find the interesting properties.
253253 // Prioritize FP: Ignore GPR/Addr kills with an FP def.
254254 UsesLivePrio = IsLoad && !PrioKill &&
255- (isPrioVirtReg (DefMO .getReg (), &DAG->MRI ) || !GPRKill);
255+ (isPrioVirtReg (MO0 .getReg (), &DAG->MRI ) || !GPRKill);
256256 UsesLiveAll = !PrioKill && !GPRKill;
257257 StoreKill = (PrioKill || (!HasPrioUse && GPRKill));
258- } else {
258+ } else if (MO0. isReg () && MO0. getReg (). isVirtual ()) {
259259 int PrioPressureChange = 0 ;
260260 int GPRPressureChange = 0 ;
261261 const PressureDiff &PDiff = DAG->getPressureDiff (SU);
@@ -267,22 +267,21 @@ int SystemZPreRASchedStrategy::computeSULivenessScore(
267267 else if (PC.getPSet () == GPRPressureSet)
268268 GPRPressureChange += PC.getUnitInc ();
269269 }
270+ const TargetRegisterClass *RC = DAG->MRI .getRegClass (MO0.getReg ());
271+ int RegWeight = TRI->getRegClassWeight (RC).RegWeight ;
270272 if (IsLoad) {
271- const TargetRegisterClass *RC = DAG->MRI .getRegClass (DefMO.getReg ());
272- int DefWeight = -int (TRI->getRegClassWeight (RC).RegWeight );
273- bool PrioDefNoKill = PrioPressureChange == DefWeight;
274- bool GPRDefNoKill = GPRPressureChange == DefWeight;
273+ bool PrioDefNoKill = PrioPressureChange == -RegWeight;
274+ bool GPRDefNoKill = GPRPressureChange == -RegWeight;
275275 UsesLivePrio =
276- (PrioDefNoKill || (PrioPressureChange == 0 && GPRDefNoKill));
277- UsesLiveAll = (PrioDefNoKill && GPRPressureChange == 0 ) ||
278- (PrioPressureChange == 0 && GPRDefNoKill);
276+ (PrioDefNoKill || (! PrioPressureChange && GPRDefNoKill));
277+ UsesLiveAll = (PrioDefNoKill && ! GPRPressureChange) ||
278+ (! PrioPressureChange && GPRDefNoKill);
279279 }
280- if (IsStore && FirstStoreInGroupScheduled && StoresGroup.count (SU)) {
281- Register SrcReg = MI->getOperand (0 ).getReg ();
282- bool SrcKill = !DAG->getBotRPTracker ().isRegLive (SrcReg);
280+ else if (IsStore && FirstStoreInGroupScheduled && StoresGroup.count (SU)) {
281+ bool SrcKill = !DAG->getBotRPTracker ().isRegLive (MO0.getReg ());
283282 StoreKill =
284- SrcKill && (PrioPressureChange > 0 ||
285- (PrioPressureChange == 0 && GPRPressureChange > 0 ));
283+ SrcKill && (PrioPressureChange == RegWeight ||
284+ (! PrioPressureChange && GPRPressureChange == RegWeight ));
286285 }
287286 }
288287
@@ -342,8 +341,10 @@ bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
342341 if (tryLess (TryCandScore, CandScore, TryCand, Cand, LivenessReduce))
343342 return TryCand.Reason != NoCand;
344343
345- // Don't extend the scheduled latency.
346- if (ShouldReduceLatency &&
344+ // Don't extend the scheduled latency in regions with many nodes in
345+ // simple data sequences, or for (single block loop) regions that are
346+ // acyclically (within a single loop iteration) latency limited.
347+ if ((HasDataSequences || Rem.IsAcyclicLatencyLimited ) &&
347348 TryCand.SU ->getHeight () != Cand.SU ->getHeight () &&
348349 (std::max (TryCand.SU ->getHeight (), Cand.SU ->getHeight ()) >
349350 Zone->getScheduledLatency ())) {
@@ -392,79 +393,53 @@ void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
392393void SystemZPreRASchedStrategy::initialize (ScheduleDAGMI *dag) {
393394 GenericScheduler::initialize (dag);
394395
395- const SystemZInstrInfo *TII = static_cast <const SystemZInstrInfo *>(DAG->TII );
396- if (TinyRegion) {
397- // A tiny region with long latency instructions is better handled using
398- // normal heuristics, except in regions that have COPYs of a physreg both
399- // ways and/or have a compare-0 likely to be eliminated.
400- const SUnit *CmpZeroSU = nullptr ;
401- const SUnit *CmpSrcSU = nullptr ;
402- Register CmpSrcReg = 0 ;
403- bool OtherCCClob = false ;
404- unsigned MaxLat = 0 ;
405- std::set<Register> PRegs;
406- bool CopysPRegDep = false ;
407- for (unsigned Idx = DAG->SUnits .size () - 1 ; Idx + 1 != 0 ; --Idx) {
408- const SUnit *SU = &DAG->SUnits [Idx];
409- const MachineInstr *MI = SU->getInstr ();
410-
411- // Check for a (likely) eliminable compare-0.
412- if (TII->isCompareZero (*MI)) {
413- CmpZeroSU = SU;
414- CmpSrcReg = TII->getCompareSourceReg (*MI);
415- continue ;
416- }
417- if (MI->getNumOperands ()) {
418- const MachineOperand &DefMO = MI->getOperand (0 );
419- // Doing this instead of SU data preds happens to also handle the
420- // case where CmpSrcReg is redefined.
421- if (isVirtRegDef (DefMO) && DefMO.getReg () == CmpSrcReg &&
422- MI->getDesc ().hasImplicitDefOfPhysReg (SystemZ::CC))
423- CmpSrcSU = SU;
424- }
425- if (SU != CmpZeroSU && SU != CmpSrcSU &&
426- MI->getDesc ().hasImplicitDefOfPhysReg (SystemZ::CC))
427- OtherCCClob = true ;
428-
429- // Check for long latency instructions.
430- MaxLat = std::max (MaxLat, unsigned (SU->Latency ));
431-
432- // Check for COPYs of pregs both in and out of the region.
433- if (MI->isCopy ()) {
434- Register DstReg = MI->getOperand (0 ).getReg ();
435- Register SrcReg = MI->getOperand (1 ).getReg ();
436- if (DstReg.isPhysical () && DAG->MRI .isAllocatable (DstReg) &&
437- SrcReg.isVirtual ())
438- PRegs.insert (DstReg);
439- else if (SrcReg.isPhysical () && DAG->MRI .isAllocatable (SrcReg) &&
440- DstReg.isVirtual ()) {
441- if (!PRegs.insert (SrcReg).second )
442- CopysPRegDep = true ;
443- }
444- }
445- }
446- bool CmpElimRegion = CmpZeroSU && CmpSrcSU && OtherCCClob;
447-
448- if (DAG->SUnits .size () > 6 && MaxLat >= 6 && !CopysPRegDep &&
449- !CmpElimRegion)
450- TinyRegion = false ;
451- }
452396 LLVM_DEBUG (dbgs () << " Region is" << (TinyRegion ? " " : " not" ) << " tiny.\n " );
453397 if (TinyRegion)
454398 return ;
455399
456400 NumLeft = DAG->SUnits .size ();
457401 RemLat = ~0U ;
458402
459- // It seems to work best to include the latencies in this heuristic (as
460- // opposed to something like a "unit SU height" with all latencies counted
461- // as 1).
403+ // Enable latency reduction for a region that has a considerable amount of
404+ // data sequences so that they become interleaved. These are SUs that only
405+ // have one data predecessor / successor edge(s) to their adjacent
406+ // SU(s). Disable if region has many SUs relative to the overall height.
462407 unsigned DAGHeight = 0 ;
463408 for (unsigned Idx = 0 , End = DAG->SUnits .size (); Idx != End; ++Idx)
464409 DAGHeight = std::max (DAGHeight, DAG->SUnits [Idx].getHeight ());
465- ShouldReduceLatency = DAG->SUnits .size () < 3 * std::max (DAGHeight, 1u );
466- LLVM_DEBUG (if (ShouldReduceLatency) dbgs () << " Latency scheduling enabled.\n " ;
467- else dbgs () << " Latency scheduling disabled.\n " ;);
410+ if ((HasDataSequences = DAG->SUnits .size () < 3 * std::max (DAGHeight, 1u ))) {
411+ unsigned CurrSequence = 0 , NumSeqNodes = 0 ;
412+ auto countSequence = [&CurrSequence, &NumSeqNodes]() {
413+ NumSeqNodes += CurrSequence >= 2 ? CurrSequence : 0 ;
414+ CurrSequence = 0 ;
415+ };
416+ for (unsigned Idx = 0 , End = DAG->SUnits .size (); Idx != End; ++Idx) {
417+ const SUnit *SU = &DAG->SUnits [Idx];
418+ bool InDataSequence = true ;
419+ unsigned NumPreds = 0 ;
420+ for (const SDep &Pred : SU->Preds )
421+ if (++NumPreds != 1 || Pred.getKind () != SDep::Data ||
422+ Pred.getSUnit ()->NodeNum != Idx - 1 )
423+ InDataSequence = false ;
424+ unsigned NumSuccs = 0 ;
425+ for (const SDep &Succ : SU->Succs )
426+ if (Succ.getSUnit () != &DAG->ExitSU &&
427+ (++NumSuccs != 1 || Succ.getKind () != SDep::Data))
428+ InDataSequence = false ;
429+ if (!InDataSequence || !NumPreds)
430+ countSequence ();
431+ if (InDataSequence)
432+ CurrSequence++;
433+ }
434+ countSequence ();
435+ if (NumSeqNodes >= DAG->SUnits .size () / 4 )
436+ LLVM_DEBUG (dbgs () << " Number of nodes in def-use sequences: "
437+ << NumSeqNodes << " . " ;);
438+ else
439+ HasDataSequences = false ;
440+ }
441+ LLVM_DEBUG (dbgs () << " Latency scheduling " << (HasDataSequences ? " " : " not " )
442+ << " enabled for data sequences.\n " ;);
468443
469444 // If MI uses the register it defines, record it one time here.
470445 IsRedefining = std::vector<bool >(DAG->SUnits .size (), false );
0 commit comments