@@ -197,7 +197,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
197197 }
198198
199199 void createAdjacencyStructure (SwingSchedulerDAG *DAG);
200- bool circuit (int V, int S, NodeSetType &NodeSets, bool HasBackedge = false );
200+ bool circuit (int V, int S, NodeSetType &NodeSets,
201+ const SwingSchedulerDAG *DAG, bool HasBackedge = false );
201202 void unblock (int U);
202203 };
203204
@@ -260,7 +261,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
260261 return Source->getInstr ()->isPHI () || Dep.getSUnit ()->getInstr ()->isPHI ();
261262 }
262263
263- bool isLoopCarriedDep (SUnit *Source, const SDep &Dep, bool isSucc = true );
264+ bool isLoopCarriedDep (SUnit *Source, const SDep &Dep,
265+ bool isSucc = true ) const ;
264266
265267 /// The distance function, which indicates that operation V of iteration I
266268 /// depends on operation U of iteration I-distance.
@@ -311,7 +313,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
311313 void computeNodeOrder (NodeSetType &NodeSets);
312314 void checkValidNodeOrder (const NodeSetType &Circuits) const ;
313315 bool schedulePipeline (SMSchedule &Schedule);
314- bool computeDelta (MachineInstr &MI, unsigned &Delta);
316+ bool computeDelta (MachineInstr &MI, unsigned &Delta) const ;
315317 MachineInstr *findDefInLoop (Register Reg);
316318 bool canUseLastOffsetValue (MachineInstr *MI, unsigned &BasePos,
317319 unsigned &OffsetPos, unsigned &NewBase,
@@ -339,24 +341,56 @@ class NodeSet {
339341 using iterator = SetVector<SUnit *>::const_iterator;
340342
341343 NodeSet () = default ;
342- NodeSet (iterator S, iterator E) : Nodes(S, E), HasRecurrence(true ) {
343- Latency = 0 ;
344- for (const SUnit *Node : Nodes) {
345- DenseMap<SUnit *, unsigned > SuccSUnitLatency;
346- for (const SDep &Succ : Node->Succs ) {
347- auto SuccSUnit = Succ.getSUnit ();
348- if (!Nodes.count (SuccSUnit))
344+ NodeSet (iterator S, iterator E, const SwingSchedulerDAG *DAG)
345+ : Nodes(S, E), HasRecurrence(true ) {
346+ // Calculate the latency of this node set.
347+ // Example to demonstrate the calculation:
348+ // Given: N0 -> N1 -> N2 -> N0
349+ // Edges:
350+ // (N0 -> N1, 3)
351+ // (N0 -> N1, 5)
352+ // (N1 -> N2, 2)
353+ // (N2 -> N0, 1)
354+ // The total latency, which is a lower bound of the recurrence MII, is the
355+ // longest path from N0 back to N0 given only the edges of this node set.
356+ // In this example, the latency is: 5 + 2 + 1 = 8.
357+ //
358+ // Hold a map from each SUnit in the circuit to the maximum distance from the
359+ // source node, considering only the nodes in this set. Every entry starts at 0.
360+ DenseMap<SUnit *, unsigned > SUnitToDistance;
361+ for (auto *Node : Nodes)
362+ SUnitToDistance[Node] = 0 ;
363+ // Walk consecutive pairs (U, V) in Nodes order, wrapping from the last node back to the first, and raise V's distance to U's distance plus the U -> V edge latency when that is larger. NOTE(review): the loop-local E shadows the iterator parameter E.
364+ for (unsigned I = 1 , E = Nodes.size (); I <= E; ++I) {
365+ SUnit *U = Nodes[I - 1 ];
366+ SUnit *V = Nodes[I % Nodes.size ()];
367+ for (const SDep &Succ : U->Succs ) {
368+ SUnit *SuccSUnit = Succ.getSUnit ();
369+ if (V != SuccSUnit)
349370 continue ;
350- unsigned CurLatency = Succ.getLatency ();
351- unsigned MaxLatency = 0 ;
352- if (SuccSUnitLatency.count (SuccSUnit))
353- MaxLatency = SuccSUnitLatency[SuccSUnit];
354- if (CurLatency > MaxLatency)
355- SuccSUnitLatency[SuccSUnit] = CurLatency;
371+ if (SUnitToDistance[U] + Succ.getLatency () > SUnitToDistance[V]) {
372+ SUnitToDistance[V] = SUnitToDistance[U] + Succ.getLatency ();
373+ }
356374 }
357- for (auto SUnitLatency : SuccSUnitLatency)
358- Latency += SUnitLatency.second ;
359375 }
376+ // Handle a back-edge in loop-carried dependencies. NOTE(review): Nodes[0] and Nodes[size() - 1] below assume a non-empty set; confirm callers never pass S == E.
377+ SUnit *FirstNode = Nodes[0 ];
378+ SUnit *LastNode = Nodes[Nodes.size () - 1 ];
379+
380+ for (auto &PI : LastNode->Preds ) {
381+ // If we have an order dep that is potentially loop-carried, then a
382+ // back-edge exists between the last node and the first node that isn't
383+ // modeled in the DAG. Handle it manually by adding 1 to the distance of
384+ // the last node and keeping the larger value for the first node.
385+ if (PI.getSUnit () != FirstNode || PI.getKind () != SDep::Order ||
386+ !DAG->isLoopCarriedDep (LastNode, PI, false ))
387+ continue ;
388+ SUnitToDistance[FirstNode] =
389+ std::max (SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1 );
390+ }
391+
392+ // The latency is the distance from the source node (Nodes.front()) to itself.
393+ Latency = SUnitToDistance[Nodes.front ()];
360394 }
361395
362396 bool insert (SUnit *SU) { return Nodes.insert (SU); } // Forwards SU to Nodes.insert and returns its result.
0 commit comments