22
33use crate :: entity:: SecondaryMap ;
44use crate :: flowgraph:: { BlockPredecessor , ControlFlowGraph } ;
5- use crate :: inst_predicates;
65use crate :: ir:: { Block , ExpandedProgramPoint , Function , Inst , Layout , ProgramOrder , Value } ;
76use crate :: packed_option:: PackedOption ;
87use crate :: timing;
@@ -16,8 +15,7 @@ use core::mem;
1615const STRIDE : u32 = 4 ;
1716
1817/// Special RPO numbers used during `compute_postorder`.
19- const DONE : u32 = 1 ;
20- const SEEN : u32 = 2 ;
18+ const SEEN : u32 = 1 ;
2119
2220/// Dominator tree node. We keep one of these per block.
2321#[ derive( Clone , Default ) ]
@@ -36,6 +34,12 @@ struct DomNode {
3634 idom : PackedOption < Inst > ,
3735}
3836
37+ /// DFT stack state marker for computing the cfg postorder.
38+ enum Visit {
39+ First ,
40+ Last ,
41+ }
42+
3943/// The dominator tree for a single function.
4044pub struct DominatorTree {
4145 nodes : SecondaryMap < Block , DomNode > ,
@@ -44,7 +48,7 @@ pub struct DominatorTree {
4448 postorder : Vec < Block > ,
4549
4650 /// Scratch memory used by `compute_postorder()`.
47- stack : Vec < Block > ,
51+ stack : Vec < ( Visit , Block ) > ,
4852
4953 valid : bool ,
5054}
@@ -275,93 +279,64 @@ impl DominatorTree {
275279
276280 // This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
277281 // post-order of the blocks that are reachable form the entry block. A DFT post-order is not
278- // unique. The specific order we get is controlled by two factors:
279- //
280- // 1. The order each node's children are visited, and
281- // 2. The method used for pruning graph edges to get a tree.
282- //
283- // There are two ways of viewing the CFG as a graph:
284- //
285- // 1. Each block is a node, with outgoing edges for all the branches in the block.
286- // 2. Each basic block is a node, with outgoing edges for the single branch at the end of
287- // the BB. (A block is a linear sequence of basic blocks).
288- //
289- // The first graph is a contraction of the second one. We want to compute a block post-order
290- // that is compatible both graph interpretations. That is, if you compute a BB post-order
291- // and then remove those BBs that do not correspond to block headers, you get a post-order of
292- // the block graph.
293- //
294- // Node child order:
295- //
296- // In the BB graph, we always go down the fall-through path first and follow the branch
297- // destination second.
298- //
299- // In the block graph, this is equivalent to visiting block successors in a bottom-up
300- // order, starting from the destination of the block's terminating jump, ending at the
301- // destination of the first branch in the block.
302- //
303- // Edge pruning:
282+ // unique. The specific order we get is controlled by the order each node's children are
283+ // visited.
304284 //
305- // In the BB graph, we keep an edge to a block the first time we visit the *source* side
306- // of the edge. Any subsequent edges to the same block are pruned.
285+ // We view the CFG as a graph where each `BlockCall` value of a terminating branch
286+ // instruction is an edge. A consequence of this is that we visit successor nodes in the
287+ // reverse order specified by the branch instruction that terminates the basic block.
288+ // (Reversed because we are using a stack to control traversal, and push the successors in
289+ // the order the branch instruction specifies -- there's no good reason for this particular
290+ // order.)
307291 //
308- // The equivalent tree is reached in the block graph by keeping the first edge to a block
309- // in a top-down traversal of the successors. (And then visiting edges in a bottom-up
310- // order).
311- //
312- // This pruning method makes it possible to compute the DFT without storing lots of
313- // information about the progress through a block.
314-
315292 // During this algorithm only, use `rpo_number` to hold the following state:
316293 //
317- // 0: block has not yet been reached in the pre-order.
318- // SEEN: block has been pushed on the stack but successors not yet pushed.
319- // DONE: Successors pushed.
294+ // 0: block has not yet had its first visit
295+ // SEEN: block has been visited at least once, implying that all of its successors are on
296+ // the stack
320297
321298 match func. layout . entry_block ( ) {
322299 Some ( block) => {
323- self . stack . push ( block) ;
324- self . nodes [ block] . rpo_number = SEEN ;
300+ self . stack . push ( ( Visit :: First , block) ) ;
325301 }
326302 None => return ,
327303 }
328304
329- while let Some ( block) = self . stack . pop ( ) {
330- match self . nodes [ block] . rpo_number {
331- SEEN => {
332- // This is the first time we pop the block, so we need to scan its successors and
333- // then revisit it.
334- self . nodes [ block] . rpo_number = DONE ;
335- self . stack . push ( block) ;
336- self . push_successors ( func, block) ;
305+ while let Some ( ( visit, block) ) = self . stack . pop ( ) {
306+ match visit {
307+ Visit :: First => {
308+ if self . nodes [ block] . rpo_number == 0 {
309+ // This is the first time we pop the block, so we need to scan its
310+ // successors and then revisit it.
311+ self . nodes [ block] . rpo_number = SEEN ;
312+ self . stack . push ( ( Visit :: Last , block) ) ;
313+ if let Some ( inst) = func. stencil . layout . last_inst ( block) {
314+ for block in func. stencil . dfg . insts [ inst]
315+ . branch_destination ( & func. stencil . dfg . jump_tables )
316+ . iter ( )
317+ {
318+ let succ = block. block ( & func. stencil . dfg . value_lists ) ;
319+
320+ // This is purely an optimization to avoid additional iterations of
321+ // the loop, and is not required; it's merely inlining the check
322+ // from the outer conditional of this case to avoid the extra loop
323+ // iteration.
324+ if self . nodes [ succ] . rpo_number == 0 {
325+ self . stack . push ( ( Visit :: First , succ) )
326+ }
327+ }
328+ }
329+ }
337330 }
338- DONE => {
339- // This is the second time we pop the block, so all successors have been
340- // processed .
331+
332+ Visit :: Last => {
333+ // We've finished all this node's successors .
341334 self . postorder . push ( block) ;
342335 }
343- _ => unreachable ! ( ) ,
344336 }
345337 }
346338 }
347339
348- /// Push `block` successors onto `self.stack`, filtering out those that have already been seen.
349- ///
350- /// The successors are pushed in program order which is important to get a split-invariant
351- /// post-order. Split-invariant means that if a block is split in two, we get the same
352- /// post-order except for the insertion of the new block header at the split point.
353- fn push_successors ( & mut self , func : & Function , block : Block ) {
354- inst_predicates:: visit_block_succs ( func, block, |_, succ, _| self . push_if_unseen ( succ) )
355- }
356-
357- /// Push `block` onto `self.stack` if it has not already been seen.
358- fn push_if_unseen ( & mut self , block : Block ) {
359- if self . nodes [ block] . rpo_number == 0 {
360- self . nodes [ block] . rpo_number = SEEN ;
361- self . stack . push ( block) ;
362- }
363- }
364-
365340 /// Build a dominator tree from a control flow graph using Keith D. Cooper's
366341 /// "Simple, Fast Dominator Algorithm."
367342 fn compute_domtree ( & mut self , func : & Function , cfg : & ControlFlowGraph ) {
@@ -728,7 +703,7 @@ mod tests {
728703 // } block3:jump block1
729704 // } block3
730705
731- assert_eq ! ( dt. cfg_postorder( ) , & [ block2 , block0 , block1, block3] ) ;
706+ assert_eq ! ( dt. cfg_postorder( ) , & [ block0 , block2 , block1, block3] ) ;
732707
733708 assert_eq ! ( cur. func. layout. entry_block( ) . unwrap( ) , block3) ;
734709 assert_eq ! ( dt. idom( block3) , None ) ;
0 commit comments