Skip to content

Commit 5824c73

Browse files
authored
irinterp: add Tarjan SCC algorithm for reachability (JuliaLang#52966)
This PR optimizes `kill_edge!` for IR interp. The basic algorithm flow is:
```
1. Check whether `target` of dead edge is unreachable, which is true iff:
   - Reducible CFG node: there are no live incoming forward edges (predecessors)
   - Irreducible CFG node: Tarjan's SCC algorithm reports no live incoming forward edges to the SCC
2. If `target` is dead, repeat (1) for all of its outgoing edges
```
This maintains reachability information very efficiently, especially for reducible CFGs, which are overwhelmingly common. As an added bonus, `CFGReachability` can also be consulted to check which BasicBlocks are part of an irreducible loop.
2 parents bc642cf + 400ee71 commit 5824c73

File tree

6 files changed

+498
-27
lines changed

6 files changed

+498
-27
lines changed

base/compiler/compiler.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ include("compiler/validation.jl")
200200
include("compiler/ssair/basicblock.jl")
201201
include("compiler/ssair/domtree.jl")
202202
include("compiler/ssair/ir.jl")
203+
include("compiler/ssair/tarjan.jl")
203204

204205
include("compiler/abstractlattice.jl")
205206
include("compiler/inferenceresult.jl")

base/compiler/inferencestate.jl

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,17 @@ function getindex(tpdum::TwoPhaseDefUseMap, idx::Int)
170170
return TwoPhaseVectorView(tpdum.data, nelems, range)
171171
end
172172

173+
# Holder for a `CFGReachability` that is computed on demand for `ir`.
# The `reachability` field is left undefined by the constructor and is only
# filled in by `get!(::LazyCFGReachability)`.
mutable struct LazyCFGReachability
    ir::IRCode
    reachability::CFGReachability
    function LazyCFGReachability(ir::IRCode)
        return new(ir)
    end
end
178+
# Return the `CFGReachability` cached in `x`, building it first if needed.
# On first use this constructs a dominator tree from the blocks of `x.ir.cfg`
# and stores the resulting `CFGReachability` in `x.reachability`.
function get!(x::LazyCFGReachability)
    if !isdefined(x, :reachability)
        cfg = x.ir.cfg
        domtree = construct_domtree(cfg.blocks)
        x.reachability = CFGReachability(cfg, domtree)
    end
    return x.reachability
end
183+
173184
mutable struct LazyGenericDomtree{IsPostDom}
174185
ir::IRCode
175186
domtree::GenericDomTree{IsPostDom}
@@ -744,7 +755,7 @@ mutable struct IRInterpretationState
744755
const sptypes::Vector{VarState}
745756
const tpdum::TwoPhaseDefUseMap
746757
const ssa_refined::BitSet
747-
const lazydomtree::LazyDomtree
758+
const lazyreachability::LazyCFGReachability
748759
valid_worlds::WorldRange
749760
const edges::Vector{Any}
750761
parent # ::Union{Nothing,AbsIntState}
@@ -764,12 +775,12 @@ mutable struct IRInterpretationState
764775
append!(ir.argtypes, given_argtypes)
765776
tpdum = TwoPhaseDefUseMap(length(ir.stmts))
766777
ssa_refined = BitSet()
767-
lazydomtree = LazyDomtree(ir)
778+
lazyreachability = LazyCFGReachability(ir)
768779
valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
769780
edges = Any[]
770781
parent = nothing
771782
return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
772-
ssa_refined, lazydomtree, valid_worlds, edges, parent)
783+
ssa_refined, lazyreachability, valid_worlds, edges, parent)
773784
end
774785
end
775786

base/compiler/ssair/irinterp.jl

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ function kill_block!(ir::IRCode, bb::Int)
6363
ir[SSAValue(last(stmts))][:stmt] = ReturnNode()
6464
return
6565
end
66+
# Curried form: returns a one-argument callback that forwards a block number
# `bb` to `kill_block!(ir, bb)`.
function kill_block!(ir::IRCode)
    return (bb::Int) -> kill_block!(ir, bb)
end
6667

6768
function update_phi!(irsv::IRInterpretationState, from::Int, to::Int)
6869
ir = irsv.ir
@@ -102,26 +103,8 @@ function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::
102103
end
103104

104105
function kill_edge!(irsv::IRInterpretationState, from::Int, to::Int)
105-
ir = irsv.ir
106-
kill_edge!(ir, from, to, update_phi!(irsv))
107-
108-
lazydomtree = irsv.lazydomtree
109-
domtree = nothing
110-
if isdefined(lazydomtree, :domtree)
111-
domtree = get!(lazydomtree)
112-
domtree_delete_edge!(domtree, ir.cfg.blocks, from, to)
113-
elseif length(ir.cfg.blocks[to].preds) != 0
114-
# TODO: If we're not maintaining the domtree, computing it just for this
115-
# is slightly overkill - just the dfs tree would be enough.
116-
domtree = get!(lazydomtree)
117-
end
118-
119-
if domtree !== nothing && bb_unreachable(domtree, to)
120-
kill_block!(ir, to)
121-
for edge in ir.cfg.blocks[to].succs
122-
kill_edge!(irsv, to, edge)
123-
end
124-
end
106+
kill_edge!(get!(irsv.lazyreachability), irsv.ir.cfg, from, to,
107+
update_phi!(irsv), kill_block!(irsv.ir))
125108
end
126109

127110
function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction, idx::Int,

base/compiler/ssair/tarjan.jl

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
using Core.Compiler: DomTree, CFG, BasicBlock, StmtRange, dominates
4+
5+
"""
    SCCStackItem

One entry of the combined DFS/SCC stack used by `tarjan!`. Entries are
immutable; `tarjan!` updates one by writing a modified copy back into the
stack slot.

Fields:
- `v`: the BasicBlock number this entry describes
- `child`: which child (successor index) of `v` to scan next
- `parent`: the location of the parent entry in the stack (`0` for the root)
- `preorder`: the index of `v` in the pre-order traversal of the DFS tree
- `minpreorder`: the minimum pre-order index reachable from any node in the
  DFS sub-tree rooted at `v`
- `live`: whether this node is reachable from BasicBlock #1
"""
struct SCCStackItem
    v::Int32
    child::Int32
    parent::Int32
    preorder::Int32
    minpreorder::Int32
    live::Bool
end
18+
19+
# Copy `item`, optionally overriding `child`, `minpreorder` and/or `live`.
# The remaining fields (`v`, `parent`, `preorder`) are always carried over.
function SCCStackItem(item::SCCStackItem; child=item.child,
                      minpreorder=item.minpreorder, live=item.live)
    (; v, parent, preorder) = item
    return SCCStackItem(v, child, parent, preorder, minpreorder, live)
end
30+
31+
"""
    CFGReachability

Reachability information for the BasicBlocks of a CFG.

Fields:
- `irreducible`: BBNumber -> Bool, set by `tarjan!` for every block it resolves
- `scc`: BBNumber -> SCCNumber; `0` means the block is unreachable
  (see `bb_unreachable`). While `tarjan!` is running, negative values and
  values offset by `typemax(Int)÷2` are used as temporary markers.
- `domtree`: dominator tree used to recognize reducible back-edges
- `_worklist`: scratch worklist used for node removal
- `_stack`: scratch stack used by Tarjan's SCC algorithm
"""
struct CFGReachability
    irreducible::BitVector
    scc::Vector{Int}
    domtree::DomTree

    _worklist::Vector{Int}
    _stack::Vector{SCCStackItem}
end
39+
40+
"""
    CFGReachability(cfg::CFG, domtree::DomTree)

Build the reachability information for `cfg` by running `tarjan!` from
BasicBlock #1, ignoring reducible back-edges (edges whose target dominates
their source according to `domtree`).

Blocks that `tarjan!` never visits keep `scc == 0` (unreachable) and
`irreducible == false`.
"""
function CFGReachability(cfg::CFG, domtree::DomTree)
    n_blocks = length(cfg.blocks)
    reachability = CFGReachability(
        # Zero-initialized rather than `undef`: `tarjan!` only writes the entries of
        # blocks it visits, so unvisited blocks would otherwise expose garbage bits
        # through `bb_in_irreducible_loop`.
        falses(n_blocks),       # irreducible
        zeros(Int, n_blocks),   # scc
        domtree,                # domtree
        Int[],                  # _worklist
        SCCStackItem[],         # _stack
    )
    tarjan!(reachability, cfg;
        # reducible back-edges don't need to be considered for reachability
        filter = (from::Int,to::Int)->!dominates(domtree, to, from)
    )
    return reachability
end
55+
56+
# A block is unreachable iff its SCC number is 0.
function bb_unreachable(reach::CFGReachability, bb::Int)
    return reach.scc[bb] == 0
end
57+
58+
# Look up the `irreducible` flag recorded for `bb`.
function bb_in_irreducible_loop(reach::CFGReachability, bb::Int)
    return reach.irreducible[bb]
end
59+
60+
# Returns `true` if a node is 'rooted' as reachable, i.e. it has an incoming
# edge from a resolved SCC other than its own (or it is BasicBlock #1).
#
# Only predecessors with a positive SCC number whose edge passes `filter`
# are considered.
#
# `tarjan!` takes the transitive closure of this relation in order to detect
# which BasicBlocks are unreachable.
function _bb_externally_reachable(reach::CFGReachability, cfg::CFG, bb::Int; filter)
    bb == 1 && return true
    (; scc) = reach
    for pred in cfg.blocks[bb].preds
        if scc[pred] > 0 && filter(pred, bb)
            @assert scc[pred] != scc[bb]
            return true
        end
    end
    return false
end
76+
77+
"""
78+
tarjan!(reach::CFGReachability, cfg::CFG; root::Int=1, filter=(from,to)->true)
79+
80+
Tarjan's strongly-connected components algorithm. Traverses the CFG starting at `root`, ignoring
81+
nodes with resolved SCC's and updating outputs for all un-resolved nodes.
82+
83+
Returns true if any node was discovered to be unreachable, false otherwise.
84+
85+
Outputs:
86+
- `reach.scc`: strongly-connected components, ignoring backedges to (natural) loops
87+
- `reach.irreducible`: true iff a BasicBlock is part of a (non-trivial) SCC / irreducible loop
88+
- `reach._worklist`: if performing an incremental update (`root != 1`), any traversed nodes that
89+
are unreachable from BasicBlock #1 are enqueued to this worklist
90+
"""
91+
function tarjan!(reach::CFGReachability, cfg::CFG; root::Int=1,
    filter = (from::Int,to::Int)->true,
)
    # Keyword arguments:
    # - `root`:   BasicBlock at which the traversal starts
    # - `filter`: `(from, to) -> Bool`; edges for which it returns `false` are ignored
    #
    # Encoding of `scc[bb]` while this function runs:
    # - `0`:                  not yet visited / unreachable
    # - negative:             `bb` is on the DFS stack, at index `-scc[bb]`
    # - `> typemax(Int)÷2`:   resolved, but 'maybe-dead' (not yet proven reachable)
    # - other positive value: resolved and live
    (; scc, irreducible) = reach

    # `root` is already resolved: nothing is traversed, so no node can have been
    # discovered unreachable. Return a `Bool` like every other exit path.
    scc[root] != 0 && return false
    live = _bb_externally_reachable(reach, cfg, root; filter)

    # the original algorithm has a separate stack and worklist (unrelated to `reach._worklist`)
    # here we use a single combined stack for improved memory/cache efficiency
    stack = reach._stack
    push!(stack, SCCStackItem(
        root,     # v
        1,        # child
        0,        # parent
        1,        # preorder
        1,        # minpreorder
        live,     # live
    ))
    scc[root] = -1
    cursor = length(stack)

    # worklist length before any new unreachable nodes are added
    worklist_len = length(reach._worklist)

    # last (pre-order) DFS label assigned to a node
    preorder_id = 1
    while true
        (; v, child, minpreorder, live) = item = stack[cursor]

        bb = cfg.blocks[v]
        if child <= length(bb.succs) # queue next child
            stack[cursor] = item = SCCStackItem(item; child=child+1)
            succ = bb.succs[child]

            # ignore any edges that don't pass the filter
            !filter(convert(Int, v), succ) && continue

            if scc[succ] < 0
                # next child is already in DFS tree
                child_preorder = stack[-scc[succ]].preorder

                # only need to update `minpreorder` for `v`
                stack[cursor] = item = SCCStackItem(item;
                    minpreorder=min(minpreorder, child_preorder))
            elseif scc[succ] == 0
                # next child is a new element in DFS tree
                preorder_id += 1
                live = live || _bb_externally_reachable(reach, cfg, succ; filter)
                push!(stack, SCCStackItem(
                    succ,        # v
                    1,           # child
                    cursor,      # parent (index in stack)
                    preorder_id, # preorder
                    preorder_id, # minpreorder
                    live,        # live
                ))
                scc[succ] = -length(stack)
                cursor = length(stack)
            else end # next child is a resolved SCC (do nothing)
        else # v's children are processed, finalize v
            if item.minpreorder == item.preorder
                # `v` is the root of an SCC: pop every member off the stack
                has_one_element = stack[end].v == v
                while true
                    item = pop!(stack)
                    if live
                        scc[item.v] = v
                        # anything 'maybe-dead' that is reachable from a live node is live too
                        scan_subgraph!(reach, cfg, convert(Int, item.v),
                            #= filter =# (pred,x)->(filter(pred, x) && scc[x] > typemax(Int)÷2),
                            #= action =# (x)->(scc[x] -= typemax(Int)÷2;),
                        )
                    else # this offset marks a node as 'maybe-dead'
                        scc[item.v] = v + typemax(Int)÷2
                        push!(reach._worklist, item.v)
                    end
                    irreducible[item.v] = !has_one_element
                    (item.v == v) && break
                end
                item.parent == 0 && break # all done
            elseif live
                # `v` stays on the stack as part of its parent's SCC: propagate liveness
                stack[item.parent] = SCCStackItem(stack[item.parent]; live=true)
            end

            # update `minpreorder` for parent
            parent = stack[item.parent]
            minpreorder = min(parent.minpreorder, item.minpreorder)
            stack[item.parent] = SCCStackItem(parent; minpreorder)

            cursor = item.parent
        end
    end

    worklist = reach._worklist

    # filter the worklist, leaving any nodes not proven to be reachable from BB #1
    n_popped = 0
    for i = (worklist_len + 1):length(worklist)
        @assert worklist[i] != 1
        @assert scc[worklist[i]] > 0
        if scc[worklist[i]] > typemax(Int)÷2
            # node is unreachable, enqueue it
            scc[worklist[i]] = 0
            worklist[i - n_popped] = worklist[i]
        else
            n_popped += 1
        end
    end
    resize!(worklist, length(worklist) - n_popped)

    return length(worklist) > worklist_len # if true, a (newly) unreachable node was enqueued
end
201+
202+
"""
203+
Scan the subtree rooted at `root`, excluding `root` itself
204+
205+
Note: This function will not detect cycles for you. The `filter` provided must
206+
protect against infinite cycle traversal.
207+
"""
208+
function scan_subgraph!(reach::CFGReachability, cfg::CFG, root::Int, filter, action)
    # `reach._worklist` doubles as the traversal stack: only the entries above
    # `base_len` belong to this scan, and they are all popped before returning.
    pending = reach._worklist
    base_len = length(pending)

    push!(pending, root)
    while length(pending) > base_len
        node = pop!(pending)
        for next in cfg.blocks[node].succs
            # follow only edges accepted by `filter`; it is also what stops cycles
            if filter(node, next)
                action(next)
                push!(pending, next)
            end
        end
    end
    return nothing
end
222+
223+
# Check whether `bb` (which must not already be marked unreachable) has lost its
# last live incoming forward edge. If so, mark it (and, for irreducible loops,
# any other newly unreachable nodes found by `tarjan!`) with `scc == 0` and
# push it onto `reach._worklist`.
#
# Returns `true` if anything was enqueued, `false` otherwise. BasicBlock #1 is
# never enqueued.
function enqueue_if_unreachable!(reach::CFGReachability, cfg::CFG, bb::Int)
    (; domtree, scc) = reach
    @assert scc[bb] != 0

    bb == 1 && return false

    if !bb_in_irreducible_loop(reach, bb)
        # target is a reducible CFG node
        # this node lives iff it still has an incoming forward edge
        for pred in cfg.blocks[bb].preds
            dominates(domtree, bb, pred) || return false # forward-edge
        end
        scc[bb] = 0
        push!(reach._worklist, bb)
        return true
    end

    # irreducible CFG
    # this requires a full scan of the irreducible loop

    # any reducible back-edges do not need to be considered as part of reachability
    # (very important optimization, since it means reducible CFGs will have no SCCs)
    forward_only = (from::Int, to::Int)->!dominates(domtree, to, from)

    # reset every member of this SCC to 0
    old_scc = scc[bb]
    scc[bb] = 0
    scan_subgraph!(reach, cfg, bb,
        #= filter =# (pred,x)->(forward_only(pred, x) && scc[x] == old_scc),
        #= action =# (x)->(scc[x] = 0;),
    )

    # re-compute the SCC's for this portion of the CFG, adding any freshly
    # unreachable nodes to `reach._worklist`
    return tarjan!(reach, cfg; root=bb, filter=forward_only)
end
257+
258+
# Remove the edge (from → to) from `cfg`: drop the first occurrence of `from`
# in `to`'s predecessor list and the first occurrence of `to` in `from`'s
# successor list. The `::Int` assertions make a missing edge an error.
function kill_cfg_edge!(cfg::CFG, from::Int, to::Int)
    blocks = cfg.blocks
    incoming = blocks[to].preds
    outgoing = blocks[from].succs
    pred_idx = findfirst(x::Int->x==from, incoming)::Int
    deleteat!(incoming, pred_idx)
    succ_idx = findfirst(x::Int->x==to, outgoing)::Int
    deleteat!(outgoing, succ_idx)
    return nothing
end
264+
265+
"""
266+
Remove from `cfg` and `reach` the edge (from → to), as well as any blocks/edges
267+
this causes to become unreachable.
268+
269+
Calls:
270+
- `block_callback` for every unreachable block.
271+
- `edge_callback` for every unreachable edge into a reachable block (may also
272+
be called for blocks which are later discovered to be unreachable).
273+
"""
274+
function kill_edge!(reach::CFGReachability, cfg::CFG, from::Int, to::Int,
                    edge_callback=nothing, block_callback=nothing)
    # source is already unreachable: nothing to do
    reach.scc[from] == 0 && return
    @assert reach.scc[to] != 0

    # delete (from → to) edge
    kill_cfg_edge!(cfg, from, to)

    # check for unreachable target; a surviving target is reported as a dead edge
    if !enqueue_if_unreachable!(reach, cfg, to) && edge_callback !== nothing
        edge_callback(from, to)
    end

    worklist = reach._worklist
    while !isempty(worklist)
        dead = convert(Int, pop!(worklist))

        # already marked unreachable, just need to notify
        @assert reach.scc[dead] == 0 && dead != 1
        block_callback === nothing || block_callback(dead)

        dead_succs = cfg.blocks[dead].succs
        for succ in dead_succs
            # delete (dead → succ) edge on the successor's side
            succ_preds = cfg.blocks[succ].preds
            deleteat!(succ_preds, findfirst(x::Int->x==dead, succ_preds)::Int)

            # check for newly unreachable target
            reach.scc[succ] == 0 && continue
            if !enqueue_if_unreachable!(reach, cfg, succ) && edge_callback !== nothing
                edge_callback(dead, succ)
            end
        end
        # ... and on the dead block's side, all at once
        empty!(dead_succs)
    end
end

0 commit comments

Comments
 (0)