Skip to content

Commit d6adba1

Browse files
committed
irinterp: Add Tarjan SCC algorithm for reachability
This optimizes `kill_edge!` for IR interp. The basic algorithm flow is: (1) Check whether the `target` of the dead edge is unreachable, i.e. iff: for a reducible CFG node, there is no live incoming forward edge; for an irreducible CFG node, Tarjan's SCC algorithm reports no live incoming forward edges to the SCC. (2) If `target` is dead, repeat (1) for all of its outgoing edges. This maintains reachability information very efficiently, especially for reducible CFGs, which are overwhelmingly common. As an added bonus, CFGReachability can also be consulted to check which BasicBlocks are part of an irreducible loop.
1 parent fb2d946 commit d6adba1

File tree

6 files changed

+487
-27
lines changed

6 files changed

+487
-27
lines changed

base/compiler/compiler.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ include("compiler/validation.jl")
200200
include("compiler/ssair/basicblock.jl")
201201
include("compiler/ssair/domtree.jl")
202202
include("compiler/ssair/ir.jl")
203+
include("compiler/ssair/tarjan.jl")
203204

204205
include("compiler/abstractlattice.jl")
205206
include("compiler/inferenceresult.jl")

base/compiler/inferencestate.jl

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,17 @@ function getindex(tpdum::TwoPhaseDefUseMap, idx::Int)
170170
return TwoPhaseVectorView(tpdum.data, nelems, range)
171171
end
172172

173+
# Holder for the `CFGReachability` of `ir` that is computed on demand.
# The constructor only stores `ir`; `reachability` stays undefined until it is
# assigned later (see `get!(::LazyCFGReachability)`).
mutable struct LazyCFGReachability
    ir::IRCode
    reachability::CFGReachability # undefined right after construction
    function LazyCFGReachability(ir::IRCode)
        return new(ir)
    end
end
178+
# Return the cached `CFGReachability` of `x`, building it on first use from the
# CFG of `x.ir` and a dominator tree constructed over that CFG's blocks.
function get!(x::LazyCFGReachability)
    if !isdefined(x, :reachability)
        cfg = x.ir.cfg
        x.reachability = CFGReachability(cfg, construct_domtree(cfg.blocks))
    end
    return x.reachability
end
183+
173184
mutable struct LazyGenericDomtree{IsPostDom}
174185
ir::IRCode
175186
domtree::GenericDomTree{IsPostDom}
@@ -744,7 +755,7 @@ mutable struct IRInterpretationState
744755
const sptypes::Vector{VarState}
745756
const tpdum::TwoPhaseDefUseMap
746757
const ssa_refined::BitSet
747-
const lazydomtree::LazyDomtree
758+
const lazyreachability::LazyCFGReachability
748759
valid_worlds::WorldRange
749760
const edges::Vector{Any}
750761
parent # ::Union{Nothing,AbsIntState}
@@ -764,12 +775,12 @@ mutable struct IRInterpretationState
764775
append!(ir.argtypes, given_argtypes)
765776
tpdum = TwoPhaseDefUseMap(length(ir.stmts))
766777
ssa_refined = BitSet()
767-
lazydomtree = LazyDomtree(ir)
778+
lazyreachability = LazyCFGReachability(ir)
768779
valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
769780
edges = Any[]
770781
parent = nothing
771782
return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
772-
ssa_refined, lazydomtree, valid_worlds, edges, parent)
783+
ssa_refined, lazyreachability, valid_worlds, edges, parent)
773784
end
774785
end
775786

base/compiler/ssair/irinterp.jl

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ function kill_block!(ir::IRCode, bb::Int)
6363
ir[SSAValue(last(stmts))][:stmt] = ReturnNode()
6464
return
6565
end
66+
# Curried form: returns a one-argument callback that kills block `bb` of `ir`.
function kill_block!(ir::IRCode)
    return (bb::Int) -> kill_block!(ir, bb)
end
6667

6768
function update_phi!(irsv::IRInterpretationState, from::Int, to::Int)
6869
ir = irsv.ir
@@ -102,26 +103,8 @@ function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::
102103
end
103104

104105
function kill_edge!(irsv::IRInterpretationState, from::Int, to::Int)
105-
ir = irsv.ir
106-
kill_edge!(ir, from, to, update_phi!(irsv))
107-
108-
lazydomtree = irsv.lazydomtree
109-
domtree = nothing
110-
if isdefined(lazydomtree, :domtree)
111-
domtree = get!(lazydomtree)
112-
domtree_delete_edge!(domtree, ir.cfg.blocks, from, to)
113-
elseif length(ir.cfg.blocks[to].preds) != 0
114-
# TODO: If we're not maintaining the domtree, computing it just for this
115-
# is slightly overkill - just the dfs tree would be enough.
116-
domtree = get!(lazydomtree)
117-
end
118-
119-
if domtree !== nothing && bb_unreachable(domtree, to)
120-
kill_block!(ir, to)
121-
for edge in ir.cfg.blocks[to].succs
122-
kill_edge!(irsv, to, edge)
123-
end
124-
end
106+
kill_edge!(get!(irsv.lazyreachability), irsv.ir.cfg, from, to,
107+
update_phi!(irsv), kill_block!(irsv.ir))
125108
end
126109

127110
function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction, idx::Int,

base/compiler/ssair/tarjan.jl

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
using Core.Compiler: DomTree, CFG, BasicBlock, StmtRange, dominates
4+
5+
# One frame of the combined DFS stack used by Tarjan's SCC algorithm.
struct SCCStackItem
    v::Int32           # the node this frame belongs to
    child::Int32       # which child of `v` to scan
    parent::Int32      # the location of `parent` in the stack
    preorder::Int32    # the index in the (pre-order traversal of the) DFS tree
    minpreorder::Int32 # the minimum node (by pre-order index) reachable from any
                       # node in the DFS sub-tree rooted at `v`
    live::Bool         # whether this node is reachable from BasicBlock #1
end

# "Copy with changes": build a new item from `item`, overriding only `child`,
# `minpreorder` and/or `live`. `v`, `parent` and `preorder` are always carried over.
function SCCStackItem(item::SCCStackItem; child=item.child,
                      minpreorder=item.minpreorder, live=item.live)
    (; v, parent, preorder) = item
    return SCCStackItem(v, child, parent, preorder, minpreorder, live)
end
30+
31+
# Reachability information for a CFG, maintained with Tarjan's SCC algorithm.
struct CFGReachability
    irreducible::BitVector       # BBNumber -> Bool
    scc::Vector{Int}             # BBNumber -> SCCNumber
    domtree::DomTree

    _worklist::Vector{Int}       # for node removal
    _stack::Vector{SCCStackItem} # for Tarjan's SCC algorithm
end

# Build the reachability information for `cfg` (with dominator tree `domtree`)
# by running `tarjan!` once over the CFG.
function CFGReachability(cfg::CFG, domtree::DomTree)
    n_blocks = length(cfg.blocks)
    reachability = CFGReachability(
        # `tarjan!` only writes `irreducible` for the blocks it visits, so start
        # from all-`false` instead of uninitialized bits: blocks that are never
        # visited must not report garbage from `bb_in_irreducible_loop`.
        falses(n_blocks),   # irreducible
        zeros(Int, n_blocks), # scc
        domtree,            # domtree
        Int[],              # _worklist
        SCCStackItem[],     # _stack
    )
    tarjan!(reachability, cfg)
    return reachability
end
52+
53+
# A block counts as unreachable exactly when its entry in `reach.scc` is 0.
function bb_unreachable(reach::CFGReachability, bb::Int)
    return reach.scc[bb] == 0
end
54+
55+
# Look up the `irreducible` flag recorded for block `bb`.
function bb_in_irreducible_loop(reach::CFGReachability, bb::Int)
    return reach.irreducible[bb]
end
56+
57+
# Returns `true` if a node is 'rooted' as reachable, i.e. it has an incoming
# edge from a resolved SCC other than its own (or it is BasicBlock #1).
#
# `tarjan!` takes the transitive closure of this relation in order to detect
# which BasicBlocks are unreachable.
function _bb_externally_reachable(reach::CFGReachability, cfg::CFG, bb::Int)
    if bb == 1
        return true
    end
    scc = reach.scc
    for pred in cfg.blocks[bb].preds
        # Only a predecessor with a resolved (positive) SCC that is not
        # dominated by `bb` (i.e. not a back-edge source) can root `bb`.
        if scc[pred] > 0 && !dominates(reach.domtree, bb, pred)
            @assert scc[pred] != scc[bb]
            return true
        end
    end
    return false
end
73+
"""
    tarjan!(reach::CFGReachability, cfg::CFG; root::Int=1)

Tarjan's strongly-connected components algorithm. Traverses the CFG starting at `root`, ignoring
nodes with resolved SCC's and updating outputs for all un-resolved nodes.

Returns true if any node was discovered to be unreachable, false otherwise. If `root` is already
resolved (`reach.scc[root] != 0`), nothing is traversed and `false` is returned.

Outputs:
  - `reach.scc`: strongly-connected components, ignoring backedges to (natural) loops
  - `reach.irreducible`: true iff a BasicBlock is part of a (non-trivial) SCC / irreducible loop
  - `reach._worklist`: if performing an incremental update (`root != 1`), any traversed nodes that
    are unreachable from BasicBlock #1 are enqueued to this worklist
"""
function tarjan!(reach::CFGReachability, cfg::CFG; root::Int=1)
    (; scc, irreducible, domtree) = reach
    # `root` is already resolved: no traversal happens, hence nothing new can be
    # unreachable. Return a `Bool` like every other exit of this function.
    scc[root] != 0 && return false
    live = _bb_externally_reachable(reach, cfg, root)

    # the original algorithm has a separate stack and worklist (unrelated to `reach._worklist`)
    # here we use a single combined stack for improved memory/cache efficiency
    stack = reach._stack
    push!(stack, SCCStackItem(
        root, # v
        1,    # child
        0,    # parent
        1,    # preorder
        1,    # minpreorder
        live, # live
    ))
    scc[root] = -1
    cursor = length(stack)

    # worklist length before any new unreachable nodes are added
    worklist_len = length(reach._worklist)

    # last (pre-order) DFS label assigned to a node
    preorder_id = 1
    while true
        (; v, child, minpreorder, live) = item = stack[cursor]

        bb = cfg.blocks[v]
        if child <= length(bb.succs) # queue next child
            stack[cursor] = item = SCCStackItem(item; child=child+1)
            succ = bb.succs[child]

            # ignore any back-edges in a (natural) loop (see `kill_edge!`)
            if dominates(domtree, succ, convert(Int, v))
                # This check ensures that reducible CFG's will contain no SCC's. The vast majority
                # of functions have reducible CFG's, so this optimization is very important.
                continue
            end

            if scc[succ] < 0
                # next child is already in DFS tree
                # (a negative `scc` entry is the negated stack index of that node)
                child_preorder = stack[-scc[succ]].preorder

                # only need to update `minpreorder` for `v`
                stack[cursor] = item = SCCStackItem(item;
                    minpreorder=min(minpreorder, child_preorder))
            elseif scc[succ] == 0
                # next child is a new element in DFS tree
                preorder_id += 1
                live = live || _bb_externally_reachable(reach, cfg, succ)
                push!(stack, SCCStackItem(
                    succ,        # v
                    1,           # child
                    cursor,      # parent (index in stack)
                    preorder_id, # preorder
                    preorder_id, # minpreorder
                    live,        # live
                ))
                scc[succ] = -length(stack)
                cursor = length(stack)
            else end # next child is a resolved SCC (do nothing)
        else # v's children are processed, finalize v
            if item.minpreorder == item.preorder
                # `v` is the head of an SCC: pop all of its members off the stack
                has_one_element = stack[end].v == v
                while true
                    item = pop!(stack)
                    if live
                        scc[item.v] = v
                        # clear the 'maybe-dead' offset from everything reachable from here
                        scan_subgraph!(reach, cfg, convert(Int, item.v),
                            #= filter =# (pred,x)->(!dominates(domtree, x, pred) &&
                                                    scc[x] > typemax(Int)÷2),
                            #= action =# (x)->(scc[x] -= typemax(Int)÷2;),
                        )
                    else # this offset marks a node as 'maybe-dead'
                        scc[item.v] = v + typemax(Int)÷2
                        push!(reach._worklist, item.v)
                    end
                    irreducible[item.v] = !has_one_element
                    (item.v == v) && break
                end
                item.parent == 0 && break # all done
            elseif live
                stack[item.parent] = SCCStackItem(stack[item.parent]; live=true)
            end

            # update `minpreorder` for parent
            parent = stack[item.parent]
            minpreorder = min(parent.minpreorder, item.minpreorder)
            stack[item.parent] = SCCStackItem(parent; minpreorder)

            cursor = item.parent
        end
    end

    worklist = reach._worklist

    # filter the worklist, leaving any nodes not proven to be reachable from BB #1
    n_filtered = 0
    for i = (worklist_len + 1):length(worklist)
        @assert worklist[i] != 1
        @assert scc[worklist[i]] > 0
        if scc[worklist[i]] > typemax(Int)÷2
            # node is unreachable, enqueue it
            scc[worklist[i]] = 0
            worklist[i - n_filtered] = worklist[i]
        else
            n_filtered += 1
        end
    end
    resize!(worklist, length(worklist) - n_filtered)

    return length(worklist) > worklist_len # if true, a (newly) unreachable node was enqueued
end
200+
"""
    scan_subgraph!(reach::CFGReachability, cfg::CFG, root::Int, filter, action)

Walk the successors of `root` (and, transitively, of every accepted successor),
calling `action(succ)` for each successor edge `(v, succ)` for which
`filter(v, succ)` is true. `root` itself is never passed to `action` unless it
is reached again through an accepted edge.

`reach._worklist` is used as scratch space; entries already on it are left untouched.

Note: no cycle detection is performed here. The supplied `filter` must
guarantee that the traversal terminates.
"""
function scan_subgraph!(reach::CFGReachability, cfg::CFG, root::Int, filter, action)
    pending = reach._worklist
    base_len = length(pending) # entries up to here were already on the worklist

    push!(pending, root)
    while length(pending) > base_len
        node = pop!(pending)
        for next in cfg.blocks[node].succs
            if filter(node, next)
                action(next)
                push!(pending, next)
            end
        end
    end
    return nothing
end
221+
222+
# Decide whether `bb` (which must not already be marked unreachable) has become
# unreachable. If so, mark it (`scc[bb] = 0`) and push it onto `reach._worklist`.
# In the irreducible case, `tarjan!` enqueues every node it finds unreachable.
# Returns `true` iff something was enqueued.
function enqueue_if_unreachable!(reach::CFGReachability, cfg::CFG, bb::Int)
    scc = reach.scc
    domtree = reach.domtree
    @assert scc[bb] != 0

    if bb == 1
        return false
    end

    if !bb_in_irreducible_loop(reach, bb)
        # target is a reducible CFG node
        # this node lives iff it still has an incoming forward edge
        for pred in cfg.blocks[bb].preds
            if !dominates(domtree, bb, pred) # forward-edge
                return false
            end
        end
        scc[bb] = 0
        push!(reach._worklist, bb)
        return true
    end

    # irreducible CFG
    # this requires a full scan of the irreducible loop
    members = scc[bb]
    scc[bb] = 0
    same_scc = (pred, x) -> (!dominates(domtree, x, pred) && scc[x] == members)
    clear! = (x) -> (scc[x] = 0;)
    scan_subgraph!(reach, cfg, bb, same_scc, clear!) # set this SCC to 0

    # re-compute the SCC's for this portion of the CFG, adding any freshly
    # unreachable nodes to `reach._worklist`
    return tarjan!(reach, cfg; root=bb)
end
252+
"""
    kill_edge!(reach::CFGReachability, cfg::CFG, from::Int, to::Int,
               edge_callback=nothing, block_callback=nothing)

Delete the edge (from → to) from `cfg` and update `reach` accordingly, also
processing every block that becomes unreachable as a consequence. Does nothing
if `from` is already marked unreachable.

Callbacks (each optional):
  - `block_callback(bb)` is invoked for every block found to be unreachable.
  - `edge_callback(from, to)` is invoked for every removed edge whose target was
    not enqueued as unreachable at that moment (the target may still be found
    unreachable later on).
"""
function kill_edge!(reach::CFGReachability, cfg::CFG, from::Int, to::Int,
                    edge_callback=nothing, block_callback=nothing)
    if reach.scc[from] == 0
        return # source is already unreachable
    end
    @assert reach.scc[to] != 0

    blocks = cfg.blocks

    # delete (from → to) edge, from both endpoints
    incoming, outgoing = blocks[to].preds, blocks[from].succs
    deleteat!(incoming, findfirst(p::Int->p==from, incoming)::Int)
    deleteat!(outgoing, findfirst(s::Int->s==to, outgoing)::Int)

    # check for unreachable target
    enqueued = enqueue_if_unreachable!(reach, cfg, to)
    if !enqueued && edge_callback !== nothing
        edge_callback(from, to)
    end

    # drain the worklist of blocks that were marked unreachable
    while !isempty(reach._worklist)
        node = convert(Int, pop!(reach._worklist))

        # already marked unreachable, just need to notify
        @assert reach.scc[node] == 0 && node != 1
        if block_callback !== nothing
            block_callback(node)
        end

        for succ in blocks[node].succs
            # delete (node → succ) edge (only the predecessor side is removed here)
            incoming = blocks[succ].preds
            deleteat!(incoming, findfirst(p::Int->p==node, incoming)::Int)

            # check for newly unreachable target
            if reach.scc[succ] != 0
                enqueued = enqueue_if_unreachable!(reach, cfg, succ)
                if !enqueued && edge_callback !== nothing
                    edge_callback(node, succ)
                end
            end
        end
    end
end

0 commit comments

Comments
 (0)