Skip to content

Commit cd190a8

Browse files
committed
wip: implement a better statement selection logic
Specifically, this commit aims to review the implementation of `add_control_flow!` and improves its accuracy. Ideally, it should pass JET's existing test cases as well as the newly added ones, including the test cases from JuliaDebug/LoweredCodeUtils.jl#99. The goal is to share the same high-precision CFG selection logic between LoweredCodeUtils and JET. The new algorithm is based on what was proposed in [^Wei84]. If there is even one active block in the blocks reachable from a conditional branch up to its successors' nearest common post-dominator (referred to as **INFL** in the paper), it is necessary to follow that conditional branch and execute the code. Otherwise, execution can be short-circuited from the conditional branch to the nearest common post-dominator. COMBAK: It is important to note that in Julia's IR (`CodeInfo`), "short-circuiting" a specific code region is not a simple task. Simply ignoring the path to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, a more careful implementation is required for this aspect. [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984.
1 parent 2dfcd4e commit cd190a8

File tree

3 files changed

+198
-56
lines changed

3 files changed

+198
-56
lines changed

src/JET.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,10 @@ using .CC: @nospecs, ⊑,
4141
InvokeCallInfo, MethodCallResult, MethodMatchInfo, MethodMatches, NOT_FOUND,
4242
OptimizationState, OptimizationParams, OverlayMethodTable, StmtInfo, UnionSplitInfo,
4343
UnionSplitMethodMatches, VarState, VarTable, WorldRange, WorldView,
44-
argextype, argtype_by_index, argtypes_to_type, hasintersect, ignorelimited,
45-
instanceof_tfunc, istopfunction, singleton_type, slot_id, specialize_method,
46-
tmeet, tmerge, typeinf_lattice, widenconst, widenlattice
44+
argextype, argtype_by_index, argtypes_to_type, compute_basic_blocks, construct_domtree,
45+
construct_postdomtree, hasintersect, ignorelimited, instanceof_tfunc, istopfunction,
46+
nearest_common_dominator, singleton_type, slot_id, specialize_method, tmeet, tmerge,
47+
typeinf_lattice, widenconst, widenlattice
4748

4849
using Base: IdSet, get_world_counter
4950

src/toplevel/virtualprocess.jl

Lines changed: 89 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,67 +1173,70 @@ end
11731173

11741174
# The goal of this function is to request concretization of the minimal necessary control
11751175
# flow to evaluate statements whose concretization have already been requested.
1176-
# The basic approach is to check if there are any active successors for each basic block,
1177-
# and if there is an active successor and the terminator is not a fall-through, then request
1178-
# the concretization of that terminator. Additionally, for conditional terminators, a simple
1179-
# optimization using post-domination analysis is also performed.
1180-
function add_control_flow!(concretize::BitVector, src::CodeInfo, cfg::CFG, postdomtree)
1176+
# The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active
1177+
# block in the blocks reachable from a conditional branch up to its successors' nearest
1178+
# common post-dominator (referred to as **INFL** in the paper), it is necessary to follow
1179+
# that conditional branch and execute the code. Otherwise, execution can be short-circuited
1180+
# from the conditional branch to the nearest common post-dominator.
1181+
#
1182+
# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`),
1183+
# "short-circuiting" a specific code region is not a simple task. Simply ignoring the path
1184+
# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore,
1185+
# a more careful implementation is required for this aspect.
1186+
#
1187+
# [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984.
1188+
function add_control_flow!(concretize::BitVector, src::CodeInfo, cfg::CFG, domtree, postdomtree)
11811189
local changed::Bool = false
11821190
function mark_concretize!(idx::Int)
11831191
if !concretize[idx]
1184-
concretize[idx] = true
1192+
changed |= concretize[idx] = true
11851193
return true
11861194
end
11871195
return false
11881196
end
1189-
nblocks = length(cfg.blocks)
1190-
for bbidx = 1:nblocks
1191-
bb = cfg.blocks[bbidx] # forward traversal
1197+
for bbidx = 1:length(cfg.blocks) # forward traversal
1198+
bb = cfg.blocks[bbidx]
11921199
nsuccs = length(bb.succs)
11931200
if nsuccs == 0
11941201
continue
11951202
elseif nsuccs == 1
1196-
terminator_idx = bb.stmts[end]
1197-
if src.code[terminator_idx] isa GotoNode
1198-
# If the destination of this `GotoNode` is not active, it's fine to ignore
1199-
# the control flow caused by this `GotoNode` and treat it as a fall-through.
1200-
# If the block that is fallen through to is active and has a dependency on
1201-
# this goto block, then the concretization of this goto block should already
1202-
# be requested (at some point of the higher concretization convergence cycle
1203-
# of `select_dependencies`), and thus, this `GotoNode` will be concretized.
1204-
if any(@view concretize[cfg.blocks[only(bb.succs)].stmts])
1205-
changed |= mark_concretize!(terminator_idx)
1203+
termidx = bb.stmts[end]
1204+
if src.code[termidx] isa GotoNode
1205+
succ = only(bb.succs)
1206+
if any(@view concretize[cfg.blocks[succ].stmts])
1207+
dominator = nearest_common_dominator(domtree, bbidx, succ)
1208+
if dominator succ
1209+
for blk in reachable_blocks(cfg, dominator, succ)
1210+
if blk == dominator || blk == succ
1211+
continue
1212+
end
1213+
if any(@view concretize[cfg.blocks[blk].stmts])
1214+
mark_concretize!(termidx)
1215+
break
1216+
end
1217+
end
1218+
else
1219+
mark_concretize!(termidx)
1220+
end
12061221
end
12071222
end
1223+
continue # we can just fall-through
12081224
elseif nsuccs == 2
1209-
terminator_idx = bb.stmts[end]
1210-
@assert is_conditional_terminator(src.code[terminator_idx]) "invalid IR"
1225+
termidx = bb.stmts[end]
1226+
@assert is_conditional_terminator(src.code[termidx]) "invalid IR"
12111227
succ1, succ2 = bb.succs
1212-
succ1_req = any(@view concretize[cfg.blocks[succ1].stmts])
1213-
succ2_req = any(@view concretize[cfg.blocks[succ2].stmts])
1214-
if succ1_req
1215-
if succ2_req
1216-
changed |= mark_concretize!(terminator_idx)
1217-
else
1218-
active_bb, inactive_bb = succ1, succ2
1219-
@goto asymmetric_case
1228+
postdominator = nearest_common_dominator(postdomtree, succ1, succ2)
1229+
inflblks = reachable_blocks(cfg, succ1, postdominator) reachable_blocks(cfg, succ2, postdominator)
1230+
for blk in inflblks
1231+
if blk == postdominator
1232+
continue
12201233
end
1221-
elseif succ2_req
1222-
active_bb, inactive_bb = succ2, succ1
1223-
@label asymmetric_case
1224-
# We can ignore the control flow of this conditional terminator and treat
1225-
# it as a fall-through if only one of its successors is active and the
1226-
# active block post-dominates the inactive one, since the post-domination
1227-
# ensures that the active basic block will be reached regardless of the
1228-
# control flow.
1229-
if CC.postdominates(postdomtree, active_bb, inactive_bb)
1230-
# fall through this block
1231-
else
1232-
changed |= mark_concretize!(terminator_idx)
1234+
if any(@view concretize[cfg.blocks[blk].stmts])
1235+
mark_concretize!(termidx)
1236+
break
12331237
end
1234-
else
1235-
# both successors are inactive, just fall through this block
12361238
end
1239+
# we can just fall-through to the post dominator block (by ignoring all statements between)
12371240
end
12381241
end
12391242
return changed
@@ -1242,6 +1245,25 @@ end
12421245
is_conditional_terminator(@nospecialize stmt) = stmt isa GotoIfNot ||
12431246
(@static @isdefined(EnterNode) ? stmt isa EnterNode : isexpr(stmt, :enter))
12441247

1248+
function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Int)
1249+
worklist = Int[from_bb]
1250+
visited = BitSet(from_bb)
1251+
if to_bb == from_bb
1252+
return visited
1253+
end
1254+
push!(visited, to_bb)
1255+
function visit!(bb::Int)
1256+
if bb visited
1257+
push!(visited, bb)
1258+
push!(worklist, bb)
1259+
end
1260+
end
1261+
while !isempty(worklist)
1262+
foreach(visit!, cfg.blocks[pop!(worklist)].succs)
1263+
end
1264+
return visited
1265+
end
1266+
12451267
function add_required_inplace!(concretize::BitVector, src::CodeInfo, edges, cl)
12461268
changed = false
12471269
for i = 1:length(src.code)
@@ -1272,27 +1294,42 @@ function is_arg_requested(@nospecialize(arg), concretize, edges, cl)
12721294
return false
12731295
end
12741296

1297+
# The purpose of this function is to find other statements that affect the execution of the
1298+
# statements choosen by `select_direct_dependencies!`. In other words, it extracts the
1299+
# minimal amount of code necessary to realize the required concretization.
1300+
# This technique is generally referred to as "program slicing," and specifically as
1301+
# "static program slicing". The basic algorithm implemented here is an extension of the one
1302+
# proposed in https://dl.acm.org/doi/10.5555/800078.802557, which is especially tuned for
1303+
# Julia's intermediate code representation.
12751304
function select_dependencies!(concretize::BitVector, src::CodeInfo, edges, cl)
12761305
typedefs = LoweredCodeUtils.find_typedefs(src)
1277-
cfg = CC.compute_basic_blocks(src.code)
1278-
postdomtree = CC.construct_postdomtree(cfg.blocks)
1306+
cfg = compute_basic_blocks(src.code)
1307+
domtree = construct_domtree(cfg.blocks)
1308+
postdomtree = construct_postdomtree(cfg.blocks)
12791309

12801310
while true
12811311
changed = false
12821312

1283-
# discover struct/method definitions at the beginning,
1284-
# and propagate the definition requirements by tracking SSA precedessors
1313+
# Discover Dtruct/method definitions at the beginning,
1314+
# and propagate the definition requirements by tracking SSA precedessors.
1315+
# (TODO maybe hoist this out of the loop?)
12851316
changed |= LoweredCodeUtils.add_typedefs!(concretize, src, edges, typedefs, ())
12861317
changed |= add_ssa_preds!(concretize, src, edges, ())
12871318

1288-
# mark some common inplace operations like `push!(x, ...)` and `setindex!(x, ...)`
1289-
# when `x` has been marked already: otherwise we may end up using it with invalid state
1319+
# Mark some common inplace operations like `push!(x, ...)` and `setindex!(x, ...)`
1320+
# when `x` has been marked already: otherwise we may end up using it with invalid state.
1321+
# However, note that this is an incomplete approach, and note that the slice created
1322+
# by this routine will not be sound because of this. This is because
1323+
# `add_required_inplace!` only requires certain special-cased function calls and
1324+
# does not take into account the possibility that arguments may be mutated in
1325+
# arbitrary function calls. Ideally, function calls should be required while
1326+
# considering the effects of these statements, or by some sort of an
1327+
# inter-procedural program slicing
12901328
changed |= add_required_inplace!(concretize, src, edges, cl)
12911329
changed |= add_ssa_preds!(concretize, src, edges, ())
12921330

1293-
# mark necessary control flows,
1294-
# and propagate the definition requirements by tracking SSA precedessors
1295-
changed |= add_control_flow!(concretize, src, cfg, postdomtree)
1331+
# Mark necessary control flows.
1332+
changed |= add_control_flow!(concretize, src, cfg, domtree, postdomtree)
12961333
changed |= add_ssa_preds!(concretize, src, edges, ())
12971334

12981335
changed || break

test/toplevel/test_virtualprocess.jl

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1748,7 +1748,7 @@ end
17481748
found_write = true
17491749
@test !slice[i]
17501750
elseif (JET.isexpr(stmt, :call) && (arg1 = stmt.args[1]; arg1 isa Core.SSAValue) &&
1751-
src.code[arg1.id] === :write)
1751+
src.code[arg1.id] === :write)
17521752
found_write = true
17531753
@test !slice[i]
17541754
end
@@ -1778,6 +1778,110 @@ end
17781778
@test isempty(s)
17791779
end
17801780

1781+
# A more complex test case (xref: https://github.com/JuliaDebug/LoweredCodeUtils.jl/pull/99#issuecomment-2236373067)
1782+
# This test case might seem simple at first glance, but note that `x2` and `a2` are
1783+
# defined at the top level (because of the `begin` at the top).
1784+
# Since global variable type declarations have been allowed since v1.
1785+
# 10, a conditional branch that includes `Core.get_binding_type` is generated for
1786+
# these simple global variable assignments.
1787+
# Specifically, the code is lowered into something like this:
1788+
# 1 1: conditional branching based on `x2`'s binding type
1789+
# │╲
1790+
# │ ╲
1791+
# │ ╲ 2: goto block for the case when no conversion is required for the value of `x2`
1792+
# 2 3 3: fall-through block for the case when a conversion is required for the value of `x2`
1793+
# │ ╱
1794+
# │ ╱
1795+
# │╱
1796+
# 4 4: assignment to `x2`, **and**
1797+
# │╲ conditional branching based on `a2`'s binding type
1798+
# │ ╲
1799+
# │ ╲ 5: goto block for the case when no conversion is required for the value of `a2`
1800+
# 5 6 6: fall-through block for the case when a conversion is required for the value of `a2`
1801+
# │ ╱
1802+
# │ ╱
1803+
# │╱
1804+
# 7 7: assignment to `a2`
1805+
# What's important to note here is that since there's an assignment to `a2`,
1806+
# concretization of the blocks 4-6 is necessary. However, at the same time we also want
1807+
# to skip concretizing the blocks 1-3.
1808+
let src = @src begin
1809+
x2 = 5
1810+
a2 = 1
1811+
@eval global geta2() = $a2 # concretization is forced
1812+
end
1813+
slice = JET.select_statements(@__MODULE__, src)
1814+
1815+
found_a2 = found_a2_get_binding_type = found_x2 = found_x2_get_binding_type = false
1816+
for (i, stmt) in enumerate(src.code)
1817+
if JET.isexpr(stmt, :(=))
1818+
lhs, rhs = stmt.args
1819+
if lhs isa GlobalRef
1820+
lhs = lhs.name
1821+
end
1822+
if lhs === :a2
1823+
found_a2 = true
1824+
@test slice[i]
1825+
elseif lhs === :x2
1826+
found_x2 = true
1827+
@test !slice[i] # this is easy to meet
1828+
end
1829+
elseif JET.@capture(stmt, $(GlobalRef(Core, :get_binding_type))(_, :a2))
1830+
found_a2_get_binding_type = true
1831+
@test slice[i]
1832+
elseif JET.@capture(stmt, $(GlobalRef(Core, :get_binding_type))(_, :x2))
1833+
found_x2_get_binding_type = true
1834+
@test !slice[i] # this is difficult to meet
1835+
end
1836+
end
1837+
@test found_a2; @test found_a2_get_binding_type; @test found_x2; @test found_x2_get_binding_type
1838+
end
1839+
let src = @src begin
1840+
cond = true
1841+
if cond
1842+
x = 1
1843+
y = 1
1844+
else
1845+
x = 2
1846+
y = 2
1847+
end
1848+
@eval global getx() = $x # concretization is forced
1849+
end
1850+
slice = JET.select_statements(@__MODULE__, src)
1851+
1852+
found_cond = found_cond_get_binding_type = false
1853+
found_x = found_x_get_binding_type = found_y = found_y_get_binding_type = 0
1854+
for (i, stmt) in enumerate(src.code)
1855+
if JET.isexpr(stmt, :(=))
1856+
lhs, rhs = stmt.args
1857+
if lhs isa GlobalRef
1858+
lhs = lhs.name
1859+
end
1860+
if lhs === :cond
1861+
found_cond = true
1862+
@test slice[i]
1863+
elseif lhs === :x
1864+
found_x += 1
1865+
@test slice[i]
1866+
elseif lhs === :y
1867+
found_y += 1
1868+
@test !slice[i]
1869+
end
1870+
elseif JET.@capture(stmt, $(GlobalRef(Core, :get_binding_type))(_, :cond))
1871+
found_cond_get_binding_type = true
1872+
@test slice[i]
1873+
elseif JET.@capture(stmt, $(GlobalRef(Core, :get_binding_type))(_, :x))
1874+
found_x_get_binding_type += 1
1875+
@test slice[i]
1876+
elseif JET.@capture(stmt, $(GlobalRef(Core, :get_binding_type))(_, :y))
1877+
found_y_get_binding_type += 1
1878+
@test !slice[i]
1879+
end
1880+
end
1881+
@test found_cond; @test found_cond_get_binding_type
1882+
@test found_x == found_x_get_binding_type == found_y == found_y_get_binding_type == 2
1883+
end
1884+
17811885
@testset "captured variables" begin
17821886
let (vmod, res) = @analyze_toplevel2 begin
17831887
begin

0 commit comments

Comments
 (0)