Skip to content

Commit 1a398ab

Browse files
committed
switch to a new CFG selection logic
This commit aims to port the new CFG selection logic implemented in aviatesk/JET.jl#654 to LCU, so that it can be shared between LCU and JET. The new algorithm is based on what was proposed in [Wei84][^Wei84]. If there is even one active block in the blocks reachable from a conditional branch up to its successors' nearest common post-dominator (referred to as "𝑰𝑵𝑭𝑳" in the paper), it is necessary to follow that conditional branch and execute the code. Otherwise, execution can be short-circuited[^short-circuit] from the conditional branch to the nearest common post-dominator. Regarding `GotoNode`s, they are now marked only for active blocks after all requirements have converged, rather than being marked inside `add_loops!` or similar functions. This approach eliminates the need to add unnecessary blocks inside the loop and the need to use `add_loops!`, while still allowing the required CFG to be executed safely.

[^Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984. https://ieeexplore.ieee.org/document/5010248

[^short-circuit]: It is important to note that in Julia's IR (`CodeInfo`), "short-circuiting" a specific code region is not a simple task. Simply ignoring the path to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, a more careful implementation is required for this aspect.
1 parent 75da0d8 commit 1a398ab

File tree

3 files changed

+165
-41
lines changed

3 files changed

+165
-41
lines changed

src/codeedges.jl

Lines changed: 135 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ function print_with_code(preprint, postprint, io::IO, src::CodeInfo)
108108
:displaysize=>displaysize(io),
109109
:SOURCE_SLOTNAMES => Base.sourceinfo_slotnames(src))
110110
used = BitSet()
111-
cfg = Core.Compiler.compute_basic_blocks(src.code)
111+
cfg = compute_basic_blocks(src.code)
112112
for stmt in src.code
113113
Core.Compiler.scan_ssa_use!(push!, used, stmt)
114114
end
@@ -629,8 +629,7 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo,
629629
objs = add_requests!(isrequired, objs, edges, norequire)
630630

631631
# Compute basic blocks, which we'll use to make sure we mark necessary control-flow
632-
cfg = Core.Compiler.compute_basic_blocks(src.code) # needed for control-flow analysis
633-
domtree = construct_domtree(cfg.blocks)
632+
cfg = compute_basic_blocks(src.code) # needed for control-flow analysis
634633
postdomtree = construct_postdomtree(cfg.blocks)
635634

636635
# We'll mostly use generic graph traversal to discover all the lines we need,
@@ -651,14 +650,18 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo,
651650

652651
# Add control-flow
653652
changed |= add_loops!(isrequired, cfg)
654-
changed |= add_control_flow!(isrequired, cfg, domtree, postdomtree)
653+
changed |= add_control_flow!(isrequired, src, cfg, postdomtree)
655654

656655
# So far, everything is generic graph traversal. Now we add some domain-specific information
657656
changed |= add_typedefs!(isrequired, src, edges, typedefs, norequire)
658657
changed |= add_inplace!(isrequired, src, edges, norequire)
659658

660659
iter += 1 # just for diagnostics
661660
end
661+
662+
# now mark the active goto nodes
663+
add_active_gotos!(isrequired, src, cfg, postdomtree)
664+
662665
return isrequired
663666
end
664667

@@ -752,48 +755,140 @@ function add_loops!(isrequired, cfg)
752755
return changed
753756
end
754757

755-
function add_control_flow!(isrequired, cfg, domtree, postdomtree)
756-
changed, _changed = false, true
757-
blocks = cfg.blocks
758-
nblocks = length(blocks)
759-
while _changed
760-
_changed = false
761-
for (ibb, bb) in enumerate(blocks)
762-
r = rng(bb)
763-
if any(view(isrequired, r))
764-
# Walk up the dominators
765-
jbb = ibb
766-
while jbb != 1
767-
jdbb = domtree.idoms_bb[jbb]
768-
dbb = blocks[jdbb]
769-
# Check the successors; if jbb doesn't post-dominate, mark the last statement
770-
for s in dbb.succs
771-
if !postdominates(postdomtree, jbb, s)
772-
idxlast = rng(dbb)[end]
773-
_changed |= !isrequired[idxlast]
774-
isrequired[idxlast] = true
775-
break
776-
end
777-
end
778-
jbb = jdbb
758+
using Core: CodeInfo
759+
using Core.Compiler: CFG, BasicBlock, compute_basic_blocks
760+
761+
# The goal of this function is to request concretization of the minimal necessary control
762+
# flow to evaluate statements whose concretization has already been requested.
763+
# The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active
764+
# block in the blocks reachable from a conditional branch up to its successors' nearest
765+
# common post-dominator (referred to as 𝑰𝑵𝑭𝑳 in the paper), it is necessary to follow
766+
# that conditional branch and execute the code. Otherwise, execution can be short-circuited
767+
# from the conditional branch to the nearest common post-dominator.
768+
#
769+
# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`),
770+
# "short-circuiting" a specific code region is not a simple task. Simply ignoring the path
771+
# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore,
772+
# a more careful implementation is required for this aspect.
773+
#
774+
# [^Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984.
775+
function add_control_flow!(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
776+
local changed::Bool = false
777+
function mark_isrequired!(idx::Int)
778+
if !isrequired[idx]
779+
changed |= isrequired[idx] = true
780+
return true
781+
end
782+
return false
783+
end
784+
for bbidx = 1:length(cfg.blocks) # forward traversal
785+
bb = cfg.blocks[bbidx]
786+
nsuccs = length(bb.succs)
787+
if nsuccs == 0
788+
continue
789+
elseif nsuccs == 1
790+
continue # leave a fall-through terminator unmarked: `GotoNode`s are marked later
791+
elseif nsuccs == 2
792+
termidx = bb.stmts[end]
793+
@assert is_conditional_terminator(src.code[termidx]) "invalid IR"
794+
if is_conditional_block_active(isrequired, bb, cfg, postdomtree)
795+
mark_isrequired!(termidx)
796+
else
797+
# fall-through to the post dominator block (by short-circuiting all statements between)
798+
end
799+
end
800+
end
801+
return changed
802+
end
803+
804+
is_conditional_terminator(@nospecialize stmt) = stmt isa GotoIfNot ||
805+
(@static @isdefined(EnterNode) ? stmt isa EnterNode : isexpr(stmt, :enter))
806+
807+
function is_conditional_block_active(isrequired, bb::BasicBlock, cfg::CFG, postdomtree)
808+
return visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
809+
for blk in 𝑰𝑵𝑭𝑳
810+
if blk == postdominator
811+
continue # skip the post-dominator block and continue to a next infl block
812+
end
813+
if any(@view isrequired[cfg.blocks[blk].stmts])
814+
return true
815+
end
816+
end
817+
return false
818+
end
819+
end
820+
821+
function visit_𝑰𝑵𝑭𝑳_blocks(func, bb::BasicBlock, cfg::CFG, postdomtree)
822+
succ1, succ2 = bb.succs
823+
postdominator = nearest_common_dominator(postdomtree, succ1, succ2)
824+
𝑰𝑵𝑭𝑳 = reachable_blocks(cfg, succ1, postdominator) ∪ reachable_blocks(cfg, succ2, postdominator)
825+
return func(postdominator, 𝑰𝑵𝑭𝑳)
826+
end
827+
828+
function reachable_blocks(cfg, from_bb::Int, to_bb::Int)
829+
worklist = Int[from_bb]
830+
visited = BitSet(from_bb)
831+
if to_bb == from_bb
832+
return visited
833+
end
834+
push!(visited, to_bb)
835+
function visit!(bb::Int)
836+
if bb ∉ visited
837+
push!(visited, bb)
838+
push!(worklist, bb)
839+
end
840+
end
841+
while !isempty(worklist)
842+
foreach(visit!, cfg.blocks[pop!(worklist)].succs)
843+
end
844+
return visited
845+
end
846+
847+
function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
848+
dead_blocks = compute_dead_blocks(isrequired, src, cfg, postdomtree)
849+
changed = false
850+
for bbidx = 1:length(cfg.blocks)
851+
if bbidx ∉ dead_blocks
852+
bb = cfg.blocks[bbidx]
853+
nsuccs = length(bb.succs)
854+
if nsuccs == 1
855+
termidx = bb.stmts[end]
856+
if src.code[termidx] isa GotoNode
857+
changed |= isrequired[termidx] = true
779858
end
780-
# Walk down the post-dominators, including self
781-
jbb = ibb
782-
while jbb != 0 && jbb < nblocks
783-
pdbb = blocks[jbb]
784-
# Check if the exit of this block is a GotoNode or `return`
785-
if length(pdbb.succs) < 2
786-
idxlast = rng(pdbb)[end]
787-
_changed |= !isrequired[idxlast]
788-
isrequired[idxlast] = true
859+
end
860+
end
861+
end
862+
return changed
863+
end
864+
865+
# find dead blocks using the same approach as `add_control_flow!`, for the converged `isrequired`
866+
function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
867+
dead_blocks = BitSet()
868+
for bbidx = 1:length(cfg.blocks)
869+
bb = cfg.blocks[bbidx]
870+
nsuccs = length(bb.succs)
871+
if nsuccs == 2
872+
termidx = bb.stmts[end]
873+
@assert is_conditional_terminator(src.code[termidx]) "invalid IR"
874+
visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
875+
is_𝑰𝑵𝑭𝑳_active = false
876+
for blk in 𝑰𝑵𝑭𝑳
877+
if blk == postdominator
878+
continue # skip the post-dominator block and continue to a next infl block
879+
end
880+
if any(@view isrequired[cfg.blocks[blk].stmts])
881+
is_𝑰𝑵𝑭𝑳_active |= true
882+
break
789883
end
790-
jbb = postdomtree.idoms_bb[jbb]
884+
end
885+
if !is_𝑰𝑵𝑭𝑳_active
886+
union!(dead_blocks, delete!(𝑰𝑵𝑭𝑳, postdominator))
791887
end
792888
end
793889
end
794-
changed |= _changed
795890
end
796-
return changed
891+
return dead_blocks
797892
end
798893

799894
# Do a traversal of "numbered" predecessors and find statement ranges and names of type definitions

src/domtree.jl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,3 +412,30 @@ function _dominates(domtree::GenericDomTree, bb1::BBNumber, bb2::BBNumber)
412412
end
413413
return bb1 == bb2
414414
end
415+
416+
"""
417+
nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
418+
419+
Compute the nearest common (post-)dominator of `a` and `b`.
420+
"""
421+
function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
422+
a == 0 && return a
423+
b == 0 && return b
424+
alevel = domtree.nodes[a].level
425+
blevel = domtree.nodes[b].level
426+
# W.l.g. assume blevel <= alevel
427+
if alevel < blevel
428+
a, b = b, a
429+
alevel, blevel = blevel, alevel
430+
end
431+
while alevel > blevel
432+
a = domtree.idoms_bb[a]
433+
alevel -= 1
434+
end
435+
while a != b && a != 0
436+
a = domtree.idoms_bb[a]
437+
b = domtree.idoms_bb[b]
438+
end
439+
@assert a == b
440+
return a
441+
end

src/packagedef.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optle
22
@eval Base.Experimental.@optlevel 1
33
end
44

5-
using Core: SimpleVector, CodeInfo, NewvarNode, GotoNode
5+
using Core: SimpleVector
6+
using Core.IR
67
using Base.Meta: isexpr
78

89
const SSAValues = Union{Core.Compiler.SSAValue, JuliaInterpreter.SSAValue}
@@ -22,6 +23,7 @@ else
2223
const construct_domtree = Core.Compiler.construct_domtree
2324
const construct_postdomtree = Core.Compiler.construct_postdomtree
2425
const postdominates = Core.Compiler.postdominates
26+
const nearest_common_dominator = Core.Compiler.nearest_common_dominator
2527
end
2628

2729
# precompilation

0 commit comments

Comments
 (0)