diff --git a/docs/src/api.md b/docs/src/api.md index a4bdbb6..ea5a0bf 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -17,6 +17,7 @@ lines_required lines_required! selective_eval! selective_eval_fromstart! +SelectiveEvalController ``` ## Interpreter diff --git a/src/codeedges.jl b/src/codeedges.jl index 29aeb81..e92aa36 100644 --- a/src/codeedges.jl +++ b/src/codeedges.jl @@ -182,6 +182,59 @@ function postprint_linelinks(io::IO, idx::Int, src::CodeInfo, cl::CodeLinks, bbc return nothing end +struct CFGShortCut + from::Int # pc of GotoIfNot with inactive 𝑰𝑵𝑭𝑳 blocks + to::Int # pc of the entry of the nearest common post-dominator of the GotoIfNot's successors +end + +""" + info::SelectiveEvalInfo + +When this object is passed as the `recurse` argument of `selective_eval!`, +the selective execution is adjusted as follows: + +- **Implicit return**: In Julia's IR representation (`CodeInfo`), the final block does not + necessarily return and may `goto` another block. And if the `return` statement is not + included in the slice in such cases, it is necessary to terminate `selective_eval!` when + execution reaches such implicit return statements. `info.implicit_returns` records + the PCs of such return statements, and `selective_eval!` will return when reaching those statements. + +- **CFG short-cut**: When the successors of a conditional branch are inactive, and it is + safe to move the program counter from the conditional branch to the nearest common + post-dominator of those successors, this short-cut is taken. + This short-cut is not merely an optimization but is actually essential for the correctness + of the selective execution. This is because, in `CodeInfo`, even if we simply fall-through + dead blocks (i.e., increment the program counter without executing the statements of those + blocks), it does not necessarily lead to the nearest common post-dominator block. + +These adjustments are necessary for performing selective execution correctly. +[`lines_required`](@ref) or [`lines_required!`](@ref) will update the `SelectiveInterpreter` +passed as an argument to be appropriate for the program slice generated. +""" +struct SelectiveEvalInfo + implicit_returns::BitSet # pc where selective execution should terminate even if they're inactive + shortcuts::Vector{CFGShortCut} +end +SelectiveEvalInfo() = SelectiveEvalInfo(BitSet(), CFGShortCut[]) + +""" + struct SelectiveInterpreter{S<:Interpreter,T<:AbstractVector{Bool}} <: Interpreter + inner::S + isrequired::T + end + +An `JuliaInterpreter.Interpreter` that executes only the statements marked `true` in `isrequired`. +Note that this inforeter does not recurse into callee frames. +That is, when `JuliaInterpreter.finish!(info::SelectiveEvalInfo, frame, ...)` is +performed, the `frame` will be executed selectively according to `info.isrequired`, but +any callee frames within it will be executed by `info.inner::Interpreter`, not by `info`. +""" +struct SelectiveInterpreter{S<:Interpreter,T<:AbstractVector{Bool}} <: Interpreter + inner::S + isrequired::T + info::SelectiveEvalInfo +end + function namedkeys(cl::CodeLinks) ukeys = Set{GlobalRef}() for c in (cl.namepreds, cl.namesuccs, cl.nameassigns) @@ -591,8 +644,8 @@ function terminal_preds!(s, j, edges, covered) # can't be an inner function bec end """ - isrequired = lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges) - isrequired = lines_required(idx::Int, src::CodeInfo, edges::CodeEdges) + isrequired = lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges, [info::SelectiveEvalInfo]) + isrequired = lines_required(idx::Int, src::CodeInfo, edges::CodeEdges, [info::SelectiveEvalInfo]) Determine which lines might need to be executed to evaluate `obj` or the statement indexed by `idx`. If `isrequired[i]` is `false`, the `i`th statement is *not* required. @@ -601,21 +654,26 @@ will end up skipping a subset of such statements, perhaps while repeating others See also [`lines_required!`](@ref) and [`selective_eval!`](@ref). """ -function lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges; kwargs...) +function lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges, + info::SelectiveEvalInfo=SelectiveEvalInfo(); + kwargs...) isrequired = falses(length(edges.preds)) objs = Set{GlobalRef}([obj]) - return lines_required!(isrequired, objs, src, edges; kwargs...) + return lines_required!(isrequired, objs, src, edges, info; kwargs...) end -function lines_required(idx::Int, src::CodeInfo, edges::CodeEdges; kwargs...) +function lines_required(idx::Int, src::CodeInfo, edges::CodeEdges, + info::SelectiveEvalInfo=SelectiveEvalInfo(); + kwargs...) isrequired = falses(length(edges.preds)) isrequired[idx] = true objs = Set{GlobalRef}() - return lines_required!(isrequired, objs, src, edges; kwargs...) + return lines_required!(isrequired, objs, src, edges, info; kwargs...) end """ - lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges; + lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges, + [info::SelectiveEvalInfo]; norequire = ()) Like `lines_required`, but where `isrequired[idx]` has already been set to `true` for all statements @@ -627,9 +685,11 @@ should _not_ be marked as a requirement. For example, use `norequire = LoweredCodeUtils.exclude_named_typedefs(src, edges)` if you're extracting method signatures and not evaluating new definitions. """ -function lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges; kwargs...) +function lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges, + info::SelectiveEvalInfo=SelectiveEvalInfo(); + kwargs...) objs = Set{GlobalRef}() - return lines_required!(isrequired, objs, src, edges; kwargs...) + return lines_required!(isrequired, objs, src, edges, info; kwargs...) end function exclude_named_typedefs(src::CodeInfo, edges::CodeEdges) @@ -649,7 +709,9 @@ function exclude_named_typedefs(src::CodeInfo, edges::CodeEdges) return norequire end -function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, edges::CodeEdges; norequire = ()) +function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, edges::CodeEdges, + info::SelectiveEvalInfo=SelectiveEvalInfo(); + norequire = ()) # Mark any requested objects (their lines of assignment) objs = add_requests!(isrequired, objs, edges, norequire) @@ -684,7 +746,10 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, end # now mark the active goto nodes - add_active_gotos!(isrequired, src, cfg, postdomtree) + add_active_gotos!(isrequired, src, cfg, postdomtree, info) + + # check if there are any implicit return blocks + record_implcit_return!(info, isrequired, cfg) return isrequired end @@ -762,19 +827,19 @@ end ## Add control-flow - # The goal of this function is to request concretization of the minimal necessary control # flow to evaluate statements whose concretization have already been requested. # The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active # block in the blocks reachable from a conditional branch up to its successors' nearest # common post-dominator (referred to as 𝑰𝑵𝑭𝑳 in the paper), it is necessary to follow -# that conditional branch and execute the code. Otherwise, execution can be short-circuited +# that conditional branch and execute the code. Otherwise, execution can be short-cut # from the conditional branch to the nearest common post-dominator. # -# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`), -# "short-circuiting" a specific code region is not a simple task. Simply ignoring the path -# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, -# a more careful implementation is required for this aspect. +# It is important to note that in Julia's intermediate code representation (`CodeInfo`), +# "short-cutting" a specific code region is not a simple task. Simply incrementing the +# program counter without executing the statements of 𝑰𝑵𝑭𝑳 blocks does not guarantee that +# the program counter fall-throughs to the post-dominator. +# To handle such cases, `selective_eval!` needs to use `SelectiveInterpreter`. # # [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984. function add_control_flow!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) @@ -849,8 +914,8 @@ function reachable_blocks(cfg, from_bb::Int, to_bb::Int) return visited end -function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) - dead_blocks = compute_dead_blocks(isrequired, src, cfg, postdomtree) +function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree, info::SelectiveEvalInfo) + dead_blocks = compute_dead_blocks!(isrequired, src, cfg, postdomtree, info) changed = false for bbidx = 1:length(cfg.blocks) if bbidx ∉ dead_blocks @@ -868,7 +933,7 @@ function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) end # find dead blocks using the same approach as `add_control_flow!`, for the converged `isrequired` -function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree) +function compute_dead_blocks!(isrequired, src::CodeInfo, cfg::CFG, postdomtree, info::SelectiveEvalInfo) dead_blocks = BitSet() for bbidx = 1:length(cfg.blocks) bb = cfg.blocks[bbidx] @@ -889,6 +954,11 @@ function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree) end if !is_𝑰𝑵𝑭𝑳_active union!(dead_blocks, delete!(𝑰𝑵𝑭𝑳, postdominator)) + if postdominator ≠ 0 + postdominator_bb = cfg.blocks[postdominator] + postdominator_entryidx = postdominator_bb.stmts[begin] + push!(info.shortcuts, CFGShortCut(termidx, postdominator_entryidx)) + end end end end @@ -896,6 +966,19 @@ function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree) return dead_blocks end +function record_implcit_return!(info::SelectiveEvalInfo, isrequired, cfg::CFG) + for bbidx = 1:length(cfg.blocks) + bb = cfg.blocks[bbidx] + if isempty(bb.succs) + i = findfirst(idx::Int->!isrequired[idx], bb.stmts) + if !isnothing(i) + push!(info.implicit_returns, bb.stmts[i]) + end + end + end + nothing +end + # Do a traveral of "numbered" predecessors and find statement ranges and names of type definitions function find_typedefs(src::CodeInfo) typedef_blocks, typedef_names = UnitRange{Int}[], Symbol[] @@ -1018,26 +1101,19 @@ function add_inplace!(isrequired, src, edges, norequire) return changed end -""" - struct SelectiveInterpreter{S<:Interpreter,T<:AbstractVector{Bool}} <: Interpreter - inner::S - isrequired::T - end - -An `JuliaInterpreter.Interpreter` that executes only the statements marked `true` in `isrequired`. -Note that this interpreter does not recurse into callee frames. -That is, when `JuliaInterpreter.finish!(interp::SelectiveInterpreter, frame, ...)` is -performed, the `frame` will be executed selectively according to `interp.isrequired`, but -any callee frames within it will be executed by `interp.inner::Interpreter`, not by `interp`. -""" -struct SelectiveInterpreter{S<:Interpreter,T<:AbstractVector{Bool}} <: Interpreter - inner::S - isrequired::T -end function JuliaInterpreter.step_expr!(interp::SelectiveInterpreter, frame::Frame, istoplevel::Bool) pc = frame.pc + if pc in interp.info.implicit_returns + return nothing + elseif pc_expr(frame) isa GotoIfNot + for shortcut in interp.info.shortcuts + if shortcut.from == pc + return frame.pc = shortcut.to + end + end + end if interp.isrequired[pc] - step_expr!(interp.inner, frame::Frame, istoplevel::Bool) + step_expr!(interp.inner, frame, istoplevel) else next_or_nothing!(interp, frame) end @@ -1068,12 +1144,23 @@ See [`selective_eval_fromstart!`](@ref) to have that performed automatically. `isrequired` pertains only to `frame` itself, not any of its callees. +When `interp.info::SelectiveEvalInfo` is configured, the selective evaluation execution +becomes fully correct. Conversely, with the default `finish_and_return!`, selective +evaluation may not be necessarily correct for all possible Julia code (see +https://github.com/JuliaDebug/LoweredCodeUtils.jl/pull/99 for more details). + +Ensure that the specified `interp` is properly synchronized with `isrequired`. +Additionally note that, at present, it is not possible to recurse the `interp`. +In other words, there is no system in place for inforocedural selective evaluation. + This will return either a `BreakpointRef`, the value obtained from the last executed statement (if stored to `frame.framedata.ssavlues`), or `nothing`. Typically, assignment to a variable binding does not result in an ssa store by JuliaInterpreter. """ -selective_eval!(interp::Interpreter, frame::Frame, isrequired::AbstractVector{Bool}, istoplevel::Bool=false) = - JuliaInterpreter.finish_and_return!(SelectiveInterpreter(interp, isrequired), frame, istoplevel) +function selective_eval!(interp::Interpreter, frame::Frame, isrequired::AbstractVector{Bool}, istoplevel::Bool=false) + interp = SelectiveInterpreter(interp, isrequired, SelectiveEvalInfo()) + JuliaInterpreter.finish_and_return!(interp, frame, istoplevel) +end selective_eval!(args...) = selective_eval!(RecursiveInterpreter(), args...) """ diff --git a/src/packagedef.jl b/src/packagedef.jl index 21c7d21..3ff177b 100644 --- a/src/packagedef.jl +++ b/src/packagedef.jl @@ -24,7 +24,8 @@ const trackedheads = (:method,) # Revise uses this (for now), don't delete; a const structdecls = (:_structtype, :_abstracttype, :_primitivetype) export signature, rename_framemethods!, methoddef!, methoddefs!, bodymethod -export CodeEdges, lines_required, lines_required!, selective_eval!, selective_eval_fromstart! +export CodeEdges, SelectiveEvalInfo, SelectiveInterpreter, + lines_required, lines_required!, selective_eval!, selective_eval_fromstart! include("utils.jl") include("signatures.jl") diff --git a/src/signatures.jl b/src/signatures.jl index 3a3378c..fad03b7 100644 --- a/src/signatures.jl +++ b/src/signatures.jl @@ -334,6 +334,7 @@ function _rename_framemethods!(interp::Interpreter, frame::Frame, set_to_running_name!(interp, replacements, frame, methodinfos, selfcalls[idx], calledby, callee, caller) catch err @warn "skipping callee $callee (called by $caller) due to $err" + # showerror(stderr, err, stacktrace(catch_backtrace())) end end for sc in selfcalls diff --git a/test/codeedges.jl b/test/codeedges.jl index 902756d..f6d1c2e 100644 --- a/test/codeedges.jl +++ b/test/codeedges.jl @@ -217,6 +217,25 @@ module ModSelective end @test ModSelective.k11 == 0 @test 3 <= ModSelective.s11 <= 15 + # Final block is not a `return`: Need to use `controller::SelectiveEvalController` explicitly + ex = quote + x = 1 + yy = 7 + @label loop + x += 1 + x < 5 || return yy + @goto loop + end + frame = Frame(ModSelective, ex) + src = frame.framecode.src + edges = CodeEdges(ModSelective, src) + info = SelectiveEvalInfo() + isrequired = lines_required(GlobalRef(ModSelective, :x), src, edges, info) + interp = LoweredCodeUtils.SelectiveInterpreter(LoweredCodeUtils.RecursiveInterpreter(), isrequired, info) + JuliaInterpreter.finish_and_return!(interp, frame, true) + @test ModSelective.x == 5 + @test !isdefined(ModSelective, :yy) + # Control-flow in an abstract type definition ex = :(abstract type StructParent{T, N} <: AbstractArray{T, N} end) frame = Frame(ModSelective, ex) diff --git a/test/signatures.jl b/test/signatures.jl index 9d5d1fd..0afab9b 100644 --- a/test/signatures.jl +++ b/test/signatures.jl @@ -414,9 +414,10 @@ bodymethtest5(x, y=Dict(1=>2)) = 5 oldenv = Pkg.project().path try # we test with the old version of CBinding, let's do it in an isolated environment + # so we don't cause package conflicts with everything else Pkg.activate(; temp=true, io=devnull) - @info "Adding CBinding to the environment for test purposes" + @info "Adding CBinding v0.9.4 to the environment for test purposes" Pkg.add(; name="CBinding", version="0.9.4", io=devnull) # `@cstruct` isn't defined for v1.0 and above m = Module()