diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
index f4adb47abf3d1..b78f6c29c05e6 100644
--- a/base/compiler/bootstrap.jl
+++ b/base/compiler/bootstrap.jl
@@ -30,6 +30,10 @@ let
             println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i)))
         end
     end
+    # bootstraps for escape analysis
+    # NOTE make sure we infer `find_escapes!` first, since it seems to be costly when run in the interpreter;
+    # otherwise the bootstrap of `typeinf_ext` can be really slow
+    pushfirst!(fs, EscapeAnalysis.find_escapes!, EscapeAnalysis.escape_builtin!)
     starttime = time()
     for f in fs
         for m in _methods_by_ftype(Tuple{typeof(f), Vararg{Any}}, 10, typemax(UInt))
diff --git a/base/compiler/escape.jl b/base/compiler/escape.jl
new file mode 100644
index 0000000000000..63b9d91dd0c88
--- /dev/null
+++ b/base/compiler/escape.jl
@@ -0,0 +1,484 @@
+module EscapeAnalysis
+
+export find_escapes!
+
+import Core:
+    SimpleVector, GotoNode, GotoIfNot, Argument, IntrinsicFunction, Const, sizeof,
+    Expr, GlobalRef
+
+import ..Compiler:
+    Vector, IdDict, BitSet, MethodInstance, IRCode, SSAValue, PiNode, PhiNode,
+    PhiCNode, UpsilonNode, ReturnNode, IR_FLAG_EFFECT_FREE, IR_FLAG_NO_ESCAPE,
+    ==, !, !==, !=, ≠, :, ≤, &, |, +, -, *, <, <<, ∪, ∩, ⊆, in, ∉,
+    isbitstype, ismutabletype, widenconst, argextype, argtype_to_function, isexpr,
+    is_meta_expr_head, copy, zip, empty!, length, get, first, isassigned, push!, isempty,
+    @assert, @nospecialize
+
+isnothing(x) = x === nothing
+
+# analysis
+# ========
+
+"""
+    x::EscapeLattice
+
+A lattice for escape information, which holds the following properties:
+- `x.Analyzed::Bool`: not formally part of the lattice; indicates whether this statement has been analyzed at all
+- `x.ReturnEscape::BitSet`: keeps the SSA numbers of the return statements via which it can escape to the caller
+  * `isempty(x.ReturnEscape)` means it never escapes to the caller
+  * otherwise it indicates it will escape to the caller via return (possibly as a field),
+    where `0 ∈ x.ReturnEscape` has the special meaning that it's visible to the caller
+    simply because it's passed as a call argument
+- `x.ThrownEscape::Bool`: indicates it may escape somewhere through an exception (possibly as a field)
+- `x.GlobalEscape::Bool`: indicates it may escape to the global scope (possibly as a field)
+- `x.ArgEscape::Int` (not implemented yet): indicates it will escape to the caller through `setfield!` on argument(s)
+  * `-1` : no escape
+  * `0` : unknown or multiple
+  * `n` : through argument `n`
+
+These attributes can be combined to create a partial lattice that has a finite height, given
+that the input program has a finite number of statements, which is assured by Julia's semantics.
+
+There are utility constructors to create common `EscapeLattice`s, e.g.,
+- `NoEscape()`: the bottom element of this lattice, meaning it won't escape anywhere
+- `AllEscape()`: the topmost element of this lattice, meaning it will escape everywhere
+
+The escape analysis transitions these elements from the bottom to the top,
+in the same direction as Julia's native type inference routine.
+An abstract state is initialized with the bottom(-like) elements:
+- the call arguments are initialized as `ArgumentReturnEscape()`, because they're immediately visible to a caller
+- the other states are initialized as `NotAnalyzed()`, a special lattice element that is
+  slightly lower than `NoEscape` but doesn't carry any meaning other than "not analyzed yet"
+  (thus it's not formally part of the lattice).
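+
+# Examples
+
+An illustrative sketch of the ordering (the constructors and the `⊑`/`⊔` operators
+are defined later in this file):
+```julia
+NoEscape() ⊑ ReturnEscape(1) ⊑ AllEscape()     # bottom-to-top ordering (true)
+NoEscape() ⊔ ThrownEscape() == ThrownEscape()  # the join accumulates escape information (true)
+has_return_escape(ReturnEscape(1), 1)          # escapes to the caller via the return at pc 1 (true)
+```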
+"""
+struct EscapeLattice
+    Analyzed::Bool
+    ReturnEscape::BitSet
+    ThrownEscape::Bool
+    GlobalEscape::Bool
+    # TODO: ArgEscape::Int
+end
+
+function ==(x::EscapeLattice, y::EscapeLattice)
+    return x.Analyzed === y.Analyzed &&
+           x.ReturnEscape == y.ReturnEscape &&
+           x.ThrownEscape === y.ThrownEscape &&
+           x.GlobalEscape === y.GlobalEscape
+end
+
+# lattice constructors
+# precompute default values in order to eliminate computations at callsites
+const NO_RETURN = BitSet()
+const ARGUMENT_RETURN = BitSet(0)
+NotAnalyzed() = EscapeLattice(false, NO_RETURN, false, false) # not formally part of the lattice
+NoEscape() = EscapeLattice(true, NO_RETURN, false, false)
+ReturnEscape(returns::BitSet) = EscapeLattice(true, returns, false, false)
+ReturnEscape(pc::Int) = ReturnEscape(BitSet(pc))
+ArgumentReturnEscape() = ReturnEscape(ARGUMENT_RETURN)
+ThrownEscape() = EscapeLattice(true, NO_RETURN, true, false)
+GlobalEscape() = EscapeLattice(true, NO_RETURN, false, true)
+let
+    all_return = BitSet(0:1000000)
+    global AllReturnEscape() = ReturnEscape(all_return) # used for `show`
+    global AllEscape() = EscapeLattice(true, all_return, true, true)
+end
+
+# Convenience names for some ⊑ queries
+export
+    has_not_analyzed,
+    has_no_escape,
+    has_return_escape,
+    has_thrown_escape,
+    has_global_escape,
+    has_all_escape,
+    can_elide_finalizer
+has_not_analyzed(x::EscapeLattice) = x == NotAnalyzed()
+has_no_escape(x::EscapeLattice) = x ⊑ NoEscape()
+has_return_escape(x::EscapeLattice) = !isempty(x.ReturnEscape)
+has_return_escape(x::EscapeLattice, pc::Int) = pc in x.ReturnEscape
+has_thrown_escape(x::EscapeLattice) = x.ThrownEscape
+has_global_escape(x::EscapeLattice) = x.GlobalEscape
+has_all_escape(x::EscapeLattice) = AllEscape() == x
+
+"""
+    can_elide_finalizer(x::EscapeLattice, pc::Int) -> Bool
+
+Queries the validity of the finalizer-elision optimization at the `return` site of statement `pc`,
+which inserts a `finalize` call when the lifetime of the object of interest ends.
+Note that we don't need to take `x.ThrownEscape` into account, because the object will
+never have been thrown if program execution reaches this `return` site.
+"""
+function can_elide_finalizer(x::EscapeLattice, pc::Int)
+    x.GlobalEscape && return false
+    0 in x.ReturnEscape && return false
+    return pc ∉ x.ReturnEscape
+end
+
+"""
+    can_allocate_on_stack(x::EscapeLattice) -> Bool
+
+Queries the validity of the heap-to-stack optimization, which allocates the mutable object that
+`x` represents on the stack rather than the heap.
+The condition is almost the same as `has_no_escape(x)`, except that we can additionally ignore
+`ThrownEscape` if it's handled within the analysis frame.
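+
+For example (an illustrative sketch; establishing that the `throw` is handled within
+the frame is exactly what the analysis would have to do), the allocation below
+escapes only into an exception that is caught in the same frame, so stack
+allocation would remain valid:
+```julia
+function f(cond::Bool)
+    r = Ref(0)            # the allocation of interest
+    try
+        cond && throw(r)  # r gets `ThrownEscape`
+    catch err
+        err isa Base.RefValue && (err[] += 1)
+    end
+    return nothing        # r never leaves `f`
+end
+```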
+"""
+function can_allocate_on_stack(x::EscapeLattice)
+    # NOTE: `has_unhandled_thrown_escape` is not defined yet in this patch
+    return x.Analyzed &&
+           !has_return_escape(x) &&
+           !has_unhandled_thrown_escape(x) &&
+           !x.GlobalEscape
+end
+
+function ⊑(x::EscapeLattice, y::EscapeLattice)
+    if x.Analyzed ≤ y.Analyzed &&
+       x.ReturnEscape ⊆ y.ReturnEscape &&
+       x.ThrownEscape ≤ y.ThrownEscape &&
+       x.GlobalEscape ≤ y.GlobalEscape
+        return true
+    end
+    return false
+end
+⋤(x::EscapeLattice, y::EscapeLattice) = ⊑(x, y) && !⊑(y, x)
+
+function ⊔(x::EscapeLattice, y::EscapeLattice)
+    return EscapeLattice(
+        x.Analyzed | y.Analyzed,
+        x.ReturnEscape ∪ y.ReturnEscape,
+        x.ThrownEscape | y.ThrownEscape,
+        x.GlobalEscape | y.GlobalEscape,
+        )
+end
+
+function ⊓(x::EscapeLattice, y::EscapeLattice)
+    return EscapeLattice(
+        x.Analyzed & y.Analyzed,
+        x.ReturnEscape ∩ y.ReturnEscape,
+        x.ThrownEscape & y.ThrownEscape,
+        x.GlobalEscape & y.GlobalEscape,
+        )
+end
+
+"""
+    state::EscapeState
+
+Extended lattice that maps arguments and SSA values to escape information represented as `EscapeLattice`:
+- `state.arguments::Vector{EscapeLattice}`: escape information about "arguments" – note that
+  "argument" can include both call arguments and slots appearing in the analysis frame
+- `state.ssavalues::Vector{EscapeLattice}`: escape information about each SSA value
+"""
+struct EscapeState
+    arguments::Vector{EscapeLattice}
+    ssavalues::Vector{EscapeLattice}
+end
+function EscapeState(nslots::Int, nargs::Int, nstmts::Int)
+    arguments = EscapeLattice[
+        1 ≤ i ≤ nargs ? ArgumentReturnEscape() : NotAnalyzed() for i in 1:nslots]
+    ssavalues = EscapeLattice[NotAnalyzed() for _ in 1:nstmts]
+    return EscapeState(arguments, ssavalues)
+end
+
+const GLOBAL_ESCAPE_CACHE = IdDict{MethodInstance,EscapeState}()
+__clear_escape_cache!() = empty!(GLOBAL_ESCAPE_CACHE)
+
+const Changes = Vector{Tuple{Any,EscapeLattice}}
+
+"""
+    find_escapes!(ir::IRCode, nargs::Int) -> EscapeState
+
+The escape analysis implementation is based on the data-flow algorithm described in the paper [^MM02].
+The analysis works on the lattice of [`EscapeLattice`](@ref) and transitions lattice elements
+from the bottom to the top in a _backward_ way, i.e. data flows from usage sites to definitions,
+until every lattice element converges to a fixed point, by maintaining a (conceptual) working set
+that contains the program counters of the SSA statements that remain to be analyzed.
+Note that the analysis only manages a single global state, with some flow-sensitivity
+encoded as properties of `EscapeLattice`.
+
+[^MM02]: A Graph-Free Approach to Data-Flow Analysis.
+         Markus Mohnen, 2002, April.
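+
+A minimal usage sketch, mirroring how the `run_passes` hook added later in this
+diff drives the analysis:
+```julia
+state = find_escapes!(ir, nargs + 1)  # analyze the post-inlining `IRCode`
+has_no_escape(state.ssavalues[pc])    # then query the escape information of statement `pc`
+```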
+"""
+function find_escapes!(ir::IRCode, nargs::Int)
+    (; stmts, sptypes, argtypes) = ir
+    nstmts = length(stmts)
+
+    # only manage a single state, some flow-sensitivity is encoded as `EscapeLattice` properties
+    state = EscapeState(length(ir.argtypes), nargs, nstmts)
+    changes = Changes() # stashes changes that happen at the current statement
+
+    while true
+        local anyupdate = false
+
+        for pc in nstmts:-1:1
+            stmt = stmts.inst[pc]
+
+            # we taint values with the `ThrownEscape` property when the statement may throw,
+            # using the effect-freeness information computed by the inliner
+            is_effect_free = stmts.flag[pc] & IR_FLAG_EFFECT_FREE ≠ 0
+
+            # collect escape information
+            if isa(stmt, Expr)
+                head = stmt.head
+                if head === :call
+                    has_changes = escape_call!(stmt.args, pc, state, ir, changes)
+                    if !is_effect_free
+                        add_changes!(stmt.args, ir, ThrownEscape(), changes)
+                    else
+                        has_changes || continue
+                    end
+                elseif head === :invoke
+                    escape_invoke!(stmt.args, pc, state, ir, changes)
+                elseif head === :new
+                    info = state.ssavalues[pc]
+                    info == NotAnalyzed() && (info = NoEscape())
+                    for arg in stmt.args[2:end]
+                        push!(changes, (arg, info))
+                    end
+                    push!(changes, (SSAValue(pc), info)) # we will want to know whether this allocation escapes or not
+                elseif head === :splatnew
+                    info = state.ssavalues[pc]
+                    info == NotAnalyzed() && (info = NoEscape())
+                    # splatnew passes field values using a single tuple (args[2])
+                    push!(changes, (stmt.args[2], info))
+                    push!(changes, (SSAValue(pc), info)) # we will want to know whether this allocation escapes or not
+                elseif head === :(=)
+                    lhs, rhs = stmt.args
+                    if isa(lhs, GlobalRef) # global store
+                        add_change!(rhs, ir, GlobalEscape(), changes)
+                    end
+                elseif head === :foreigncall
+                    # for foreigncall we simply escape every argument (args[6:5+length(args[3])])
+                    # and its name (args[1])
+                    # TODO: we can apply a similar strategy to builtin calls and specialize some foreigncalls
+                    foreigncall_nargs = length((stmt.args[3])::SimpleVector)
+                    name = stmt.args[1]
+                    # if normalize(name) === :jl_gc_add_finalizer_th
+                    #     continue # XXX assume this finalizer call is valid for finalizer elision
+                    # end
+                    push!(changes, (name, ThrownEscape()))
+                    add_changes!(stmt.args[6:5+foreigncall_nargs], ir, ThrownEscape(), changes)
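+                    # (illustration: for `ccall(:jl_breakpoint, Cvoid, (Any,), x)` the lines
+                    # above taint the runtime argument `x` (args[6]) and the name `:jl_breakpoint`
+                    # (args[1]) with `ThrownEscape`, while the compile-time return/argument types
+                    # in args[2] and args[3] are never tainted)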
+                elseif head === :throw_undef_if_not # XXX when is this expression inserted?
+                    add_change!(stmt.args[1], ir, ThrownEscape(), changes)
+                elseif is_meta_expr_head(head)
+                    # meta expressions don't account for any usages
+                    continue
+                elseif head === :static_parameter
+                    # :static_parameter refers to a static parameter; since they exist
+                    # statically, we're really not interested in their escapes
+                    continue
+                elseif head === :copyast
+                    # copyast simply copies a surface-syntax AST and should never use any arguments or SSA values
+                    continue
+                elseif head === :undefcheck
+                    # undefcheck is temporarily inserted by the compiler;
+                    # it will be processed by a later pass, so it doesn't change any escape state
+                    continue
+                elseif head === :the_exception
+                    # we don't propagate escape information on exceptions via this expression, but rather
+                    # use a dedicated lattice property `ThrownEscape`
+                    continue
+                elseif head === :isdefined
+                    # just returns a `Bool`; doesn't account for any usages
+                    continue
+                elseif head === :enter || head === :leave || head === :pop_exception
+                    # these exception-frame management expressions don't account for any usages,
+                    # so we can just ignore them
+                    continue
+                elseif head === :gc_preserve_begin || head === :gc_preserve_end
+                    # `GC.@preserve` may "use" arbitrary values, but we can just ignore the escape information
+                    # imposed on `GC.@preserve` expressions, since they're supposed to never be used elsewhere
+                    continue
+                else
+                    add_changes!(stmt.args, ir, AllEscape(), changes)
+                end
+            elseif isa(stmt, GlobalRef) # global load
+                add_change!(SSAValue(pc), ir, GlobalEscape(), changes)
+            elseif isa(stmt, PiNode)
+                if isdefined(stmt, :val)
+                    info = state.ssavalues[pc]
+                    push!(changes, (stmt.val, info))
+                end
+            elseif isa(stmt, PhiNode)
+                info = state.ssavalues[pc]
+                values = stmt.values
+                for i in 1:length(values)
+                    if isassigned(values, i)
+                        push!(changes, (values[i], info))
+                    end
+                end
+            elseif isa(stmt, PhiCNode)
+                info = state.ssavalues[pc]
+                values = stmt.values
+                for i in 1:length(values)
+                    if isassigned(values, i)
+                        push!(changes, (values[i], info))
+                    end
+                end
+            elseif isa(stmt, UpsilonNode)
+                if isdefined(stmt, :val)
+                    info = state.ssavalues[pc]
+                    push!(changes, (stmt.val, info))
+                end
+            elseif isa(stmt, ReturnNode)
+                if isdefined(stmt, :val)
+                    add_change!(stmt.val, ir, ReturnEscape(pc), changes)
+                end
+            else
+                @assert stmt isa GotoNode || stmt isa GotoIfNot || stmt isa GlobalRef || isnothing(stmt) # TODO remove me
+                continue
+            end
+
+            isempty(changes) && continue
+
+            anyupdate |= propagate_changes!(state, changes)
+
+            empty!(changes)
+        end
+
+        anyupdate || break
+    end
+
+    for pc in 1:nstmts
+        # the heap-to-stack optimization is applied to heap-allocated objects that do not escape
+        if isexpr(stmts.inst[pc], :new) && ismutabletype(widenconst(stmts.type[pc])) && has_no_escape(state.ssavalues[pc])
+            stmts.flag[pc] |= IR_FLAG_NO_ESCAPE
+        end
+    end
+
+    return state
+end
+
+# propagate changes, and check convergence
+function propagate_changes!(state::EscapeState, changes::Changes)
+    local anychanged = false
+
+    for (x, info) in changes
+        if isa(x, Argument)
+            old = state.arguments[x.n]
+            new = old ⊔ info
+            if old ≠ new
+                state.arguments[x.n] = new
+                anychanged |= true
+            end
+        elseif isa(x, SSAValue)
+            old = state.ssavalues[x.id]
+            new = old ⊔ info
+            if old ≠ new
+                state.ssavalues[x.id] = new
+                anychanged |= true
+            end
+        end
+    end
+
+    return anychanged
+end
+
+function add_changes!(args::Vector{Any}, ir::IRCode, info::EscapeLattice, changes::Changes)
+    for x in args
+        add_change!(x, ir, info, changes)
+    end
+end
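+
+# NOTE: values of `isbitstype` type are immutable and identity-free, so tracking them
+# is pointless; e.g. `add_change!(42, ir, AllEscape(), changes)` records nothing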
+
+function add_change!(@nospecialize(x), ir::IRCode, info::EscapeLattice, changes::Changes)
+    if !isbitstype(widenconst(argextype(x, ir, ir.sptypes, ir.argtypes)))
+        push!(changes, (x, info))
+    end
+end
+
+function escape_invoke!(args::Vector{Any}, pc::Int,
+                        state::EscapeState, ir::IRCode, changes::Changes)
+    linfo = first(args)::MethodInstance
+    linfostate = get(GLOBAL_ESCAPE_CACHE, linfo, nothing)
+    args = args[2:end]
+    if isnothing(linfostate)
+        add_changes!(args, ir, AllEscape(), changes)
+    else
+        retinfo = state.ssavalues[pc] # escape information imposed on the call statement
+        for i in 1:length(args)
+            arg = args[i]
+            arginfo = linfostate.arguments[i]
+            info = from_interprocedural(arginfo, retinfo)
+            push!(changes, (arg, info))
+        end
+    end
+end
+
+# reinterpret the escape information imposed on the callee argument (`arginfo`) in the
+# context of the caller frame, using the escape information imposed on the return value (`retinfo`)
+function from_interprocedural(arginfo::EscapeLattice, retinfo::EscapeLattice)
+    ar = arginfo.ReturnEscape
+    @assert !isempty(ar) "invalid escape lattice element returned from inter-procedural context"
+    newarginfo = EscapeLattice(true, NO_RETURN, arginfo.ThrownEscape, arginfo.GlobalEscape)
+    if ar == ARGUMENT_RETURN
+        # if this is simply passed as a call argument, we can discard the `ReturnEscape`
+        # information and just propagate the other escape information
+        return newarginfo
+    else
+        # if this can be a return value, we have to merge it with the escape information
+        return newarginfo ⊔ retinfo
+    end
+end
+
+function escape_call!(args::Vector{Any}, pc::Int,
+                      state::EscapeState, ir::IRCode, changes::Changes)
+    ft = argextype(first(args), ir, ir.sptypes, ir.argtypes)
+    f = argtype_to_function(ft)
+    if isa(f, Core.IntrinsicFunction)
+        return false # COMBAK we may break soundness here, e.g. `pointerref`
+    else
+        ishandled = escape_builtin!(f, args, pc, state, ir, changes)::Union{Nothing,Bool}
+    end
+    isnothing(ishandled) && return false # nothing to propagate
+    if !ishandled
+        # if this call hasn't been handled by any of the pre-defined handlers,
+        # we escape this call conservatively
+        add_changes!(args[2:end], ir, AllEscape(), changes)
+    end
+    return true
+end
+
+# TODO: implement more builtins, make them more accurate
+# TODO: use `T_IFUNC`-like logic and don't abuse dispatch
+
+escape_builtin!(@nospecialize(f), _...) = return false
+
+escape_builtin!(::typeof(isa), _...) = return nothing
+escape_builtin!(::typeof(typeof), _...) = return nothing
+escape_builtin!(::typeof(Core.sizeof), _...) = return nothing
+escape_builtin!(::typeof(===), _...) = return nothing
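+
+# NOTE on the `Union{Nothing,Bool}` protocol above: `nothing` means "this builtin never
+# propagates escape information" (so there is nothing to record), while `false` means
+# "not handled here", which makes `escape_call!` fall back to the conservative `AllEscape()`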
+
+function escape_builtin!(::typeof(ifelse), args::Vector{Any}, pc::Int, state::EscapeState, ir::IRCode, changes::Changes)
+    length(args) == 4 || return false
+    f, cond, th, el = args
+    info = state.ssavalues[pc]
+    condt = argextype(cond, ir, ir.sptypes, ir.argtypes)
+    if isa(condt, Const) && (cond = condt.val; isa(cond, Bool))
+        if cond
+            push!(changes, (th, info))
+        else
+            push!(changes, (el, info))
+        end
+    else
+        push!(changes, (th, info))
+        push!(changes, (el, info))
+    end
+    return true
+end
+
+function escape_builtin!(::typeof(tuple), args::Vector{Any}, pc::Int, state::EscapeState, ir::IRCode, changes::Changes)
+    info = state.ssavalues[pc]
+    info == NotAnalyzed() && (info = NoEscape())
+    add_changes!(args[2:end], ir, info, changes)
+    return true
+end
+
+# TODO don't propagate escape information to the 1st argument, but propagate information to aliased field
+function escape_builtin!(::typeof(getfield), args::Vector{Any}, pc::Int, state::EscapeState, ir::IRCode, changes::Changes)
+    info = state.ssavalues[pc]
+    info == NotAnalyzed() && (info = NoEscape())
+    # only propagate info when the field itself is non-bitstype
+    if !isbitstype(widenconst(ir.stmts.type[pc]))
+        add_changes!(args[2:end], ir, info, changes)
+    end
+    return true
+end
+
+end # module EscapeAnalysis
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 1898aa8b75778..96352173307ae 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -289,6 +289,9 @@ function optimize(interp::AbstractInterpreter, opt::OptimizationState, params::O
     finish(interp, opt, params, ir, result)
 end
 
+include("compiler/escape.jl")
+using .EscapeAnalysis
+
 function run_passes(ci::CodeInfo, sv::OptimizationState)
     preserve_coverage = coverage_enabled(sv.mod)
     ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, sv)
@@ -299,6 +302,10 @@ function run_passes(ci::CodeInfo, sv::OptimizationState)
     @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
     #@timeit "verify 2" verify_ir(ir)
     ir = compact!(ir)
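+    # NOTE (sketch of the wiring): the analysis runs after inlining so that statically
+    # resolved calls appear as `:invoke` and can consult `GLOBAL_ESCAPE_CACHE`; caching
+    # the state per `MethodInstance` below is what makes `escape_invoke!` inter-procedural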
+    svdef = sv.linfo.def
+    nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0
+    state = find_escapes!(ir, nargs+1)
+    EscapeAnalysis.GLOBAL_ESCAPE_CACHE[sv.linfo] = state
     #@Base.show ("before_sroa", ir)
     @timeit "SROA" ir = getfield_elim_pass!(ir)
     #@Base.show ir.new_nodes
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index ed09d5316473a..63182f0fd801e 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -27,6 +27,9 @@ function _all(@nospecialize(f), a)
     return true
 end
 
+all(itr) = all(identity, itr)
+all(f, itr) = _all(f, itr)
+
 function contains_is(itr, @nospecialize(x))
     for y in itr
         if y === x
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 48f0005440d5d..0e9103617f3af 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1572,7 +1572,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             Success = ctx.builder.CreateZExt(Success, T_int8);
             jl_cgval_t argv[2] = {ghostValue(jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
             jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
-            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
         }
         else if (isswapfield) {
             return ghostValue(jltype);
@@ -1581,7 +1581,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             jl_cgval_t oldval = ghostValue(jltype);
             jl_cgval_t argv[2] = { oldval, newval(oldval) };
             jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
-            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
         }
     }
     Value *intcast = nullptr;
@@ -1864,7 +1864,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
        if (ismodifyfield) {
            jl_cgval_t argv[2] = { oldval, rhs };
            jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
-           oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+           oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
        }
        else if (!issetfield) { // swapfield or replacefield
            if (realelty != elty)
@@ -1883,7 +1883,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                Success = ctx.builder.CreateZExt(Success, T_int8);
                jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
                jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
-               oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+               oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
            }
        }
        return oldval;
@@ -3347,12 +3347,12 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
                 Success = ctx.builder.CreateZExt(Success, T_int8);
                 jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
                 jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
-                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
             }
             else if (ismodifyfield) {
                 jl_cgval_t argv[2] = {oldval, rhs};
                 jl_datatype_t *rettyp = jl_apply_modify_type(jfty);
-                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
             }
             return oldval;
         }
@@ -3368,7 +3368,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
     }
 }
 
-static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv)
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool tag_metadata)
 {
     assert(jl_is_datatype(ty));
     assert(jl_is_concrete_type(ty));
@@ -3522,6 +3522,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
         }
         Value *strct = emit_allocobj(ctx, jl_datatype_size(sty),
                                      literal_pointer_val(ctx, (jl_value_t*)ty));
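+        // NOTE (sketch of the contract): the `julia.noescape` metadata added below is the
+        // channel from the Julia-level escape analysis down to LLVM; llvm-alloc-opt.cpp later
+        // in this patch can detect it via hasJuliaNoEscapeMeta() (its fast path is still disabled)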
+        if (tag_metadata) {
+            MDNode* temp_node = MDNode::get(jl_LLVMContext, ConstantAsMetadata::get(ConstantInt::get(jl_LLVMContext, llvm::APInt(64, 0, false))));
+            MDNode* node = MDNode::get(jl_LLVMContext, temp_node);
+            ((CallInst *)strct)->setMetadata("julia.noescape", node);
+        }
         jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty);
         strct = decay_derived(ctx, strct);
         undef_derived_strct(ctx.builder, strct, sty, strctinfo.tbaa);
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 246534491151a..175a6ccf345ac 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -1170,7 +1170,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *t
                              jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
-static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv);
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool tag_metadata);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
@@ -2769,7 +2769,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             return true;
         }
         if (jl_is_tuple_type(rt) && jl_is_concrete_type(rt) && nargs == jl_datatype_nfields(rt)) {
-            *ret = emit_new_struct(ctx, rt, nargs, &argv[1]);
+            *ret = emit_new_struct(ctx, rt, nargs, &argv[1], false);
             return true;
         }
     }
@@ -4680,7 +4680,20 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             jl_is_datatype(jl_tparam0(ty)) &&
             jl_is_concrete_type(jl_tparam0(ty))) {
             assert(nargs <= jl_datatype_nfields(jl_tparam0(ty)) + 1);
-            return emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, &argv[1]);
+            // TODO: refactor the position of this flag
+            const uint8_t IR_FLAG_NO_ESCAPE = 0x01 << 5;
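+            // NOTE (assumption): this value has to stay in sync with the Julia-side
+            // IR_FLAG_NO_ESCAPE that find_escapes! sets in base/compiler/escape.jl;
+            // bit 5 is assumed to be the next free ssaflags bit after IR_FLAG_EFFECT_FREE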
+            jl_code_info_t *info = ctx.source;
+            uint8_t ssaflag = jl_array_len(info->ssaflags) > ssaval ? ((uint8_t*)jl_array_data(info->ssaflags))[ssaval] : 0;
+            // uint8_t ssaflag = ((uint8_t*)jl_array_data(info->ssaflags))[ssaval];
+            bool tag_metadata = false;
+            if ((ssaflag & IR_FLAG_NO_ESCAPE) != 0) {
+                tag_metadata = true;
+                printf("locate no-escape flag set stmt !!!!!! %d %d\n", ssaflag & IR_FLAG_NO_ESCAPE, ssaval);
+                printf("file name: %s func name: %s\n", ctx.file.str().c_str(), ctx.funcName.c_str());
+                // jl_(expr);
+                // jl_(ctx.code);
+            }
+            return emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, &argv[1], tag_metadata);
         }
         Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv, nargs, JLCALL_F_CC);
         // temporarily mark as `Any`, expecting `emit_ssaval_assign` to update
@@ -4768,7 +4781,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         jl_cgval_t env;
         // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC
         if (jl_is_concrete_type((jl_value_t*)env_t)) {
-            env = emit_new_struct(ctx, (jl_value_t*)env_t, nargs-5, &argv.data()[5]);
+            env = emit_new_struct(ctx, (jl_value_t*)env_t, nargs-5, &argv.data()[5], false);
         }
         else {
             Value *env_val = emit_jlcall(ctx, jltuple_func, V_rnull,
@@ -4826,7 +4839,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             fptr
         };
 
-        jl_cgval_t ret = emit_new_struct(ctx, closure_t, 6, closure_fields);
+        jl_cgval_t ret = emit_new_struct(ctx, closure_t, 6, closure_fields, false);
 
         ctx.oc_modules.push_back(std::move(closure_m));
 
@@ -6818,7 +6831,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed);
             }
             if (jl_is_concrete_type(vi.value.typ)) {
-                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs);
+                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs, false);
                 emit_varinfo_assign(ctx, vi, tuple);
             }
             else {
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index ec7060bd10a5e..f5867b23aee5c 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -74,6 +74,13 @@ static bool hasObjref(Type *ty)
     return false;
 }
 
+static bool hasJuliaNoEscapeMeta(CallInst *inst) {
+    if (inst->hasMetadataOtherThanDebugLoc()) {
+        MDNode *JLMD = inst->getMetadata("julia.noescape");
+        return JLMD != nullptr;
+    }
+    return false;
+}
 /**
  * Promote `julia.gc_alloc_obj` which do not have escaping root to a alloca.
  * Uses that are not considered to escape the object (i.e. heap address) includes,
@@ -323,19 +330,6 @@ void Optimizer::optimizeAll()
         auto orig = item.first;
         size_t sz = item.second;
         checkInst(orig);
-        if (use_info.escaped) {
-            if (use_info.hastypeof)
-                optimizeTag(orig);
-            continue;
-        }
-        if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve ||
-                                                           !use_info.refstore)) {
-            // No one took the address, no one reads anything and there's no meaningful
-            // preserve of fields (either no preserve/ccall or no object reference fields)
-            // We can just delete all the uses.
-            removeAlloc(orig);
-            continue;
-        }
         bool has_ref = false;
         bool has_refaggr = false;
         for (auto memop: use_info.memops) {
@@ -350,6 +344,23 @@ void Optimizer::optimizeAll()
                 }
             }
         }
+        // if (hasJuliaNoEscapeMeta(orig)) {
+        //     moveToStack(orig, sz, has_ref);
+        //     continue;
+        // }
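+        // NOTE: the escaped/removeAlloc checks were moved below the memop scan so that
+        // `has_ref` is already computed for the (experimental, currently disabled) fast
+        // path above, which would call moveToStack() directly for `julia.noescape` allocations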
+        if (use_info.escaped) {
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
+        }
+        if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve ||
+                                                           !use_info.refstore)) {
+            // No one took the address, no one reads anything and there's no meaningful
+            // preserve of fields (either no preserve/ccall or no object reference fields)
+            // We can just delete all the uses.
+            removeAlloc(orig);
+            continue;
+        }
         if (!use_info.hasunknownmem && !use_info.addrescaped && !has_refaggr) {
             // No one actually care about the memory layout of this object, split it.
             splitOnStack(orig);
@@ -939,6 +950,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
     }
     insertLifetime(ptr, ConstantInt::get(pass.T_int64, sz), orig_inst);
     auto new_inst = cast<Instruction>(prolog_builder.CreateBitCast(ptr, pass.T_pjlvalue));
+    // if (hasJuliaNoEscapeMeta(orig_inst)) {
+    //     new_inst = cast<Instruction>(prolog_builder.CreateAddrSpaceCast(new_inst, pass.T_prjlvalue));
+    // }
     new_inst->takeName(orig_inst);
 
     auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) {
@@ -975,11 +989,19 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
     };
     // Both `orig_i` and `new_i` should be pointer of the same type
     // but possibly different address spaces. `new_i` is always in addrspace 0.
+    // printf("LLVM code before opt\n");
+    // llvm_dump(&F);
     auto replace_inst = [&] (Instruction *user) {
+        // llvm_dump(user);
         Instruction *orig_i = cur.orig_i;
         Instruction *new_i = cur.new_i;
+
+        // llvm_dump(orig_i);
+        // llvm_dump(new_i);
         if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
             user->replaceUsesOfWith(orig_i, new_i);
+            // printf("hit here\n");
+            // llvm_dump(user);
         }
         else if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
@@ -1018,6 +1040,14 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
             user->replaceUsesOfWith(orig_i, replace);
         }
         else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
+            // if (auto call_inst = dyn_cast<CallInst>(orig_inst)) {
+            //     if (hasJuliaNoEscapeMeta(call_inst) && isa<AddrSpaceCastInst>(user)) {
+            //         auto *new_addrcast_inst = new AddrSpaceCastInst(new_i, user->getType(), "", user);
+            //         user->replaceAllUsesWith(new_addrcast_inst);
+            //         user->eraseFromParent();
+            //         return;
+            //     }
+            // }
             auto cast_t = PointerType::get(cast<PointerType>(user->getType())->getElementType(), 0);
             auto replace_i = new_i;
@@ -1040,6 +1070,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
             push_frame(gep, new_gep);
         }
         else {
+            printf("LLVM code before crashing:\n");
+            llvm_dump(user);
+            // llvm_dump(&F);
             abort();
         }
     };
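
# ---
# End-to-end sketch (illustrative, not part of the patch). For a hypothetical
#
#     mutable struct Counter
#         n::Int
#     end
#     count_one() = (c = Counter(1); c.n)
#
# `find_escapes!` leaves the `:new` statement for `c` with `has_no_escape`
# (the `getfield` of the isbits field `n` propagates nothing), so it sets
# IR_FLAG_NO_ESCAPE on that statement; emit_expr then tags the allocation with
# `julia.noescape` metadata, and llvm-alloc-opt could (once its disabled fast
# path is enabled) promote the allocation to the stack via moveToStack().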