diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
index f4adb47abf3d1..b78f6c29c05e6 100644
--- a/base/compiler/bootstrap.jl
+++ b/base/compiler/bootstrap.jl
@@ -30,6 +30,10 @@ let
             println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i)))
         end
     end
+    # bootstraps for escape analysis
+    # NOTE make sure we infer `find_escapes!` first, since it seems to be costly when run in the interpreter;
+    # otherwise the bootstrap of `typeinf_ext` can be really slow
+    pushfirst!(fs, EscapeAnalysis.find_escapes!, EscapeAnalysis.escape_builtin!)
     starttime = time()
     for f in fs
         for m in _methods_by_ftype(Tuple{typeof(f), Vararg{Any}}, 10, typemax(UInt))
diff --git a/base/compiler/escape.jl b/base/compiler/escape.jl
new file mode 100644
index 0000000000000..63b9d91dd0c88
--- /dev/null
+++ b/base/compiler/escape.jl
@@ -0,0 +1,484 @@
+module EscapeAnalysis
+
+export find_escapes!
+
+import Core:
+    SimpleVector, GotoNode, GotoIfNot, Argument, IntrinsicFunction, Const, sizeof,
+    Expr, GlobalRef
+
+import ..Compiler:
+    Vector, IdDict, BitSet, MethodInstance, IRCode, SSAValue, PiNode, PhiNode,
+    PhiCNode, UpsilonNode, ReturnNode, IR_FLAG_EFFECT_FREE, IR_FLAG_NO_ESCAPE,
+    ==, !, !==, !=, ≠, :, ≤, &, |, +, -, *, <, <<, ∪, ∩, ⊆, in, ∉,
+    isbitstype, ismutabletype, widenconst, argextype, argtype_to_function, isexpr,
+    is_meta_expr_head, copy, zip, empty!, length, get, first, isassigned, push!, isempty,
+    @assert, @nospecialize
+
+isnothing(x) = x === nothing
+
+# analysis
+# ========
+
+"""
+    x::EscapeLattice
+
+A lattice for escape information, which holds the following properties:
+- `x.Analyzed::Bool`: not formally part of the lattice; indicates whether this statement has been analyzed at all
+- `x.ReturnEscape::BitSet`: keeps the SSA numbers of the return statements via which it can escape to the caller
+  * `isempty(x.ReturnEscape)` means it never escapes to the caller
+  * otherwise it indicates it will escape to the caller via return (possibly as a field),
+    where `0 ∈ x.ReturnEscape` has the special meaning that it's visible to the caller
+    simply because it's passed as a call argument
+- `x.ThrownEscape::Bool`: indicates it may escape somewhere through an exception (possibly as a field)
+- `x.GlobalEscape::Bool`: indicates it may escape to the global scope (possibly as a field)
+- `x.ArgEscape::Int` (not implemented yet): indicates it will escape to the caller through `setfield!` on argument(s)
+  * `-1` : no escape
+  * `0` : unknown or multiple
+  * `n` : through argument `n`
+
+These attributes can be combined to create a partial lattice that has a finite height, given
+that the input program has a finite number of statements, which is assured by Julia's semantics.
+
+There are utility constructors to create common `EscapeLattice`s, e.g.,
+- `NoEscape()`: the bottom element of this lattice, meaning it won't escape anywhere
+- `AllEscape()`: the topmost element of this lattice, meaning it will escape everywhere
+
+The escape analysis transitions these elements from the bottom to the top,
+in the same direction as Julia's native type inference routine.
+An abstract state is initialized with the bottom(-like) elements:
+- the call arguments are initialized as `ArgumentReturnEscape()`, because they're immediately visible to a caller
+- the other states are initialized as `NotAnalyzed()`, a special lattice element that is
+  slightly lower than `NoEscape` but doesn't carry any meaning other than "not analyzed yet"
+  (thus it's not formally part of the lattice).
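+
+# Examples
+
+An illustrative sketch of the ordering (the constructors and the `⊑`/`⊔` operators
+are defined later in this file):
+```julia
+NoEscape() ⊑ ReturnEscape(1) ⊑ AllEscape()     # bottom-to-top ordering (true)
+NoEscape() ⊔ ThrownEscape() == ThrownEscape()  # the join accumulates escape information (true)
+has_return_escape(ReturnEscape(1), 1)          # escapes to the caller via the return at pc 1 (true)
+```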
+"""
+struct EscapeLattice
+    Analyzed::Bool
+    ReturnEscape::BitSet
+    ThrownEscape::Bool
+    GlobalEscape::Bool
+    # TODO: ArgEscape::Int
+end
+
+function ==(x::EscapeLattice, y::EscapeLattice)
+    return x.Analyzed === y.Analyzed &&
+           x.ReturnEscape == y.ReturnEscape &&
+           x.ThrownEscape === y.ThrownEscape &&
+           x.GlobalEscape === y.GlobalEscape
+end
+
+# lattice constructors
+# precompute default values in order to eliminate computations at callsites
+const NO_RETURN = BitSet()
+const ARGUMENT_RETURN = BitSet(0)
+NotAnalyzed() = EscapeLattice(false, NO_RETURN, false, false) # not formally part of the lattice
+NoEscape() = EscapeLattice(true, NO_RETURN, false, false)
+ReturnEscape(returns::BitSet) = EscapeLattice(true, returns, false, false)
+ReturnEscape(pc::Int) = ReturnEscape(BitSet(pc))
+ArgumentReturnEscape() = ReturnEscape(ARGUMENT_RETURN)
+ThrownEscape() = EscapeLattice(true, NO_RETURN, true, false)
+GlobalEscape() = EscapeLattice(true, NO_RETURN, false, true)
+let
+    all_return = BitSet(0:1000000)
+    global AllReturnEscape() = ReturnEscape(all_return) # used for `show`
+    global AllEscape() = EscapeLattice(true, all_return, true, true)
+end
+
+# Convenience names for some ⊑ queries
+export
+    has_not_analyzed,
+    has_no_escape,
+    has_return_escape,
+    has_thrown_escape,
+    has_global_escape,
+    has_all_escape,
+    can_elide_finalizer
+has_not_analyzed(x::EscapeLattice) = x == NotAnalyzed()
+has_no_escape(x::EscapeLattice) = x ⊑ NoEscape()
+has_return_escape(x::EscapeLattice) = !isempty(x.ReturnEscape)
+has_return_escape(x::EscapeLattice, pc::Int) = pc in x.ReturnEscape
+has_thrown_escape(x::EscapeLattice) = x.ThrownEscape
+has_global_escape(x::EscapeLattice) = x.GlobalEscape
+has_all_escape(x::EscapeLattice) = AllEscape() == x
+
+"""
+    can_elide_finalizer(x::EscapeLattice, pc::Int) -> Bool
+
+Queries the validity of the finalizer-elision optimization at the `return` site of statement `pc`,
+which inserts a `finalize` call when the lifetime of the object of interest ends.
+Note that we don't need to take `x.ThrownEscape` into account, because the object will
+never have been thrown if program execution reaches this `return` site.
+"""
+function can_elide_finalizer(x::EscapeLattice, pc::Int)
+    x.GlobalEscape && return false
+    0 in x.ReturnEscape && return false
+    return pc ∉ x.ReturnEscape
+end
+
+"""
+    can_allocate_on_stack(x::EscapeLattice) -> Bool
+
+Queries the validity of the heap-to-stack optimization, which allocates the mutable object that
+`x` represents on the stack rather than the heap.
+The condition is almost the same as `has_no_escape(x)`, except that we can additionally ignore
+`ThrownEscape` if it's handled within the analysis frame.
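+
+For example (an illustrative sketch; establishing that the `throw` is handled within
+the frame is exactly what the analysis would have to do), the allocation below
+escapes only into an exception that is caught in the same frame, so stack
+allocation would remain valid:
+```julia
+function f(cond::Bool)
+    r = Ref(0)            # the allocation of interest
+    try
+        cond && throw(r)  # r gets `ThrownEscape`
+    catch err
+        err isa Base.RefValue && (err[] += 1)
+    end
+    return nothing        # r never leaves `f`
+end
+```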
+"""
+function can_allocate_on_stack(x::EscapeLattice)
+    # NOTE: `has_unhandled_thrown_escape` is not defined yet in this patch
+    return x.Analyzed &&
+           !has_return_escape(x) &&
+           !has_unhandled_thrown_escape(x) &&
+           !x.GlobalEscape
+end
+
+function ⊑(x::EscapeLattice, y::EscapeLattice)
+    if x.Analyzed ≤ y.Analyzed &&
+       x.ReturnEscape ⊆ y.ReturnEscape &&
+       x.ThrownEscape ≤ y.ThrownEscape &&
+       x.GlobalEscape ≤ y.GlobalEscape
+        return true
+    end
+    return false
+end
+⋤(x::EscapeLattice, y::EscapeLattice) = ⊑(x, y) && !⊑(y, x)
+
+function ⊔(x::EscapeLattice, y::EscapeLattice)
+    return EscapeLattice(
+        x.Analyzed | y.Analyzed,
+        x.ReturnEscape ∪ y.ReturnEscape,
+        x.ThrownEscape | y.ThrownEscape,
+        x.GlobalEscape | y.GlobalEscape,
+        )
+end
+
+function ⊓(x::EscapeLattice, y::EscapeLattice)
+    return EscapeLattice(
+        x.Analyzed & y.Analyzed,
+        x.ReturnEscape ∩ y.ReturnEscape,
+        x.ThrownEscape & y.ThrownEscape,
+        x.GlobalEscape & y.GlobalEscape,
+        )
+end
+
+"""
+    state::EscapeState
+
+Extended lattice that maps arguments and SSA values to escape information represented as `EscapeLattice`:
+- `state.arguments::Vector{EscapeLattice}`: escape information about "arguments" – note that
+  "argument" can include both call arguments and slots appearing in the analysis frame
+- `state.ssavalues::Vector{EscapeLattice}`: escape information about each SSA value
+"""
+struct EscapeState
+    arguments::Vector{EscapeLattice}
+    ssavalues::Vector{EscapeLattice}
+end
+function EscapeState(nslots::Int, nargs::Int, nstmts::Int)
+    arguments = EscapeLattice[
+        1 ≤ i ≤ nargs ? ArgumentReturnEscape() : NotAnalyzed() for i in 1:nslots]
+    ssavalues = EscapeLattice[NotAnalyzed() for _ in 1:nstmts]
+    return EscapeState(arguments, ssavalues)
+end
+
+const GLOBAL_ESCAPE_CACHE = IdDict{MethodInstance,EscapeState}()
+__clear_escape_cache!() = empty!(GLOBAL_ESCAPE_CACHE)
+
+const Changes = Vector{Tuple{Any,EscapeLattice}}
+
+"""
+    find_escapes!(ir::IRCode, nargs::Int) -> EscapeState
+
+The escape analysis implementation is based on the data-flow algorithm described in the paper [^MM02].
+The analysis works on the lattice of [`EscapeLattice`](@ref) and transitions lattice elements
+from the bottom to the top in a _backward_ way, i.e. data flows from usage sites to definitions,
+until every lattice element converges to a fixed point, by maintaining a (conceptual) working set
+that contains the program counters of the SSA statements that remain to be analyzed.
+Note that the analysis only manages a single global state, with some flow-sensitivity
+encoded as properties of `EscapeLattice`.
+
+[^MM02]: A Graph-Free Approach to Data-Flow Analysis.
+         Markus Mohnen, 2002, April.
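+
+A minimal usage sketch, mirroring how the `run_passes` hook added later in this
+diff drives the analysis:
+```julia
+state = find_escapes!(ir, nargs + 1)  # analyze the post-inlining `IRCode`
+has_no_escape(state.ssavalues[pc])    # then query the escape information of statement `pc`
+```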
+"""
+function find_escapes!(ir::IRCode, nargs::Int)
+    (; stmts, sptypes, argtypes) = ir
+    nstmts = length(stmts)
+
+    # only manage a single state, some flow-sensitivity is encoded as `EscapeLattice` properties
+    state = EscapeState(length(ir.argtypes), nargs, nstmts)
+    changes = Changes() # stashes changes that happen at the current statement
+
+    while true
+        local anyupdate = false
+
+        for pc in nstmts:-1:1
+            stmt = stmts.inst[pc]
+
+            # we taint values with the `ThrownEscape` property when the statement may throw,
+            # using the effect-freeness information computed by the inliner
+            is_effect_free = stmts.flag[pc] & IR_FLAG_EFFECT_FREE ≠ 0
+
+            # collect escape information
+            if isa(stmt, Expr)
+                head = stmt.head
+                if head === :call
+                    has_changes = escape_call!(stmt.args, pc, state, ir, changes)
+                    if !is_effect_free
+                        add_changes!(stmt.args, ir, ThrownEscape(), changes)
+                    else
+                        has_changes || continue
+                    end
+                elseif head === :invoke
+                    escape_invoke!(stmt.args, pc, state, ir, changes)
+                elseif head === :new
+                    info = state.ssavalues[pc]
+                    info == NotAnalyzed() && (info = NoEscape())
+                    for arg in stmt.args[2:end]
+                        push!(changes, (arg, info))
+                    end
+                    push!(changes, (SSAValue(pc), info)) # we will want to know whether this allocation escapes or not
+                elseif head === :splatnew
+                    info = state.ssavalues[pc]
+                    info == NotAnalyzed() && (info = NoEscape())
+                    # splatnew passes field values using a single tuple (args[2])
+                    push!(changes, (stmt.args[2], info))
+                    push!(changes, (SSAValue(pc), info)) # we will want to know whether this allocation escapes or not
+                elseif head === :(=)
+                    lhs, rhs = stmt.args
+                    if isa(lhs, GlobalRef) # global store
+                        add_change!(rhs, ir, GlobalEscape(), changes)
+                    end
+                elseif head === :foreigncall
+                    # for foreigncall we simply escape every argument (args[6:5+length(args[3])])
+                    # and its name (args[1])
+                    # TODO: we can apply a similar strategy to builtin calls and specialize some foreigncalls
+                    foreigncall_nargs = length((stmt.args[3])::SimpleVector)
+                    name = stmt.args[1]
+                    # if normalize(name) === :jl_gc_add_finalizer_th
+                    #     continue # XXX assume this finalizer call is valid for finalizer elision
+                    # end
+                    push!(changes, (name, ThrownEscape()))
+                    add_changes!(stmt.args[6:5+foreigncall_nargs], ir, ThrownEscape(), changes)
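+                    # (illustration: for `ccall(:jl_breakpoint, Cvoid, (Any,), x)` the lines
+                    # above taint the runtime argument `x` (args[6]) and the name `:jl_breakpoint`
+                    # (args[1]) with `ThrownEscape`, while the compile-time return/argument types
+                    # in args[2] and args[3] are never tainted)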
+                elseif head === :throw_undef_if_not # XXX when is this expression inserted?
+                    add_change!(stmt.args[1], ir, ThrownEscape(), changes)
+                elseif is_meta_expr_head(head)
+                    # meta expressions don't account for any usages
+                    continue
+                elseif head === :static_parameter
+                    # :static_parameter refers to a static parameter; since they exist
+                    # statically, we're really not interested in their escapes
+                    continue
+                elseif head === :copyast
+                    # copyast simply copies a surface-syntax AST and should never use any arguments or SSA values
+                    continue
+                elseif head === :undefcheck
+                    # undefcheck is temporarily inserted by the compiler;
+                    # it will be processed by a later pass, so it doesn't change any escape state
+                    continue
+                elseif head === :the_exception
+                    # we don't propagate escape information on exceptions via this expression, but rather
+                    # use a dedicated lattice property `ThrownEscape`
+                    continue
+                elseif head === :isdefined
+                    # just returns a `Bool`; doesn't account for any usages
+                    continue
+                elseif head === :enter || head === :leave || head === :pop_exception
+                    # these exception-frame management expressions don't account for any usages,
+                    # so we can just ignore them
+                    continue
+                elseif head === :gc_preserve_begin || head === :gc_preserve_end
+                    # `GC.@preserve` may "use" arbitrary values, but we can just ignore the escape information
+                    # imposed on `GC.@preserve` expressions, since they're supposed to never be used elsewhere
+                    continue
+                else
+                    add_changes!(stmt.args, ir, AllEscape(), changes)
+                end
+            elseif isa(stmt, GlobalRef) # global load
+                add_change!(SSAValue(pc), ir, GlobalEscape(), changes)
+            elseif isa(stmt, PiNode)
+                if isdefined(stmt, :val)
+                    info = state.ssavalues[pc]
+                    push!(changes, (stmt.val, info))
+                end
+            elseif isa(stmt, PhiNode)
+                info = state.ssavalues[pc]
+                values = stmt.values
+                for i in 1:length(values)
+                    if isassigned(values, i)
+                        push!(changes, (values[i], info))
+                    end
+                end
+            elseif isa(stmt, PhiCNode)
+                info = state.ssavalues[pc]
+                values = stmt.values
+                for i in 1:length(values)
+                    if isassigned(values, i)
+                        push!(changes, (values[i], info))
+                    end
+                end
+            elseif isa(stmt, UpsilonNode)
+                if isdefined(stmt, :val)
+                    info = state.ssavalues[pc]
+                    push!(changes, (stmt.val, info))
+                end
+            elseif isa(stmt, ReturnNode)
+                if isdefined(stmt, :val)
+                    add_change!(stmt.val, ir, ReturnEscape(pc), changes)
+                end
+            else
+                @assert stmt isa GotoNode || stmt isa GotoIfNot || stmt isa GlobalRef || isnothing(stmt) # TODO remove me
+                continue
+            end
+
+            isempty(changes) && continue
+
+            anyupdate |= propagate_changes!(state, changes)
+
+            empty!(changes)
+        end
+
+        anyupdate || break
+    end
+
+    for pc in 1:nstmts
+        # the heap-to-stack optimization is applied to heap-allocated objects that do not escape
+        if isexpr(stmts.inst[pc], :new) && ismutabletype(widenconst(stmts.type[pc])) && has_no_escape(state.ssavalues[pc])
+            stmts.flag[pc] |= IR_FLAG_NO_ESCAPE
+        end
+    end
+
+    return state
+end
+
+# propagate changes, and check convergence
+function propagate_changes!(state::EscapeState, changes::Changes)
+    local anychanged = false
+
+    for (x, info) in changes
+        if isa(x, Argument)
+            old = state.arguments[x.n]
+            new = old ⊔ info
+            if old ≠ new
+                state.arguments[x.n] = new
+                anychanged |= true
+            end
+        elseif isa(x, SSAValue)
+            old = state.ssavalues[x.id]
+            new = old ⊔ info
+            if old ≠ new
+                state.ssavalues[x.id] = new
+                anychanged |= true
+            end
+        end
+    end
+
+    return anychanged
+end
+
+function add_changes!(args::Vector{Any}, ir::IRCode, info::EscapeLattice, changes::Changes)
+    for x in args
+        add_change!(x, ir, info, changes)
+    end
+end
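+
+# NOTE: values of `isbitstype` type are immutable and identity-free, so tracking them
+# is pointless; e.g. `add_change!(42, ir, AllEscape(), changes)` records nothing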
+
+function add_change!(@nospecialize(x), ir::IRCode, info::EscapeLattice, changes::Changes)
+    if !isbitstype(widenconst(argextype(x, ir, ir.sptypes, ir.argtypes)))
+        push!(changes, (x, info))
+    end
+end
+
+function escape_invoke!(args::Vector{Any}, pc::Int,
+                        state::EscapeState, ir::IRCode, changes::Changes)
+    linfo = first(args)::MethodInstance
+    linfostate = get(GLOBAL_ESCAPE_CACHE, linfo, nothing)
+    args = args[2:end]
+    if isnothing(linfostate)
+        add_changes!(args, ir, AllEscape(), changes)
+    else
+        retinfo = state.ssavalues[pc] # escape information imposed on the call statement
+        for i in 1:length(args)
+            arg = args[i]
+            arginfo = linfostate.arguments[i]
+            info = from_interprocedural(arginfo, retinfo)
+            push!(changes, (arg, info))
+        end
+    end
+end
+
+# reinterpret the escape information imposed on the callee argument (`arginfo`) in the
+# context of the caller frame, using the escape information imposed on the return value (`retinfo`)
+function from_interprocedural(arginfo::EscapeLattice, retinfo::EscapeLattice)
+    ar = arginfo.ReturnEscape
+    @assert !isempty(ar) "invalid escape lattice element returned from inter-procedural context"
+    newarginfo = EscapeLattice(true, NO_RETURN, arginfo.ThrownEscape, arginfo.GlobalEscape)
+    if ar == ARGUMENT_RETURN
+        # if this is simply passed as a call argument, we can discard the `ReturnEscape`
+        # information and just propagate the other escape information
+        return newarginfo
+    else
+        # if this can be a return value, we have to merge it with the escape information
+        return newarginfo ⊔ retinfo
+    end
+end
+
+function escape_call!(args::Vector{Any}, pc::Int,
+                      state::EscapeState, ir::IRCode, changes::Changes)
+    ft = argextype(first(args), ir, ir.sptypes, ir.argtypes)
+    f = argtype_to_function(ft)
+    if isa(f, Core.IntrinsicFunction)
+        return false # COMBAK we may break soundness here, e.g. `pointerref`
+    else
+        ishandled = escape_builtin!(f, args, pc, state, ir, changes)::Union{Nothing,Bool}
+    end
+    isnothing(ishandled) && return false # nothing to propagate
+    if !ishandled
+        # if this call hasn't been handled by any of the pre-defined handlers,
+        # we escape this call conservatively
+        add_changes!(args[2:end], ir, AllEscape(), changes)
+    end
+    return true
+end
+
+# TODO: implement more builtins, make them more accurate
+# TODO: use `T_IFUNC`-like logic and don't abuse dispatch
+
+escape_builtin!(@nospecialize(f), _...) = return false
+
+escape_builtin!(::typeof(isa), _...) = return nothing
+escape_builtin!(::typeof(typeof), _...) = return nothing
+escape_builtin!(::typeof(Core.sizeof), _...) = return nothing
+escape_builtin!(::typeof(===), _...) = return nothing
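+
+# NOTE on the `Union{Nothing,Bool}` protocol above: `nothing` means "this builtin never
+# propagates escape information" (so there is nothing to record), while `false` means
+# "not handled here", which makes `escape_call!` fall back to the conservative `AllEscape()`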
+
+function escape_builtin!(::typeof(ifelse), args::Vector{Any}, pc::Int, state::EscapeState, ir::IRCode, changes::Changes)
+    length(args) == 4 || return false
+    f, cond, th, el = args
+    info = state.ssavalues[pc]
+    condt = argextype(cond, ir, ir.sptypes, ir.argtypes)
+    if isa(condt, Const) && (cond = condt.val; isa(cond, Bool))
+        if cond
+            push!(changes, (th, info))
+        else
+            push!(changes, (el, info))
+        end
+    else
+        push!(changes, (th, info))
+        push!(changes, (el, info))
+    end
+    return true
+end
+
+function escape_builtin!(::typeof(tuple), args::Vector{Any}, pc::Int, state::EscapeState, ir::IRCode, changes::Changes)
+    info = state.ssavalues[pc]
+    info == NotAnalyzed() && (info = NoEscape())
+    add_changes!(args[2:end], ir, info, changes)
+    return true
+end
+
+# TODO don't propagate escape information to the 1st argument, but propagate information to aliased field
+function escape_builtin!(::typeof(getfield), args::Vector{Any}, pc::Int, state::EscapeState, ir::IRCode, changes::Changes)
+    info = state.ssavalues[pc]
+    info == NotAnalyzed() && (info = NoEscape())
+    # only propagate info when the field itself is non-bitstype
+    if !isbitstype(widenconst(ir.stmts.type[pc]))
+        add_changes!(args[2:end], ir, info, changes)
+    end
+    return true
+end
+
+end # module EscapeAnalysis
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 1898aa8b75778..96352173307ae 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -289,6 +289,9 @@ function optimize(interp::AbstractInterpreter, opt::OptimizationState, params::O
     finish(interp, opt, params, ir, result)
 end
 
+include("compiler/escape.jl")
+using .EscapeAnalysis
+
 function run_passes(ci::CodeInfo, sv::OptimizationState)
     preserve_coverage = coverage_enabled(sv.mod)
     ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, sv)
@@ -299,6 +302,10 @@ function run_passes(ci::CodeInfo, sv::OptimizationState)
     @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
     #@timeit "verify 2" verify_ir(ir)
     ir = compact!(ir)
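+    # NOTE (sketch of the wiring): the analysis runs after inlining so that statically
+    # resolved calls appear as `:invoke` and can consult `GLOBAL_ESCAPE_CACHE`; caching
+    # the state per `MethodInstance` below is what makes `escape_invoke!` inter-procedural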
+    svdef = sv.linfo.def
+    nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0
+    state = find_escapes!(ir, nargs+1)
+    EscapeAnalysis.GLOBAL_ESCAPE_CACHE[sv.linfo] = state
     #@Base.show ("before_sroa", ir)
     @timeit "SROA" ir = getfield_elim_pass!(ir)
     #@Base.show ir.new_nodes
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index ed09d5316473a..63182f0fd801e 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -27,6 +27,9 @@ function _all(@nospecialize(f), a)
     return true
 end
 
+all(itr) = all(identity, itr)
+all(f, itr) = _all(f, itr)
+
 function contains_is(itr, @nospecialize(x))
     for y in itr
         if y === x
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 48f0005440d5d..0e9103617f3af 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1572,7 +1572,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             Success = ctx.builder.CreateZExt(Success, T_int8);
             jl_cgval_t argv[2] = {ghostValue(jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
             jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
-            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
         }
         else if (isswapfield) {
             return ghostValue(jltype);
@@ -1581,7 +1581,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             jl_cgval_t oldval = ghostValue(jltype);
             jl_cgval_t argv[2] = { oldval, newval(oldval) };
             jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
-            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
         }
     }
     Value *intcast = nullptr;
@@ -1864,7 +1864,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
        if (ismodifyfield) {
            jl_cgval_t argv[2] = { oldval, rhs };
            jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
-           oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+           oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
        }
        else if (!issetfield) { // swapfield or replacefield
            if (realelty != elty)
@@ -1883,7 +1883,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                Success = ctx.builder.CreateZExt(Success, T_int8);
                jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
                jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
-               oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+               oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
            }
        }
        return oldval;
@@ -3347,12 +3347,12 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
                 Success = ctx.builder.CreateZExt(Success, T_int8);
                 jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
                 jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
-                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
             }
             else if (ismodifyfield) {
                 jl_cgval_t argv[2] = {oldval, rhs};
                 jl_datatype_t *rettyp = jl_apply_modify_type(jfty);
-                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+                oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv, false);
             }
             return oldval;
         }
@@ -3368,7 +3368,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
     }
 }
 
-static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv)
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool tag_metadata)
 {
     assert(jl_is_datatype(ty));
     assert(jl_is_concrete_type(ty));
@@ -3522,6 +3522,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
         }
         Value *strct = emit_allocobj(ctx, jl_datatype_size(sty),
                                      literal_pointer_val(ctx, (jl_value_t*)ty));
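+        // NOTE (sketch of the contract): the `julia.noescape` metadata added below is the
+        // channel from the Julia-level escape analysis down to LLVM; llvm-alloc-opt.cpp later
+        // in this patch can detect it via hasJuliaNoEscapeMeta() (its fast path is still disabled)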
+        if (tag_metadata) {
+            MDNode* temp_node = MDNode::get(jl_LLVMContext, ConstantAsMetadata::get(ConstantInt::get(jl_LLVMContext, llvm::APInt(64, 0, false))));
+            MDNode* node = MDNode::get(jl_LLVMContext, temp_node);
+            ((CallInst *)strct)->setMetadata("julia.noescape", node);
+        }
         jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty);
         strct = decay_derived(ctx, strct);
         undef_derived_strct(ctx.builder, strct, sty, strctinfo.tbaa);
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 246534491151a..175a6ccf345ac 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -1170,7 +1170,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *t
                              jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
-static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv);
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool tag_metadata);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
@@ -2769,7 +2769,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             return true;
         }
         if (jl_is_tuple_type(rt) && jl_is_concrete_type(rt) && nargs == jl_datatype_nfields(rt)) {
-            *ret = emit_new_struct(ctx, rt, nargs, &argv[1]);
+            *ret = emit_new_struct(ctx, rt, nargs, &argv[1], false);
             return true;
         }
     }
@@ -4680,7 +4680,20 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             jl_is_datatype(jl_tparam0(ty)) &&
             jl_is_concrete_type(jl_tparam0(ty))) {
             assert(nargs <= jl_datatype_nfields(jl_tparam0(ty)) + 1);
-            return emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, &argv[1]);
+            // TODO: refactor the position of this flag
+            const uint8_t IR_FLAG_NO_ESCAPE = 0x01 << 5;
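+            // NOTE (assumption): this value has to stay in sync with the Julia-side
+            // IR_FLAG_NO_ESCAPE that find_escapes! sets in base/compiler/escape.jl;
+            // bit 5 is assumed to be the next free ssaflags bit after IR_FLAG_EFFECT_FREE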
+            jl_code_info_t *info = ctx.source;
+            uint8_t ssaflag = jl_array_len(info->ssaflags) > ssaval ? ((uint8_t*)jl_array_data(info->ssaflags))[ssaval] : 0;
+            // uint8_t ssaflag = ((uint8_t*)jl_array_data(info->ssaflags))[ssaval];
+            bool tag_metadata = false;
+            if ((ssaflag & IR_FLAG_NO_ESCAPE) != 0) {
+                tag_metadata = true;
+                printf("locate no-escape flag set stmt !!!!!! %d %d\n", ssaflag & IR_FLAG_NO_ESCAPE, ssaval);
+                printf("file name: %s func name: %s\n", ctx.file.str().c_str(), ctx.funcName.c_str());
+                // jl_(expr);
+                // jl_(ctx.code);
+            }
+            return emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, &argv[1], tag_metadata);
         }
         Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv, nargs, JLCALL_F_CC);
         // temporarily mark as `Any`, expecting `emit_ssaval_assign` to update
@@ -4768,7 +4781,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         jl_cgval_t env;
         // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC
         if (jl_is_concrete_type((jl_value_t*)env_t)) {
-            env = emit_new_struct(ctx, (jl_value_t*)env_t, nargs-5, &argv.data()[5]);
+            env = emit_new_struct(ctx, (jl_value_t*)env_t, nargs-5, &argv.data()[5], false);
         }
         else {
             Value *env_val = emit_jlcall(ctx, jltuple_func, V_rnull,
@@ -4826,7 +4839,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             fptr
         };
 
-        jl_cgval_t ret = emit_new_struct(ctx, closure_t, 6, closure_fields);
+        jl_cgval_t ret = emit_new_struct(ctx, closure_t, 6, closure_fields, false);
 
         ctx.oc_modules.push_back(std::move(closure_m));
 
@@ -6818,7 +6831,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed);
             }
             if (jl_is_concrete_type(vi.value.typ)) {
-                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs);
+                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs, false);
                 emit_varinfo_assign(ctx, vi, tuple);
             }
             else {
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index ec7060bd10a5e..f5867b23aee5c 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -74,6 +74,13 @@ static bool hasObjref(Type *ty)
     return false;
 }
 
+static bool hasJuliaNoEscapeMeta(CallInst *inst) {
+    if (inst->hasMetadataOtherThanDebugLoc()) {
+        MDNode *JLMD = inst->getMetadata("julia.noescape");
+        return JLMD != nullptr;
+    }
+    return false;
+}
 /**
  * Promote `julia.gc_alloc_obj` which do not have escaping root to a alloca.
  * Uses that are not considered to escape the object (i.e. heap address) includes,
@@ -323,19 +330,6 @@ void Optimizer::optimizeAll()
         auto orig = item.first;
         size_t sz = item.second;
         checkInst(orig);
-        if (use_info.escaped) {
-            if (use_info.hastypeof)
-                optimizeTag(orig);
-            continue;
-        }
-        if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve ||
-                                                           !use_info.refstore)) {
-            // No one took the address, no one reads anything and there's no meaningful
-            // preserve of fields (either no preserve/ccall or no object reference fields)
-            // We can just delete all the uses.
-            removeAlloc(orig);
-            continue;
-        }
         bool has_ref = false;
         bool has_refaggr = false;
         for (auto memop: use_info.memops) {
@@ -350,6 +344,23 @@ void Optimizer::optimizeAll()
                 }
             }
         }
+        // if (hasJuliaNoEscapeMeta(orig)) {
+        //     moveToStack(orig, sz, has_ref);
+        //     continue;
+        // }
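+        // NOTE: the escaped/removeAlloc checks were moved below the memop scan so that
+        // `has_ref` is already computed for the (experimental, currently disabled) fast
+        // path above, which would call moveToStack() directly for `julia.noescape` allocations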
+        if (use_info.escaped) {
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
+        }
+        if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve ||
+                                                           !use_info.refstore)) {
+            // No one took the address, no one reads anything and there's no meaningful
+            // preserve of fields (either no preserve/ccall or no object reference fields)
+            // We can just delete all the uses.
+            removeAlloc(orig);
+            continue;
+        }
         if (!use_info.hasunknownmem && !use_info.addrescaped && !has_refaggr) {
             // No one actually care about the memory layout of this object, split it.
             splitOnStack(orig);
@@ -939,6 +950,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
     }
     insertLifetime(ptr, ConstantInt::get(pass.T_int64, sz), orig_inst);
     auto new_inst = cast<Instruction>(prolog_builder.CreateBitCast(ptr, pass.T_pjlvalue));
+    // if (hasJuliaNoEscapeMeta(orig_inst)) {
+    //     new_inst = cast<Instruction>(prolog_builder.CreateAddrSpaceCast(new_inst, pass.T_prjlvalue));
+    // }
     new_inst->takeName(orig_inst);
 
     auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) {
@@ -975,11 +989,19 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
     };
     // Both `orig_i` and `new_i` should be pointer of the same type
     // but possibly different address spaces. `new_i` is always in addrspace 0.
+    // printf("LLVM code before opt\n");
+    // llvm_dump(&F);
     auto replace_inst = [&] (Instruction *user) {
+        // llvm_dump(user);
         Instruction *orig_i = cur.orig_i;
         Instruction *new_i = cur.new_i;
+
+        // llvm_dump(orig_i);
+        // llvm_dump(new_i);
         if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
             user->replaceUsesOfWith(orig_i, new_i);
+            // printf("hit here\n");
+            // llvm_dump(user);
         }
         else if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
@@ -1018,6 +1040,14 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
             user->replaceUsesOfWith(orig_i, replace);
         }
         else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
+            // if (auto call_inst = dyn_cast<CallInst>(orig_inst)) {
+            //     if (hasJuliaNoEscapeMeta(call_inst) && isa<AddrSpaceCastInst>(user)) {
+            //         auto *new_addrcast_inst = new AddrSpaceCastInst(new_i, user->getType(), "", user);
+            //         user->replaceAllUsesWith(new_addrcast_inst);
+            //         user->eraseFromParent();
+            //         return;
+            //     }
+            // }
             auto cast_t = PointerType::get(cast<PointerType>(user->getType())->getElementType(), 0);
             auto replace_i = new_i;
@@ -1040,6 +1070,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
             push_frame(gep, new_gep);
         }
         else {
+            printf("LLVM code before crashing:\n");
+            llvm_dump(user);
+            // llvm_dump(&F);
             abort();
         }
     };
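
# ---
# End-to-end sketch (illustrative, not part of the patch). For a hypothetical
#
#     mutable struct Counter
#         n::Int
#     end
#     count_one() = (c = Counter(1); c.n)
#
# `find_escapes!` leaves the `:new` statement for `c` with `has_no_escape`
# (the `getfield` of the isbits field `n` propagates nothing), so it sets
# IR_FLAG_NO_ESCAPE on that statement; emit_expr then tags the allocation with
# `julia.noescape` metadata, and llvm-alloc-opt could (once its disabled fast
# path is enabled) promote the allocation to the stack via moveToStack().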