From c291735022e360fd321e6ea6c1407a01a23ff3c5 Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Tue, 4 Nov 2025 14:17:14 +0000 Subject: [PATCH 1/6] Try out set_valid_worlds! instead of manual optimisation --- src/copyable_task.jl | 10 +++-- src/utils.jl | 88 +++++++++++++++----------------------------- 2 files changed, 35 insertions(+), 63 deletions(-) diff --git a/src/copyable_task.jl b/src/copyable_task.jl index 98ea8b69..26768f38 100644 --- a/src/copyable_task.jl +++ b/src/copyable_task.jl @@ -79,7 +79,8 @@ function build_callable(sig::Type{<:Tuple}) TapedTask from that function.""" throw(ArgumentError(msg)) end - key = CacheKey(Base.get_world_counter(), sig) + world_age = Base.get_world_counter() + key = CacheKey(world_age, sig) if haskey(mc_cache, key) return fresh_copy(mc_cache[key]) else @@ -88,11 +89,12 @@ function build_callable(sig::Type{<:Tuple}) isva = which(sig).isva bb, refs, types = derive_copyable_task_ir(BBCode(ir)) unoptimised_ir = IRCode(bb) + @static if VERSION > v"1.12-" + unoptimised_ir = set_valid_world!(unoptimised_ir, world_age) + end optimised_ir = optimise_ir!(unoptimised_ir) mc_ret_type = callable_ret_type(sig, types) - mc = optimized_misty_closure( - mc_ret_type, optimised_ir, refs...; isva=isva, do_compile=true - ) + mc = misty_closure(mc_ret_type, optimised_ir, refs...; isva=isva, do_compile=true) mc_cache[key] = mc return mc, refs[end] end diff --git a/src/utils.jl b/src/utils.jl index cd958ae1..140fe321 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -220,55 +220,6 @@ function opaque_closure( )::Core.OpaqueClosure{sig,ret_type} end -function optimized_opaque_closure(rtype, ir::IRCode, env...; kwargs...) - oc = opaque_closure(rtype, ir, env...; kwargs...) - world = UInt(oc.world) - set_world_bounds_for_optimization!(oc) - optimized_oc = optimize_opaque_closure(oc, rtype, env...; kwargs...) - return optimized_oc -end - -function optimize_opaque_closure(oc::Core.OpaqueClosure, rtype, env...; kwargs...) 
- method = oc.source - ci = method.specializations.cache - world = UInt(oc.world) - ir = reinfer_and_inline(ci, world) - ir === nothing && return oc # nothing to optimize - return opaque_closure(rtype, ir, env...; kwargs...) -end - -# Allows optimization to make assumptions about binding access, -# enabling inlining and other optimizations. -function set_world_bounds_for_optimization!(oc::Core.OpaqueClosure) - ci = oc.source.specializations.cache - ci.inferred === nothing && return nothing - ci.inferred.min_world = oc.world - return ci.inferred.max_world = oc.world -end - -function reinfer_and_inline(ci::Core.CodeInstance, world::UInt) - interp = CC.NativeInterpreter(world) - mi = get_mi(ci) - argtypes = collect(Any, mi.specTypes.parameters) - irsv = CC.IRInterpretationState(interp, ci, mi, argtypes, world) - irsv === nothing && return nothing - for stmt in irsv.ir.stmts - inst = stmt[:inst] - if Meta.isexpr(inst, :loopinfo) || - Meta.isexpr(inst, :pop_exception) || - isa(inst, CC.GotoIfNot) || - isa(inst, CC.GotoNode) || - Meta.isexpr(inst, :copyast) - continue - end - stmt[:flag] |= CC.IR_FLAG_REFINED - end - CC.ir_abstract_constant_propagation(interp, irsv) - state = CC.InliningState(interp) - ir = CC.ssa_inlining_pass!(irsv.ir, state, CC.propagate_inbounds(irsv)) - return ir -end - """ misty_closure( ret_type::Type, @@ -291,14 +242,33 @@ function misty_closure( return MistyClosure(opaque_closure(ret_type, ir, env...; isva, do_compile), Ref(ir)) end -function optimized_misty_closure( - ret_type::Type, - ir::IRCode, - @nospecialize env...; - isva::Bool=false, - do_compile::Bool=true, -) - return MistyClosure( - optimized_opaque_closure(ret_type, ir, env...; isva, do_compile), Ref(ir) - ) +@static if VERSION > v"1.12-" + """ + set_valid_world!(ir::IRCode, world::UInt)::IRCode + + (1.12+ only) + Create a shallow copy of the given IR code, with its `valid_worlds` field updated + to a single valid world. This allows the compiler to perform more inlining. 
+ + In particular, if the IR comes from say a function `f` which makes a call to another + function `g` which only got defined after `f`, then at the min_world when `f` was + defined, `g` was not available yet. If we restrict the IR to a world where `g` is + available then `g` can be inlined. + + Will error if `world` is not in the existing `valid_worlds` of `ir`. + """ + function set_valid_world!(ir::IRCode, world::UInt) + if world ∉ ir.valid_worlds + error("World $world is not valid for this IRCode: $(ir.valid_worlds).") + end + return CC.IRCode( + ir.stmts, + ir.cfg, + ir.debuginfo, + ir.argtypes, + ir.meta, + ir.sptypes, + CC.WorldRange(world, world), + ) + end end From fe948c61948af1146e9a4343e6e4af22b5ce8320 Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Wed, 5 Nov 2025 13:11:02 +0000 Subject: [PATCH 2/6] Add a comment --- src/copyable_task.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/copyable_task.jl b/src/copyable_task.jl index 26768f38..6b22354d 100644 --- a/src/copyable_task.jl +++ b/src/copyable_task.jl @@ -90,6 +90,9 @@ function build_callable(sig::Type{<:Tuple}) bb, refs, types = derive_copyable_task_ir(BBCode(ir)) unoptimised_ir = IRCode(bb) @static if VERSION > v"1.12-" + # This is a performance optimisation, copied over from Mooncake, where setting + # the valid world age to be very strictly just the current age allows the + # compiler to do more inlining and other optimisation. unoptimised_ir = set_valid_world!(unoptimised_ir, world_age) end optimised_ir = optimise_ir!(unoptimised_ir) From 36b5ebe222a1363b83f90d5670c4f4bcfa2b3936 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Wed, 5 Nov 2025 13:12:07 +0000 Subject: [PATCH 3/6] Give more type information to deref_phi. 
--- src/copyable_task.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/copyable_task.jl b/src/copyable_task.jl index 6b22354d..46e6f096 100644 --- a/src/copyable_task.jl +++ b/src/copyable_task.jl @@ -824,9 +824,10 @@ function derive_copyable_task_ir(ir::BBCode)::Tuple{BBCode,Tuple,Vector{Any}} deref_ids = map(phi_inds) do n id = bb.inst_ids[n] phi_id = phi_ids[n] + ref_ind = ssa_id_to_ref_index_map[id] push!( inst_pairs, - (id, new_inst(Expr(:call, deref_phi, refs_id, phi_id))), + (id, new_inst(Expr(:call, deref_phi, refs_id, phi_id, ref_index_to_type_map[ref_ind]))), ) return id end @@ -1206,8 +1207,11 @@ end @inline resume_block_is(refs::R, id::Int32) where {R<:Tuple} = !(refs[end][] === id) # Helper used in `derive_copyable_task_ir`. -@inline deref_phi(refs::R, n::TupleRef) where {R<:Tuple} = refs[n.n][] -@inline deref_phi(::R, x) where {R<:Tuple} = x +@inline function deref_phi(refs::R, n::TupleRef, ::Type{T}) where {R<:Tuple,T} + ref = refs[n.n]::Base.RefValue{T} + return ref[] +end +@inline deref_phi(::R, x, t::Type) where {R<:Tuple} = x # Helper used in `derived_copyable_task_ir`. 
@inline not_a_produced(x) = !(isa(x, ProducedValue)) From f078e74018a942d0f192e20e628b22261210c2db Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Wed, 5 Nov 2025 13:12:29 +0000 Subject: [PATCH 4/6] Code formatting --- src/copyable_task.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/copyable_task.jl b/src/copyable_task.jl index 46e6f096..1c8c6776 100644 --- a/src/copyable_task.jl +++ b/src/copyable_task.jl @@ -827,7 +827,18 @@ function derive_copyable_task_ir(ir::BBCode)::Tuple{BBCode,Tuple,Vector{Any}} ref_ind = ssa_id_to_ref_index_map[id] push!( inst_pairs, - (id, new_inst(Expr(:call, deref_phi, refs_id, phi_id, ref_index_to_type_map[ref_ind]))), + ( + id, + new_inst( + Expr( + :call, + deref_phi, + refs_id, + phi_id, + ref_index_to_type_map[ref_ind], + ), + ), + ), ) return id end From 86046e46c483e630668932ef25360e3aaaf3a211 Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Wed, 5 Nov 2025 13:15:39 +0000 Subject: [PATCH 5/6] Add a comment --- src/copyable_task.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/copyable_task.jl b/src/copyable_task.jl index 1c8c6776..8a3340b4 100644 --- a/src/copyable_task.jl +++ b/src/copyable_task.jl @@ -827,6 +827,12 @@ function derive_copyable_task_ir(ir::BBCode)::Tuple{BBCode,Tuple,Vector{Any}} ref_ind = ssa_id_to_ref_index_map[id] push!( inst_pairs, + # The last argument, ref_index_to_type_map[ref_ind], is a + # performance optimisation. The idea is that we know the inferred + # type of the PhiNode from the original IR, and by passing it to + # deref_phi we can type annotate the element type of the Ref + # that is being dereferenced, resulting in more concrete types + # in the generated IR.
( id, new_inst( From 3953b52b6c0c5ecff179684b7fe607337371dbcf Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Wed, 5 Nov 2025 13:20:58 +0000 Subject: [PATCH 6/6] Bump patch version to 0.9.8 --- HISTORY.md | 4 ++++ Project.toml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index acfd8114..ae26b89d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,7 @@ +# 0.9.8 + +Remove manual opaque closure optimisation functions in favour of setting the world age and letting the compiler do more work for us, and providing it with some more type information. This changes no functionality, and shouldn't change performance either, but simplifies code. + # 0.9.7 Fix a concurrency bug, where Libtask would sometimes crash with a "Multiple concurrent writes to Dict detected!" error when TapedTasks were being executed concurrently. diff --git a/Project.toml b/Project.toml index 54cc27d7..46236bde 100644 --- a/Project.toml +++ b/Project.toml @@ -3,7 +3,7 @@ uuid = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" license = "MIT" desc = "Tape based task copying in Turing" repo = "https://github.com/TuringLang/Libtask.jl.git" -version = "0.9.7" +version = "0.9.8" [deps] MistyClosures = "dbe65cb8-6be2-42dd-bbc5-4196aaced4f4"