- 
                Notifications
    
You must be signed in to change notification settings  - Fork 10
 
Description
Running the following MWE on Julia 1.11 and 1.12 with 4 threads:
using Libtask
# Reproduction-only override: a deliberately slow `hash` method for
# `Libtask.CacheKey`. Apart from the added sleep, the body is meant to mirror the
# stock object-identity hash, so hash values are computed the same way, just slower.
# NOTE(review): this replaces hashing for a type owned by Libtask from outside the
# package; it is intended for this MWE only and should not be used in real code.
function Base.hash(ck::Libtask.CacheKey, h::UInt)
    # Artificial 0.1 s delay on every hash call, to widen the window in which
    # concurrent writes to the cache Dict can overlap.
    Threads.sleep(0.1)
    # Mix the incoming seed `h` with the identity of `ck`.
    Base.hash_uint(3h - Base.objectid(ck))
end
# This forces every invocation with a different value of `i` to compile a new method,
# ensuring that Libtask has to construct a new MistyClosure and add it to the cache.
# `i` is carried in the type domain via `Val{i}`, so each distinct `i` has a distinct
# call signature (e.g. `Tuple{typeof(f), Val{5}}`).
function f(::Val{i}) where {i}
    # Hand the type parameter `i` to `produce` (presumably Libtask's `produce`,
    # brought into scope by `using Libtask`).
    produce(i)
    return nothing
end
fetch.([Threads.@spawn TapedTask(nothing, f, Val{i}()) for i in 1:20])
errors with:
ERROR: LoadError: TaskFailedException
Stacktrace:
  [1] #wait#582
    @ ./task.jl:363 [inlined]
  [2] wait
    @ ./task.jl:360 [inlined]
  [3] fetch
    @ ./task.jl:525 [inlined]
  [4] _broadcast_getindex_evalf
    @ ./broadcast.jl:699 [inlined]
  [5] _broadcast_getindex
    @ ./broadcast.jl:672 [inlined]
  [6] _getindex
    @ ./broadcast.jl:620 [inlined]
  [7] getindex
    @ ./broadcast.jl:616 [inlined]
  [8] copyto_nonleaf!(dest::Vector{TapedTask{Nothing}}, bc::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(fetch), Tuple{Base.Broadcast.Extruded{Vector{Task}, Tuple{Bool}, Tuple{Int64}}}}, iter::Base.OneTo{Int64}, state::Int64, count::Int64)
    @ Base.Broadcast ./broadcast.jl:1104
  [9] restart_copyto_nonleaf!(newdest::Vector{TapedTask{Nothing}}, dest::Vector{TapedTask{Nothing, Tuple{typeof(f), Val{1}}, MistyClosures.MistyClosure{Core.OpaqueClosure{Tuple{typeof(f), Val{1}}, Union{Nothing, Libtask.ProducedValue{Int64}}}}}}, bc::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(fetch), Tuple{Base.Broadcast.Extruded{Vector{Task}, Tuple{Bool}, Tuple{Int64}}}}, val::TapedTask{Nothing, Tuple{typeof(f), Val{2}}, MistyClosures.MistyClosure{Core.OpaqueClosure{Tuple{typeof(f), Val{2}}, Union{Nothing, Libtask.ProducedValue{Int64}}}}}, I::Int64, iter::Base.OneTo{Int64}, state::Int64, count::Int64)
    @ Base.Broadcast ./broadcast.jl:1095
 [10] copyto_nonleaf!(dest::Vector{TapedTask{Nothing, Tuple{typeof(f), Val{1}}, MistyClosures.MistyClosure{Core.OpaqueClosure{Tuple{typeof(f), Val{1}}, Union{Nothing, Libtask.ProducedValue{Int64}}}}}}, bc::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(fetch), Tuple{Base.Broadcast.Extruded{Vector{Task}, Tuple{Bool}, Tuple{Int64}}}}, iter::Base.OneTo{Int64}, state::Int64, count::Int64)
    @ Base.Broadcast ./broadcast.jl:1111
 [11] copy
    @ ./broadcast.jl:941 [inlined]
 [12] materialize(bc::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(fetch), Tuple{Vector{Task}}})
    @ Base.Broadcast ./broadcast.jl:894
 [13] top-level scope
    @ ~/ppl/scrap.jl:79
 [14] include(mod::Module, _path::String)
    @ Base ./Base.jl:306
 [15] exec_options(opts::Base.JLOptions)
    @ Base ./client.jl:317
 [16] _start()
    @ Base ./client.jl:550
    nested task error: AssertionError: Multiple concurrent writes to Dict detected!
    Stacktrace:
     [1] rehash!(h::Dict{Libtask.CacheKey, MistyClosures.MistyClosure}, newsz::Int64)
       @ Base ./dict.jl:182
     [2] ht_keyindex2_shorthash!(h::Dict{Libtask.CacheKey, MistyClosures.MistyClosure}, key::Libtask.CacheKey)
       @ Base ./dict.jl:314
     [3] setindex!(h::Dict{Libtask.CacheKey, MistyClosures.MistyClosure}, v0::MistyClosures.MistyClosure{Core.OpaqueClosure{Tuple{typeof(f), Val{5}}, Union{Nothing, Libtask.ProducedValue{Int64}}}}, key::Libtask.CacheKey)
       @ Base ./dict.jl:356
     [4] build_callable(sig::Type{Tuple{typeof(f), Val{5}}})
       @ Libtask ~/ppl/Libtask.jl/src/copyable_task.jl:94
     [5] TapedTask(::Nothing, ::Function, ::Vararg{Any}; kwargs::@Kwargs{})
       @ Libtask ~/ppl/Libtask.jl/src/copyable_task.jl:282
     [6] TapedTask(::Nothing, ::Function, ::Vararg{Any})
       @ Libtask ~/ppl/Libtask.jl/src/copyable_task.jl:279
     [7] (::var"#5#6"{Int64})()
       @ Main ~/ppl/scrap.jl:79
This might not always be reproducible. If you are struggling to get it to fail, try increasing the length of the sleep, the number of threads, and/or the number of tasks spawned.
Concurrent writes to Dicts are 'detected' here: see https://github.com/JuliaLang/julia/blob/ba1e628ee49351af0b704afd2b2903d253bd3564/base/dict.jl#L160-L182. But I think that if the code between lines 161 and 181 (inclusive) runs fast enough, then a concurrent write won't be caught.
My suspicion is that this section of code can be slow for very complicated types. In practice, we've seen it error with Turing / PG models, but I couldn't get it to error with boring, simple types defined in Base. That's why, for this MWE, I had to patch Base.hash to make it artificially slow.
The definition of Base.hash in the MWE is taken from lines 28 and 37 of https://github.com/JuliaLang/julia/blob/ba1e628ee49351af0b704afd2b2903d253bd3564/base/hashing.jl#L28-L37; it's exactly the same as what happens 'ordinarily', except for the sleep.