diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
index 5c2cf2f..b74ff2c 100644
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -1 +1,7 @@
-indent = 2
\ No newline at end of file
+indent = 2
+margin = 80
+remove_extra_newlines = true
+long_to_short_function_def = true
+format_docstrings = true
+trailing_comma = false
+separate_kwargs_with_semicolon = true
diff --git a/Project.toml b/Project.toml
index 21f34e6..ee9d4e4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,20 +1,18 @@
 name = "PolyesterWeave"
 uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad"
 authors = ["Chris Elrod and contributors"]
-version = "0.2.1"
+version = "0.3.0"
 
 [deps]
 BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b"
 CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
-Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 ThreadingUtilities = "8290d209-cae3-49c0-8002-c8c24d57dab5"
 
 [compat]
 BitTwiddlingConvenienceFunctions = "0.1"
 CPUSummary = "0.1.2, 0.2"
 IfElse = "0.1"
-Static = "0.3.1, 0.4, 0.5, 0.6, 0.7, 0.8"
 ThreadingUtilities = "0.4.5, 0.5"
 julia = "1.5"
 
diff --git a/src/PolyesterWeave.jl b/src/PolyesterWeave.jl
index 88c388b..9002905 100644
--- a/src/PolyesterWeave.jl
+++ b/src/PolyesterWeave.jl
@@ -1,31 +1,34 @@
 module PolyesterWeave
-if isdefined(Base, :Experimental) &&
-   isdefined(Base.Experimental, Symbol("@max_methods"))
-  @eval Base.Experimental.@max_methods 1
+if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@max_methods"))
+  @eval Base.Experimental.@max_methods 1
 end
 
 using BitTwiddlingConvenienceFunctions: nextpow2
 using ThreadingUtilities: _atomic_store!, _atomic_or!, _atomic_xchg!
-using Static
 using IfElse: ifelse
 
 export request_threads, free_threads!
 
 @static if VERSION ≥ v"1.6.0-DEV.674"
   @inline function assume(b::Bool)
-    Base.llvmcall((
-      """
-      declare void @llvm.assume(i1)
-
-      define void @entry(i8 %byte) alwaysinline {
-      top:
-        %bit = trunc i8 %byte to i1
-        call void @llvm.assume(i1 %bit)
-        ret void
-      }
-""",
-      "entry",
-    ), Cvoid, Tuple{Bool}, b)
+    Base.llvmcall(
+      (
+        """
+        declare void @llvm.assume(i1)
+
+        define void @entry(i8 %byte) alwaysinline {
+        top:
+          %bit = trunc i8 %byte to i1
+          call void @llvm.assume(i1 %bit)
+          ret void
+        }
+        """,
+        "entry",
+      ),
+      Cvoid,
+      Tuple{Bool},
+      b,
+    )
   end
 else
   @inline assume(b::Bool) = Base.llvmcall(
diff --git a/src/request.jl b/src/request.jl
index 3165231..48994f6 100644
--- a/src/request.jl
+++ b/src/request.jl
@@ -1,13 +1,14 @@
 import CPUSummary
 
 function worker_bits()
-  wts = nextpow2(CPUSummary.sys_threads()) # Typically sys_threads (i.e. Sys.CPU_THREADS) does not change between runs, thus it will precompile well.
-  ws = static(8sizeof(UInt)) # For testing purposes it can be overridden by JULIA_CPU_THREADS,
-  ifelse(Static.lt(wts, ws), ws, wts)
+  wts = Int(nextpow2(CPUSummary.sys_threads()))
+  ws = 8sizeof(UInt)
+  # For testing purposes it can be overridden by JULIA_CPU_THREADS,
+  Core.ifelse(wts < ws, ws, wts)
 end
 function worker_mask_count()
   bits = worker_bits()
-  (bits + StaticInt{63}()) ÷ StaticInt{64}() # cld not defined on `StaticInt`
+  (bits + 63) >>> 6
 end
 
 worker_pointer() = Base.unsafe_convert(Ptr{UInt}, pointer_from_objref(WORKERS))
@@ -20,7 +21,9 @@ function free_threads!(freed_threads_tuple::NTuple{1,U}) where {U<:Unsigned}
   _atomic_or!(worker_pointer(), freed_threads_tuple[1])
   nothing
 end
-function free_threads!(freed_threads_tuple::Tuple{U,Vararg{U,N}}) where {N,U<:Unsigned}
+function free_threads!(
+  freed_threads_tuple::Tuple{U,Vararg{U,N}}
+) where {N,U<:Unsigned}
   wp = worker_pointer()
   for freed_threads in freed_threads_tuple
     _atomic_or!(wp, freed_threads)
@@ -37,25 +40,23 @@
 end
 @inline function _request_threads(
   num_requested::UInt32,
   wp::Ptr,
-  ::StaticInt{N},
-  threadmask,
+  ::Val{N},
+  threadmask
 ) where {N}
-  ui, ft, num_requested, wp = __request_threads(num_requested, wp, _first(threadmask))
-  uit, ftt = _request_threads(
-    num_requested,
-    wp,
-    StaticInt{N}() - StaticInt{1}(),
-    _remaining(threadmask),
-  )
+  ui, ft, num_requested, wp =
+    __request_threads(num_requested, wp, _first(threadmask))
+  uit, ftt =
+    _request_threads(num_requested, wp, Val{N - 1}(), _remaining(threadmask))
   (ui, uit...), (ft, ftt...)
 end
 @inline function _request_threads(
   num_requested::UInt32,
   wp::Ptr,
-  ::StaticInt{1},
-  threadmask,
+  ::Val{1},
+  threadmask
 )
-  ui, ft, num_requested, wp = __request_threads(num_requested, wp, _first(threadmask))
+  ui, ft, num_requested, wp =
+    __request_threads(num_requested, wp, _first(threadmask))
   (ui,), (ft,)
 end
@@ -72,18 +73,24 @@
 end
 @inline function __request_threads(num_requested::UInt32, wp::Ptr, threadmask)
   no_threads = zero(UInt)
-  if (num_requested ≢ StaticInt{-1}()) && (num_requested % Int32 ≤ zero(Int32))
-    return UnsignedIteratorEarlyStop(zero(UInt), 0x00000000), no_threads, 0x00000000, wp
+  if (num_requested != typemax(num_requested)) &&
+     (num_requested % Int32 ≤ zero(Int32))
+    return UnsignedIteratorEarlyStop(zero(UInt), 0x00000000),
+    no_threads,
+    0x00000000,
+    wp
   end
   # to get more, we xchng, setting all to `0`
   # then see which we need, and free those we aren't using.
-  wpret = wp + 8 # (UInt === UInt64) | (worker_mask_count() === StaticInt(1)) #, so adding 8 is fine.
+  wpret = wp + 8
   # _all_threads = all_threads = _apply_mask(_atomic_xchg!(wp, no_threads), threadmask)
   _all_threads, all_threads = _exchange_mask!(wp, threadmask)
   additional_threads = count_ones(all_threads) % UInt32
-  # num_requested === StaticInt{-1}() && return reserved_threads, all_threads
-  if num_requested === StaticInt{-1}()
-    return UnsignedIteratorEarlyStop(all_threads), all_threads, num_requested, wpret
+  if num_requested === typemax(num_requested)
+    return UnsignedIteratorEarlyStop(all_threads),
+    all_threads,
+    num_requested,
+    wpret
  end
   nexcess = num_requested - additional_threads
   if signed(nexcess) ≥ 0
@@ -111,8 +118,8 @@ end
   _request_threads(
     num_requested % UInt32,
     worker_pointer(),
-    worker_mask_count(),
-    threadmask,
+    Val(worker_mask_count()),
+    threadmask
  )
 end
 @inline request_threads(num_requested) = request_threads(num_requested, nothing)
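
Note (not part of the patch): the rewritten worker_mask_count() replaces the StaticInt-based `(bits + StaticInt{63}()) ÷ StaticInt{64}()` with the plain-integer `(bits + 63) >>> 6`, which is ceiling division by 64 for non-negative bit counts. A minimal standalone sketch of that equivalence, using the hypothetical helper name mask_chunks (chosen here for illustration only):

# Hypothetical check, not part of this diff: `mask_chunks` mirrors the new
# `(bits + 63) >>> 6` expression from worker_mask_count() in src/request.jl.
mask_chunks(bits::Int) = (bits + 63) >>> 6

# For positive bit counts the shift-based form matches ceiling division by 64,
# which is what the old StaticInt arithmetic emulated (the removed comment
# noted that cld is not defined on `StaticInt`).
for bits in (1, 63, 64, 65, 128, 200)
  @assert mask_chunks(bits) == cld(bits, 64)
end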