Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .JuliaFormatter.toml
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
indent = 2
indent = 2
margin = 80
remove_extra_newlines = true
long_to_short_function_def = true
format_docstrings = true
trailing_comma = false
separate_kwargs_with_semicolon = true
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
name = "PolyesterWeave"
uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad"
authors = ["Chris Elrod <[email protected]> and contributors"]
version = "0.2.1"
version = "0.3.0"

[deps]
BitTwiddlingConvenienceFunctions = "62783981-4cbd-42fc-bca8-16325de8dc4b"
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
ThreadingUtilities = "8290d209-cae3-49c0-8002-c8c24d57dab5"

[compat]
BitTwiddlingConvenienceFunctions = "0.1"
CPUSummary = "0.1.2, 0.2"
IfElse = "0.1"
Static = "0.3.1, 0.4, 0.5, 0.6, 0.7, 0.8"
ThreadingUtilities = "0.4.5, 0.5"
julia = "1.5"

Expand Down
35 changes: 19 additions & 16 deletions src/PolyesterWeave.jl
Original file line number Diff line number Diff line change
@@ -1,31 +1,34 @@
module PolyesterWeave
if isdefined(Base, :Experimental) &&
isdefined(Base.Experimental, Symbol("@max_methods"))
@eval Base.Experimental.@max_methods 1
if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@max_methods"))
@eval Base.Experimental.@max_methods 1
end

using BitTwiddlingConvenienceFunctions: nextpow2
using ThreadingUtilities: _atomic_store!, _atomic_or!, _atomic_xchg!
using Static
using IfElse: ifelse

export request_threads, free_threads!

@static if VERSION ≥ v"1.6.0-DEV.674"
"""
    assume(b::Bool)

Pass `b` to the `llvm.assume` intrinsic and return `nothing`.

The `Bool` is handed to LLVM as an `i8`, truncated to `i1`, and used as the
operand of `llvm.assume`. The caller is responsible for `b` actually being
`true`.
"""
@inline function assume(b::Bool)
  Base.llvmcall(
    (
      """
      declare void @llvm.assume(i1)

      define void @entry(i8 %byte) alwaysinline {
      top:
        %bit = trunc i8 %byte to i1
        call void @llvm.assume(i1 %bit)
        ret void
      }
      """,
      "entry"
    ),
    Cvoid,
    Tuple{Bool},
    b
  )
end
else
@inline assume(b::Bool) = Base.llvmcall(
Expand Down
57 changes: 32 additions & 25 deletions src/request.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import CPUSummary

"""
    worker_bits()

Return the number of bits used to track workers: the value of
`CPUSummary.sys_threads()` rounded up with `nextpow2`, converted to `Int`, and
never smaller than the number of bits in a `UInt` (`8sizeof(UInt)`).
"""
function worker_bits()
  # Typically `sys_threads` (i.e. `Sys.CPU_THREADS`) does not change between
  # runs; for testing purposes it can be overridden by `JULIA_CPU_THREADS`.
  wts = Int(nextpow2(CPUSummary.sys_threads()))
  ws = 8sizeof(UInt)
  # `Core.ifelse` is spelled out because plain `ifelse` in this module is
  # imported from IfElse; with two `Int`s this simply selects the larger value.
  Core.ifelse(wts < ws, ws, wts)
end
"""
    worker_mask_count()

Return `worker_bits()` divided by 64, rounded up: the number of 64-bit masks
needed to hold `worker_bits()` bits.
"""
function worker_mask_count()
  bits = worker_bits()
  # Adding 63 before shifting right by 6 performs a ceiling division by 64.
  (bits + 63) >>> 6
end

"""
    worker_pointer()

Return the address of the global `WORKERS` object as a `Ptr{UInt}`.
"""
function worker_pointer()
  objref = pointer_from_objref(WORKERS)
  Base.unsafe_convert(Ptr{UInt}, objref)
end
Expand All @@ -20,7 +21,9 @@ function free_threads!(freed_threads_tuple::NTuple{1,U}) where {U<:Unsigned}
_atomic_or!(worker_pointer(), freed_threads_tuple[1])
nothing
end
function free_threads!(freed_threads_tuple::Tuple{U,Vararg{U,N}}) where {N,U<:Unsigned}
function free_threads!(
freed_threads_tuple::Tuple{U,Vararg{U,N}}
) where {N,U<:Unsigned}
wp = worker_pointer()
for freed_threads in freed_threads_tuple
_atomic_or!(wp, freed_threads)
Expand All @@ -37,25 +40,23 @@ end
"""
    _request_threads(num_requested::UInt32, wp::Ptr, ::Val{N}, threadmask)

Handle the first of `N` masks and recurse on the remaining `N - 1`.

`__request_threads` is called with `_first(threadmask)`; the `num_requested`
and `wp` values it returns are forwarded to the recursive call together with
`_remaining(threadmask)`. Returns two tuples of length `N`: the first collects
the first return value of each `__request_threads` call, the second collects
the second return value of each call.
"""
@inline function _request_threads(
  num_requested::UInt32,
  wp::Ptr,
  ::Val{N},
  threadmask
) where {N}
  ui, ft, num_requested, wp =
    __request_threads(num_requested, wp, _first(threadmask))
  # The remaining count is carried in the type domain so the recursion
  # terminates by dispatching to the `Val{1}` method.
  uit, ftt =
    _request_threads(num_requested, wp, Val{N - 1}(), _remaining(threadmask))
  (ui, uit...), (ft, ftt...)
end
"""
    _request_threads(num_requested::UInt32, wp::Ptr, ::Val{1}, threadmask)

Base case of the recursion: handle the single remaining mask and wrap the
first two return values of `__request_threads` in 1-tuples.
"""
@inline function _request_threads(
  num_requested::UInt32,
  wp::Ptr,
  ::Val{1},
  threadmask
)
  ui, ft, num_requested, wp =
    __request_threads(num_requested, wp, _first(threadmask))
  (ui,), (ft,)
end

Expand All @@ -72,18 +73,24 @@ end
end
@inline function __request_threads(num_requested::UInt32, wp::Ptr, threadmask)
no_threads = zero(UInt)
if (num_requested ≢ StaticInt{-1}()) && (num_requested % Int32 ≤ zero(Int32))
return UnsignedIteratorEarlyStop(zero(UInt), 0x00000000), no_threads, 0x00000000, wp
if (num_requested != typemax(num_requested)) &&
(num_requested % Int32 ≤ zero(Int32))
return UnsignedIteratorEarlyStop(zero(UInt), 0x00000000),
no_threads,
0x00000000,
wp
end
# To get more, we xchg (exchange), setting all to `0`,
# then see which ones we need and free those we aren't using.
wpret = wp + 8 # (UInt === UInt64) | (worker_mask_count() === StaticInt(1)) #, so adding 8 is fine.
wpret = wp + 8
# _all_threads = all_threads = _apply_mask(_atomic_xchg!(wp, no_threads), threadmask)
_all_threads, all_threads = _exchange_mask!(wp, threadmask)
additional_threads = count_ones(all_threads) % UInt32
# num_requested === StaticInt{-1}() && return reserved_threads, all_threads
if num_requested === StaticInt{-1}()
return UnsignedIteratorEarlyStop(all_threads), all_threads, num_requested, wpret
if num_requested === typemax(num_requested)
return UnsignedIteratorEarlyStop(all_threads),
all_threads,
num_requested,
wpret
end
nexcess = num_requested - additional_threads
if signed(nexcess) ≥ 0
Expand Down Expand Up @@ -111,8 +118,8 @@ end
_request_threads(
num_requested % UInt32,
worker_pointer(),
worker_mask_count(),
threadmask,
Val(worker_mask_count()),
threadmask
)
end
"""
    request_threads(num_requested)

Forward to the two-argument method, passing `nothing` as the second argument.
"""
@inline function request_threads(num_requested)
  request_threads(num_requested, nothing)
end
Expand Down