Skip to content
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ version = "0.2.6"
CpuId = "adafc99b-e345-5852-983c-f28acb93d879"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"

[compat]
CpuId = "0.3"
IfElse = "0.1"
Static = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1"
Preferences = "1"
PrecompileTools = "1.1"
Static = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1"
julia = "1.6"

[extras]
Expand Down
26 changes: 4 additions & 22 deletions src/CPUSummary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ end
using Static
using Static: Zero, One, gt, lt
using IfElse: ifelse
using Preferences
export cache_size,
cache_linesize, cache_associativity, cache_type, cache_inclusive, num_cache, num_cores

Expand Down Expand Up @@ -47,28 +48,12 @@ function get_cpu_threads()::Int
return Int(ccall(:jl_cpu_threads, Int32, ()))::Int
end
end
if (Sys.ARCH === :x86_64)

# Pick the topology implementation by CPU architecture: `x86.jl` on x86_64,
# `generic_topology.jl` on everything else. `@static` evaluates the condition
# when the code is expanded, so only the chosen branch's `include` remains.
@static if (Sys.ARCH === :x86_64)
include("x86.jl")
else
include("generic_topology.jl")
end
# Module initializer. Re-reads the core and thread counts at load time and, for
# each accessor whose current return value disagrees with the fresh reading,
# redefines that accessor via `@eval` to return the new value as a `static`.
# Always returns `nothing`.
function __init__()
# Bail out early when `jl_generating_output` reports 1.
# NOTE(review): presumably this means precompilation / image generation — confirm.
ccall(:jl_generating_output, Cint, ()) == 1 && return
nc = _get_num_cores()
syst = Sys.CPU_THREADS::Int
# `num_l1cache` and `num_cores` are both compared against the same core count.
if nc != num_l1cache()
@eval num_l1cache() = static($nc)
end
if nc != num_cores()
@eval num_cores() = static($nc)
end
if syst != sys_threads()
@eval sys_threads() = static($syst)
end
# Hook expected to be provided by the architecture-specific file included earlier.
_extra_init()
return nothing
end


# end
# `num_cache` for level 1, given as `Val{1}` or `StaticInt{1}`: forwards to `num_l1cache()`.
num_cache(::Union{Val{1},StaticInt{1}}) = num_l1cache()
Expand All @@ -95,9 +80,6 @@ function num_cache_levels()
)
end

# explicit precompilation only on Julia v1.9 and newer
if VERSION >= v"1.9"
include("precompile.jl")
end
# Unconditional include of the same precompile script.
# NOTE(review): together with the guarded include above, `precompile.jl` is
# included twice on Julia >= 1.9 — presumably only one of the two is intended; confirm.
include("precompile.jl")

end
51 changes: 17 additions & 34 deletions src/x86.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,17 @@ num_sockets() = static(1)

# Core count reported by `CpuId.cpucores()`, clamped to lie between 1 and the
# value of `get_cpu_threads()`. Always returns an `Int`.
_get_num_cores()::Int = clamp(CpuId.cpucores(), 1, (get_cpu_threads())::Int)

# Detect the core count once, lift it to a `static` value, and capture it in a
# `let` scope. `num_l1cache()` and `num_cores()` both return that captured value.
let nc = static(_get_num_cores())
global num_l1cache() = nc
global num_cores() = nc
end
# Read the thread count from `get_cpu_threads()` once, lift it to a `static`
# value, and have `sys_threads()` return that captured value.
let syst = static((get_cpu_threads())::Int)
global sys_threads() = syst
end
# Core and thread counts. Each is read from the package preference of the same
# name ("nc" / "syst"); the second macro argument is the fallback used when no
# such preference has been set.
const nc = @load_preference("nc", _get_num_cores())
const syst = @load_preference("syst", get_cpu_threads())

# Accessors returning the constants above as `static` values.
num_l1cache() = static(nc)
num_cores() = static(nc)
sys_threads() = static(syst)
# The L2 count mirrors the L1 count; L3 and L4 counts are hard-coded to 1 and 0.
num_l2cache() = num_l1cache()
num_l3cache() = static(1)
num_l4cache() = static(0)

# Cache sizes taken from `CpuId.cachesize()`, with the same length as that
# result. Entry 3 is divided (integer division) by the core count; every other
# entry is copied unchanged.
# NOTE(review): presumably entry 3 is a shared L3 size being turned into a per-core share — confirm.
const PrecompiledCacheSize = let cs = CpuId.cachesize()
ntuple(i -> i == 3 ? cs[3] ÷ _get_num_cores() : cs[i], length(cs))
end
# Result of `CpuId.cacheinclusive()` captured when this file is evaluated.
const PrecompiledCacheInclusive = CpuId.cacheinclusive()
# Fallback method: any argument without a more specific method yields `False()`.
cache_inclusive(_) = False()
# Define one `cache_size` method per entry of `cachesize`. For the entry at
# 1-based position `i`, the method accepts `Val{i}` or `StaticInt{i}` and
# returns that entry as a `static` value. Methods are created with `@eval`.
@noinline function _eval_cache_size(cachesize)
for (i, csi) in enumerate(cachesize)
@eval cache_size(::Union{Val{$i},StaticInt{$i}}) = $(static(csi))
end
end
# Define one `cache_inclusive` method per entry of `cacheinclusive`. For the
# entry at 1-based position `i`, the method accepts `Val{i}` or `StaticInt{i}`
# and returns `static(cii != 0)`, i.e. whether that entry is nonzero.
@noinline function _eval_cache_inclusive(cacheinclusive)
for (i, cii) in enumerate(cacheinclusive)
@eval cache_inclusive(::Union{Val{$i},StaticInt{$i}}) = $(static(cii != 0))
end
end
# Generate the `cache_size` / `cache_inclusive` methods from the values captured
# in the `Precompiled*` constants when this file is evaluated.
_eval_cache_size(PrecompiledCacheSize)
_eval_cache_inclusive(PrecompiledCacheInclusive)

# TODO: implement. Until then this is a placeholder that returns `static(0)`
# for any argument.
cache_associativity(_) = static(0)

Expand All @@ -45,15 +28,15 @@ let lnsize = static(CpuId.cachelinesize())
end
# Fallback method: any argument without a more specific `cache_size` method
# yields a static zero.
cache_size(_) = StaticInt{0}()

# cache_size(::Union{Val{3},StaticInt{3}}) = num_cores() * StaticInt{1441792}()
function _extra_init()
cs = let cs = CpuId.cachesize()
const cs = @load_preference("cs",
let cs = CpuId.cachesize()
ntuple(i -> i == 3 ? cs[3] ÷ _get_num_cores() : cs[i], length(cs))
end
cs !== PrecompiledCacheSize && _eval_cache_size(cs)
ci = CpuId.cacheinclusive()
ci !== PrecompiledCacheInclusive && _eval_cache_inclusive(ci)
return nothing
end

end)
# Cache-inclusivity values: the "ci" package preference if one is set, otherwise
# the result of `CpuId.cacheinclusive()`.
const ci = @load_preference("ci", CpuId.cacheinclusive())

# For each entry of `cs` at 1-based position `i`, define a `cache_size` method
# accepting `Val{i}` or `StaticInt{i}` that returns the entry as a `static` value.
for (i, csi) in enumerate(cs)
@eval cache_size(::Union{Val{$i},StaticInt{$i}}) = $(static(csi))
end
# Likewise for `ci`: each generated `cache_inclusive` method returns
# `static(cii != 0)`, i.e. whether the corresponding entry is nonzero.
for (i, cii) in enumerate(ci)
@eval cache_inclusive(::Union{Val{$i},StaticInt{$i}}) = $(static(cii != 0))
end