diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index aab695c..82eadd4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -4,25 +4,24 @@ on: - pull_request jobs: test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: version: - - '1.6' + - 'lts' - '1' - - 'nightly' + - 'pre' os: - ubuntu-latest - arch: - - x64 + - macos-latest + - windows-latest steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - uses: actions/cache@v4 env: cache-name: cache-artifacts diff --git a/Project.toml b/Project.toml index 808c511..636f328 100644 --- a/Project.toml +++ b/Project.toml @@ -7,13 +7,15 @@ version = "0.2.6" CpuId = "adafc99b-e345-5852-983c-f28acb93d879" IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +Preferences = "21216c6a-2e73-6563-6e65-726566657250" Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" [compat] CpuId = "0.3" IfElse = "0.1" -Static = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1" +Preferences = "1" PrecompileTools = "1.1" +Static = "1" julia = "1.6" [extras] diff --git a/src/CPUSummary.jl b/src/CPUSummary.jl index 4101c01..73b7063 100644 --- a/src/CPUSummary.jl +++ b/src/CPUSummary.jl @@ -7,6 +7,7 @@ end using Static using Static: Zero, One, gt, lt using IfElse: ifelse +using Preferences export cache_size, cache_linesize, cache_associativity, cache_type, cache_inclusive, num_cache, num_cores @@ -47,28 +48,12 @@ function get_cpu_threads()::Int return Int(ccall(:jl_cpu_threads, Int32, ()))::Int end end -if (Sys.ARCH === :x86_64) + +@static if (Sys.ARCH === :x86_64) include("x86.jl") else include("generic_topology.jl") end -function __init__() - ccall(:jl_generating_output, Cint, ()) == 1 && return - nc = _get_num_cores() - syst = Sys.CPU_THREADS::Int - if nc != num_l1cache() - @eval num_l1cache() = static($nc) - end - if nc != num_cores() - @eval num_cores() = static($nc) - end - if syst != sys_threads() - @eval sys_threads() = static($syst) - end - _extra_init() - return nothing -end - # end num_cache(::Union{Val{1},StaticInt{1}}) = num_l1cache() @@ -95,9 +80,6 @@ function num_cache_levels() ) end -# explicit precompilation only on Julia v1.9 and newer -if VERSION >= v"1.9" - include("precompile.jl") -end +include("precompile.jl") end diff --git a/src/generic_topology.jl b/src/generic_topology.jl index be93684..44af90c 100644 --- a/src/generic_topology.jl +++ b/src/generic_topology.jl @@ -2,13 +2,14 @@ num_machines() = static(1) num_sockets() = static(1) -_get_num_cores() = (get_cpu_threads())::Int >> (Sys.ARCH !== :aarch64) +const syst = @load_preference("syst", get_cpu_threads()) +const nc = @load_preference("nc", syst >> (Sys.ARCH !== :aarch64)) + +_get_num_cores() = nc +num_l1cache() = static(nc) +num_cores() = static(nc) +sys_threads() = static(syst) -let syst = static(get_cpu_threads()), nc = static(syst >> (Sys.ARCH !== :aarch64)) - global num_l1cache() = nc - global num_cores() = nc - global sys_threads() = syst -end @static if Sys.ARCH === :aarch64 num_l2cache() = static(1) num_l3cache() = static(0) diff --git a/src/precompile.jl b/src/precompile.jl index c0d738b..87325f0 100644 --- a/src/precompile.jl +++ b/src/precompile.jl @@ -1,9 +1,5 @@ using PrecompileTools: @compile_workload @compile_workload begin - __init__() - # `_extra_init()` is called by `__init__()` - # However, it does not seem to be recognized correctly since we can - # further reduce the time of `using CPUSummary` significantly by - # precompiling it here in addition ot `__init__()`. + end diff --git a/src/x86.jl b/src/x86.jl index d87e621..8d6bb3c 100644 --- a/src/x86.jl +++ b/src/x86.jl @@ -5,34 +5,17 @@ num_sockets() = static(1) _get_num_cores()::Int = clamp(CpuId.cpucores(), 1, (get_cpu_threads())::Int) -let nc = static(_get_num_cores()) - global num_l1cache() = nc - global num_cores() = nc -end -let syst = static((get_cpu_threads())::Int) - global sys_threads() = syst -end +const nc = @load_preference("nc", _get_num_cores()) +const syst = @load_preference("syst", get_cpu_threads()) + +num_l1cache() = static(nc) +num_cores() = static(nc) +sys_threads() = static(syst) num_l2cache() = num_l1cache() num_l3cache() = static(1) num_l4cache() = static(0) - -const PrecompiledCacheSize = let cs = CpuId.cachesize() - ntuple(i -> i == 3 ? cs[3] ÷ _get_num_cores() : cs[i], length(cs)) -end -const PrecompiledCacheInclusive = CpuId.cacheinclusive() cache_inclusive(_) = False() -@noinline function _eval_cache_size(cachesize) - for (i, csi) in enumerate(cachesize) - @eval cache_size(::Union{Val{$i},StaticInt{$i}}) = $(static(csi)) - end -end -@noinline function _eval_cache_inclusive(cacheinclusive) - for (i, cii) in enumerate(cacheinclusive) - @eval cache_inclusive(::Union{Val{$i},StaticInt{$i}}) = $(static(cii != 0)) - end -end -_eval_cache_size(PrecompiledCacheSize) -_eval_cache_inclusive(PrecompiledCacheInclusive) + # TODO: implement cache_associativity(_) = static(0) @@ -45,15 +28,15 @@ let lnsize = static(CpuId.cachelinesize()) end cache_size(_) = StaticInt{0}() -# cache_size(::Union{Val{3},StaticInt{3}}) = num_cores() * StaticInt{1441792}() -function _extra_init() - cs = let cs = CpuId.cachesize() +const cs = @load_preference("cs", + let cs = CpuId.cachesize() ntuple(i -> i == 3 ? cs[3] ÷ _get_num_cores() : cs[i], length(cs)) - end - cs !== PrecompiledCacheSize && _eval_cache_size(cs) - ci = CpuId.cacheinclusive() - ci !== PrecompiledCacheInclusive && _eval_cache_inclusive(ci) - return nothing -end - + end) +const ci = @load_preference("ci", CpuId.cacheinclusive()) +for (i, csi) in enumerate(cs) + @eval cache_size(::Union{Val{$i},StaticInt{$i}}) = $(static(csi)) +end +for (i, cii) in enumerate(ci) + @eval cache_inclusive(::Union{Val{$i},StaticInt{$i}}) = $(static(cii != 0)) +end