
Commit c2fbadb

tests -> testitems; up ThreadPinning compat; minor refactoring of exports
1 parent 7af7e0c commit c2fbadb

File tree: 8 files changed, +291 −293 lines


Project.toml

Lines changed: 3 additions & 2 deletions
@@ -28,14 +28,15 @@ Glob = "1.3"
 HDF5 = "0.16"
 Reexport = "1.2"
 Requires = "1.3"
-ThreadPinning = "0.3, 0.4"
+ThreadPinning = "0.3, 0.4, 0.5, 0.6"
 UnicodePlots = "2.8"
 julia = "1.6"
 
 [extras]
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
 
 [targets]
-test = ["Test", "InteractiveUtils", "CairoMakie"]
+test = ["Test", "InteractiveUtils", "CairoMakie", "TestItemRunner"]

src/GPUInspector.jl

Lines changed: 28 additions & 29 deletions
@@ -22,15 +22,39 @@ using Glob: glob
 
 # export BFloat16 for convenience
 const BFloat16 = CUDA.BFloat16
-export BFloat16
 
 include("UnitPrefixedBytes.jl")
-export UnitPrefixedBytes, B, KB, MB, GB, TB, KiB, MiB, GiB, TiB
-export bytes, simplify, change_base, value
 include("cuda_wrappers.jl")
-export get_temperatures, get_power_usages, get_gpu_utilizations
 include("utility.jl")
 include("utility_unroll.jl")
+include("monitoring.jl")
+include("workers.jl")
+include("gpuinfo.jl")
+include("p2p_bandwidth.jl")
+include("host2device_bandwidth.jl")
+include("stresstest_tests.jl")
+include("stresstest.jl")
+include("stresstest_cpu.jl")
+include("peakflops_gpu.jl")
+include("peakflops_gpu_matmul.jl")
+include("peakflops_gpu_fmas.jl")
+include("peakflops_gpu_wmmas.jl")
+include("memory_bandwidth.jl")
+include("memory_bandwidth_saxpy.jl")
+include("hdf5.jl")
+
+function __init__()
+    @require CairoMakie="13f3f980-e62b-5c42-98c6-ff1f3baf88f0" include("requires/cairomakie.jl")
+
+    if CUDA.functional()
+        toggle_tensorcoremath(true; verbose=false) # by default, use CUDA.FAST_MATH
+    end
+end
+
+export BFloat16
+export UnitPrefixedBytes, B, KB, MB, GB, TB, KiB, MiB, GiB, TiB
+export bytes, simplify, change_base, value
+export get_temperatures, get_power_usages, get_gpu_utilizations
 export clear_gpu_memory,
     clear_all_gpus_memory,
     cublasGemmEx_wrapper!,
@@ -39,56 +63,31 @@ export clear_gpu_memory,
     hastensorcores, MultiLogger, multi_log
 export get_cpusocket_temperatures, get_cpu_utilizations, get_cpu_utilization
 export logspace
-include("monitoring.jl")
 export MonitoringResults,
     monitoring_start,
     monitoring_stop,
     plot_monitoring_results,
     savefig_monitoring_results,
     livemonitor_temperature,
     livemonitor_powerusage
-include("workers.jl")
 export @worker, @worker_create, @worker_killall
-
-include("gpuinfo.jl")
 export gpuinfo, gpuinfo_p2p_access, gpus
-include("p2p_bandwidth.jl")
 export p2p_bandwidth,
     p2p_bandwidth_all, p2p_bandwidth_bidirectional, p2p_bandwidth_bidirectional_all
-include("host2device_bandwidth.jl")
 export host2device_bandwidth
-include("stresstest_tests.jl")
-include("stresstest.jl")
-include("stresstest_cpu.jl")
 export stresstest, stresstest_cpu
-include("peakflops_gpu.jl")
-include("peakflops_gpu_matmul.jl")
-include("peakflops_gpu_fmas.jl")
-include("peakflops_gpu_wmmas.jl")
 export peakflops_gpu,
     peakflops_gpu_fmas,
     peakflops_gpu_wmmas,
     peakflops_gpu_matmul,
     peakflops_gpu_matmul_graphs,
     peakflops_gpu_matmul_scaling
 export theoretical_peakflops_gpu, theoretical_peakflops_gpu_tensorcores
-include("memory_bandwidth.jl")
-include("memory_bandwidth_saxpy.jl")
 export memory_bandwidth,
     memory_bandwidth_saxpy,
     memory_bandwidth_scaling,
     memory_bandwidth_saxpy_scaling,
     theoretical_memory_bandwidth
-
-include("hdf5.jl")
 export save_monitoring_results, load_monitoring_results
 
-function __init__()
-    @require CairoMakie="13f3f980-e62b-5c42-98c6-ff1f3baf88f0" include("requires/cairomakie.jl")
-
-    if CUDA.functional()
-        toggle_tensorcoremath(true; verbose=false) # by default, use CUDA.FAST_MATH
-    end
-end
-
 end
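
The relocated __init__ keeps the Requires.jl hook, so plotting support stays optional: requires/cairomakie.jl is only include()d once the user loads CairoMakie themselves. A hedged session sketch built from the exported names above (the monitoring workflow and the savefig call signature are assumptions, not shown in this commit):

using GPUInspector                 # __init__ registers the CairoMakie @require hook

monitoring_start()                 # start collecting GPU metrics in the background
# ... run some GPU workload ...
results = monitoring_stop()        # assumed to return a MonitoringResults

using CairoMakie                   # triggers include("requires/cairomakie.jl")
savefig_monitoring_results(results, "monitoring.png")  # hypothetical signature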

test/bandwidth_tests.jl

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
+@testitem "p2p_bandwidth" begin
+    using LinearAlgebra
+
+    @testset "unidirectional" begin
+        # p2p_bandwidth
+        @test typeof(p2p_bandwidth(; verbose=false)) == Float64
+        @test 0 ≤ p2p_bandwidth(; verbose=false)
+        # options
+        @test typeof(p2p_bandwidth(MB(100); verbose=false)) == Float64
+        @test typeof(
+            p2p_bandwidth(; src=CuDevice(0), dst=CuDevice(1), verbose=false)
+        ) == Float64
+        @test typeof(p2p_bandwidth(; dtype=Float16, verbose=false)) == Float64
+        @test typeof(p2p_bandwidth(; nbench=10, verbose=false)) == Float64
+        @test typeof(p2p_bandwidth(; hist=true, verbose=true)) == Float64
+        # p2p_bandwidth_all
+        @test typeof(p2p_bandwidth_all(; verbose=false)) ==
+            Matrix{Union{Nothing,Float64}}
+        Mp2p = p2p_bandwidth_all(; verbose=false)
+        @test all(isnothing, diag(Mp2p))
+        @test all(
+            !isnothing(Mp2p[i, j]) for i in axes(Mp2p, 1), j in axes(Mp2p, 2) if i != j
+        )
+    end
+    @testset "bidirectional" begin
+        # p2p_bandwidth_bidirectional
+        @test typeof(p2p_bandwidth_bidirectional(; verbose=false)) == Float64
+        @test 0 ≤ p2p_bandwidth_bidirectional(; verbose=false)
+        # options
+        @test typeof(p2p_bandwidth_bidirectional(MB(100); verbose=false)) == Float64
+        @test typeof(p2p_bandwidth_bidirectional(; dtype=Float16, verbose=false)) ==
+            Float64
+        @test typeof(p2p_bandwidth_bidirectional(; nbench=10, verbose=false)) ==
+            Float64
+        @test typeof(p2p_bandwidth_bidirectional(; hist=true, verbose=true)) ==
+            Float64
+        # p2p_bandwidth_bidirectional_all
+        @test typeof(p2p_bandwidth_bidirectional_all(; verbose=false)) ==
+            Matrix{Union{Nothing,Float64}}
+        Mp2p = p2p_bandwidth_bidirectional_all(; verbose=false)
+        @test all(isnothing, diag(Mp2p))
+        @test all(
+            !isnothing(Mp2p[i, j]) for i in axes(Mp2p, 1), j in axes(Mp2p, 2) if i != j
+        )
+    end
+end
+
+@testitem "host2device_bandwidth" begin
+    @test isnothing(host2device_bandwidth())
+    @test isnothing(host2device_bandwidth(MB(100)))
+    @test isnothing(host2device_bandwidth(; dtype=Float16))
+end
+
+@testitem "memory_bandwidth" begin
+    @test typeof(memory_bandwidth()) == Float64
+    @test typeof(memory_bandwidth(MiB(10))) == Float64
+    @test typeof(memory_bandwidth(; dtype=Float32)) == Float64
+
+    @test typeof(memory_bandwidth_saxpy()) == Float64
+    @test typeof(memory_bandwidth_saxpy(; size=2^20*2)) == Float64
+    @test typeof(memory_bandwidth_saxpy(; dtype=Float32)) == Float64
+end
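
Unlike a plain @testset, each @testitem above is self-contained: TestItemRunner implicitly evaluates `using Test` and `using GPUInspector` inside every item, which is why these files need no preamble and only pull in extras such as LinearAlgebra explicitly. A minimal illustration (assuming MB is a subtype of the exported UnitPrefixedBytes):

@testitem "self-contained" begin
    # Test and GPUInspector are in scope automatically inside a @testitem,
    # so exported names like MB resolve without any using preamble.
    @test MB(100) isa UnitPrefixedBytes
end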

test/gpuinfo_tests.jl

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+@testitem "gpuinfo / gpus" begin
+    @test isnothing(gpus())
+    @test isnothing(gpuinfo())
+    @test isnothing(gpuinfo(0))
+    @test isnothing(gpuinfo(device()))
+end

test/peakflops_tests.jl

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+@testset "peakflops_gpu (CUDA cores)" begin
+    @test typeof(peakflops_gpu(; verbose=false, tensorcores=false)) == Float64
+    @test typeof(peakflops_gpu(; dtype=Float32, verbose=false, tensorcores=false)) == Float64
+    @test typeof(peakflops_gpu(; dtype=Float64, verbose=false, tensorcores=false)) == Float64
+end
+
+@testset "peakflops_gpu (Tensor cores)" begin
+    @test typeof(peakflops_gpu(; verbose=false, tensorcores=true)) == Float64
+    @test typeof(peakflops_gpu(; dtype=Float16, verbose=false, tensorcores=true)) == Float64
+end
+
+@testset "peakflops_gpu_matmul / scaling" begin
+    @test typeof(peakflops_gpu_matmul(; verbose=false)) == Float64
+    @test typeof(peakflops_gpu_matmul(; size=1024, dtype=Float64, verbose=false)) == Float64
+    @test typeof(peakflops_gpu_matmul(; nmatmuls=2, nbench=2, verbose=false)) == Float64
+    @test typeof(peakflops_gpu_matmul_scaling(; verbose=false)) == Tuple{Vector{Int64}, Vector{Float64}}
+end
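
For context on what these checks exercise: with standard FMA accounting (one fused multiply-add = 2 flops per CUDA core per cycle), the theoretical non-tensor-core peak follows directly from core count and clock. A back-of-the-envelope sketch with hypothetical, A100-like numbers:

cuda_cores      = 6912    # hypothetical CUDA core count
clock_ghz       = 1.41    # hypothetical boost clock in GHz
flops_per_cycle = 2       # one FMA counts as 2 flops

peak_tflops = cuda_cores * clock_ghz * flops_per_cycle / 1000
# ≈ 19.5 TFLOP/s, the ballpark that peakflops_gpu(; tensorcores=false) measures against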
