@@ -22,15 +22,39 @@ using Glob: glob
2222
2323# export BFloat16 for convenience
2424const BFloat16 = CUDA. BFloat16
25- export BFloat16
2625
2726include (" UnitPrefixedBytes.jl" )
28- export UnitPrefixedBytes, B, KB, MB, GB, TB, KiB, MiB, GiB, TiB
29- export bytes, simplify, change_base, value
3027include (" cuda_wrappers.jl" )
31- export get_temperatures, get_power_usages, get_gpu_utilizations
3228include (" utility.jl" )
3329include (" utility_unroll.jl" )
30+ include (" monitoring.jl" )
31+ include (" workers.jl" )
32+ include (" gpuinfo.jl" )
33+ include (" p2p_bandwidth.jl" )
34+ include (" host2device_bandwidth.jl" )
35+ include (" stresstest_tests.jl" )
36+ include (" stresstest.jl" )
37+ include (" stresstest_cpu.jl" )
38+ include (" peakflops_gpu.jl" )
39+ include (" peakflops_gpu_matmul.jl" )
40+ include (" peakflops_gpu_fmas.jl" )
41+ include (" peakflops_gpu_wmmas.jl" )
42+ include (" memory_bandwidth.jl" )
43+ include (" memory_bandwidth_saxpy.jl" )
44+ include (" hdf5.jl" )
45+
46+ function __init__ ()
47+ @require CairoMakie= " 13f3f980-e62b-5c42-98c6-ff1f3baf88f0" include (" requires/cairomakie.jl" )
48+
49+ if CUDA. functional ()
50+ toggle_tensorcoremath (true ; verbose= false ) # by default, use CUDA.FAST_MATH
51+ end
52+ end
53+
54+ export BFloat16
55+ export UnitPrefixedBytes, B, KB, MB, GB, TB, KiB, MiB, GiB, TiB
56+ export bytes, simplify, change_base, value
57+ export get_temperatures, get_power_usages, get_gpu_utilizations
3458export clear_gpu_memory,
3559 clear_all_gpus_memory,
3660 cublasGemmEx_wrapper!,
@@ -39,56 +63,31 @@ export clear_gpu_memory,
3963 hastensorcores, MultiLogger, multi_log
4064export get_cpusocket_temperatures, get_cpu_utilizations, get_cpu_utilization
4165export logspace
42- include (" monitoring.jl" )
4366export MonitoringResults,
4467 monitoring_start,
4568 monitoring_stop,
4669 plot_monitoring_results,
4770 savefig_monitoring_results,
4871 livemonitor_temperature,
4972 livemonitor_powerusage
50- include (" workers.jl" )
5173export @worker , @worker_create , @worker_killall
52-
53- include (" gpuinfo.jl" )
5474export gpuinfo, gpuinfo_p2p_access, gpus
55- include (" p2p_bandwidth.jl" )
5675export p2p_bandwidth,
5776 p2p_bandwidth_all, p2p_bandwidth_bidirectional, p2p_bandwidth_bidirectional_all
58- include (" host2device_bandwidth.jl" )
5977export host2device_bandwidth
60- include (" stresstest_tests.jl" )
61- include (" stresstest.jl" )
62- include (" stresstest_cpu.jl" )
6378export stresstest, stresstest_cpu
64- include (" peakflops_gpu.jl" )
65- include (" peakflops_gpu_matmul.jl" )
66- include (" peakflops_gpu_fmas.jl" )
67- include (" peakflops_gpu_wmmas.jl" )
6879export peakflops_gpu,
6980 peakflops_gpu_fmas,
7081 peakflops_gpu_wmmas,
7182 peakflops_gpu_matmul,
7283 peakflops_gpu_matmul_graphs,
7384 peakflops_gpu_matmul_scaling
7485export theoretical_peakflops_gpu, theoretical_peakflops_gpu_tensorcores
75- include (" memory_bandwidth.jl" )
76- include (" memory_bandwidth_saxpy.jl" )
7786export memory_bandwidth,
7887 memory_bandwidth_saxpy,
7988 memory_bandwidth_scaling,
8089 memory_bandwidth_saxpy_scaling,
8190 theoretical_memory_bandwidth
82-
83- include (" hdf5.jl" )
8491export save_monitoring_results, load_monitoring_results
8592
86- function __init__ ()
87- @require CairoMakie= " 13f3f980-e62b-5c42-98c6-ff1f3baf88f0" include (" requires/cairomakie.jl" )
88-
89- if CUDA. functional ()
90- toggle_tensorcoremath (true ; verbose= false ) # by default, use CUDA.FAST_MATH
91- end
92- end
93-
9493end
0 commit comments