-
Notifications
You must be signed in to change notification settings - Fork 269
Expand file tree
/
Copy pathinitialization.jl
More file actions
265 lines (222 loc) · 11.2 KB
/
initialization.jl
File metadata and controls
265 lines (222 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# initialization
# XXX: we currently allow loading CUDA.jl even if the package is not functional, because
# downstream packages can only unconditionally depend on CUDA.jl. that's why we have
# the errors be non-fatal, sometimes even silencing them, and why we have the
# `functional()` API that allows checking for successful initialization.
# TODO: once we have conditional dependencies, remove this complexity and have __init__ fail
# whether __init__ completed successfully; queried by `functional()`
const _initialized = Ref{Bool}(false)
# human-readable reason for an initialization failure; only assigned when
# __init__ bailed out early (check with `isassigned` before reading)
const _initialization_error = Ref{String}()
# World age captured at __init__ time. Running the GPU compiler infrastructure
# (typeinf_local, etc.) in this world avoids recompilation of native code that was
# cached during precompilation but invalidated by later method definitions.
# Default to typemax(UInt) so that during precompilation (before __init__ runs)
# invoke_in_world clamps to the current world and behaves normally.
const _initialization_world = Ref{UInt}(typemax(UInt))
"""
    invoke_frozen(f, args...; kwargs...)

Call `f(args...; kwargs...)` in the world age recorded during `__init__`.
Running the GPU compiler infrastructure (typeinf_local, etc.) in that frozen
world lets native code cached at precompilation time be reused, avoiding
expensive recompilation.
"""
function invoke_frozen(f, args...; kwargs...)
    @inline
    world = _initialization_world[]
    # normalize the keyword iterator into a NamedTuple so emptiness can be tested
    nt = merge(NamedTuple(), kwargs)
    if isempty(nt)
        return Base.invoke_in_world(world, f, args...)
    else
        # keyword calls lower to Core.kwcall(kwargs, f, args...)
        return Base.invoke_in_world(world, Core.kwcall, nt, f, args...)
    end
end
"""
    functional(show_reason=false)

Check if the package has been configured successfully and is ready to use.

This call is intended for packages that support conditionally using an available GPU. If you
fail to check whether CUDA is functional, actual use of functionality might warn and error.
"""
function functional(show_reason::Bool=false)
    # fast path: initialization already succeeded
    if _initialized[]
        return true
    end
    if show_reason
        # surface the recorded failure, or a generic message if none was stored
        msg = isassigned(_initialization_error) ? _initialization_error[] :
              "unknown initialization error"
        error(msg)
    end
    return false
end
# Module initializer: locate a CUDA driver, validate driver and runtime versions,
# initialize the CUDA API (`cuInit`), and capture the world age for `invoke_frozen`.
# Errors are deliberately non-fatal: failures store a message in
# `_initialization_error[]` and return early, so `functional()` reports `false`
# while the package itself remains loadable.
function __init__()
    # TODO: make errors here (and in submodules/subpackages like cuBLAS and cuDNN) fatal,
    # and remove functional(), once people sufficiently use weak dependencies.

    # check that we have a driver
    global libcuda
    if CUDA_Driver_jll.is_available()
        # the JLL is available, but may still have failed to locate a driver library
        if isnothing(CUDA_Driver_jll.libcuda)
            _initialization_error[] = "CUDA driver not found"
            return
        end
        libcuda = CUDA_Driver_jll.libcuda
    else
        # CUDA_Driver_jll only kicks in for supported platforms, so fall back to
        # a system search if the artifact isn't available (JLLWrappers.jl#50)
        library = if Sys.iswindows()
            Libdl.find_library("nvcuda")
        else
            Libdl.find_library(["libcuda.so.1", "libcuda.so"])
        end
        # Libdl.find_library returns "" when no candidate could be found
        if library != ""
            libcuda = library
        else
            _initialization_error[] = "CUDA driver not found"
            return
        end
    end

    # query the CUDA version advertised by the driver; failure means the driver
    # library is present but not usable
    driver = try
        set_driver_version()
    catch err
        @debug "CUDA driver failed to report a version" exception=(err, catch_backtrace())
        _initialization_error[] = "CUDA driver not functional"
        return
    end
    if driver < v"12"
        @error "This version of CUDA.jl requires an NVIDIA driver for CUDA 12.x or higher (yours only supports up to CUDA $driver)"
        _initialization_error[] = "NVIDIA driver too old"
        return
    end

    # check that we have a runtime
    if !CUDA_Runtime.is_available()
        # try to find out why
        # NOTE(review): the platform branch below only triggers when the OS is neither
        # Windows nor Linux *and* the architecture is unsupported; an unsupported OS on
        # x86_64/aarch64 falls through to the later branches — confirm this is intended.
        reason = if CUDA_Runtime != CUDA_Runtime_jll
            """You requested use of a local CUDA toolkit, but not all
            required components were discovered.
            Try running with `JULIA_DEBUG=CUDA_Runtime_Discovery` in
            your environment and re-loading CUDA.jl for more details."""
        elseif !Sys.iswindows() && !Sys.islinux() && !in(Sys.ARCH, [:x86_64, :aarch64])
            """You are using an unsupported platform: this version of CUDA.jl
            only supports Linux (x86_64, aarch64) and Windows (x86_64).
            Consider downgrading CUDA.jl (refer to the README for a list of
            supported platforms) or manually installing the CUDA toolkit and make
            CUDA.jl use it by calling `CUDA.set_runtime_version!(local_toolkit=true)`."""
        elseif CUDA_Runtime_jll.host_platform["cuda"] == "none"
            """CUDA.jl's JLLs were precompiled without an NVIDIA driver present.
            This can happen when installing CUDA.jl on an HPC log-in node,
            or in a container. In that case, you need to specify which CUDA
            version to use at run time by calling `CUDA.set_runtime_version!`
            or provisioning the preference it sets at compile time.
            If you are not running in a container or on an HPC log-in node,
            try re-compiling the CUDA runtime JLL and re-loading CUDA.jl:
            pkg = Base.PkgId(Base.UUID("76a88914-d11a-5bdc-97e0-2f5a05c973a2"),
            "CUDA_Runtime_jll")
            Base.compilecache(pkg)
            # re-start Julia and re-load CUDA.jl"""
        else
            """Could not diagnose why the CUDA runtime is not available.
            If the issue persists, please file a support ticket with the following details:
            - host platform: $(Base.BinaryPlatforms.triplet(CUDA_Runtime_jll.host_platform))
            - libcuda: $libcuda (loaded through JLL: $(CUDA_Driver_jll.is_available()))
            - driver version: $driver
            """
        end
        @error """CUDA.jl could not find an appropriate CUDA runtime to use.
        $reason
        For more details, refer to the CUDA.jl documentation at
        https://cuda.juliagpu.org/stable/installation/overview/"""
        _initialization_error[] = "CUDA runtime not found"
        return
    end

    # determine the version of the runtime we actually loaded
    runtime = try
        runtime_version()
    catch err
        # a missing GPU surfaces here as ERROR_NO_DEVICE; report it gracefully
        if err isa CuError && err.code == ERROR_NO_DEVICE
            _initialization_error[] = "No CUDA-capable device found"
            return
        end
        rethrow()
    end

    # ensure the loaded runtime is supported
    if runtime < v"12"
        @error "This version of CUDA.jl only supports CUDA 12 or higher (your toolkit provides CUDA $runtime)"
    end

    # ensure the loaded runtime matches what we precompiled for.
    if isnothing(toolkit_version)
        @error """CUDA.jl was precompiled without knowing the CUDA toolkit version. This is unsupported.
        You should either precompile CUDA.jl in an environment where the CUDA toolkit is available,
        or call `CUDA.set_runtime_version!` to specify which CUDA version to use."""
    elseif Base.thisminor(runtime) != Base.thisminor(toolkit_version)
        # this can only happen with a local toolkit, but let's always check to be sure
        if local_toolkit
            @error """You are using a local CUDA $(Base.thisminor(runtime)) toolkit, but CUDA.jl was precompiled for CUDA $(Base.thisminor(toolkit_version)). This is unsupported.
            Call `CUDA.set_runtime_version!` to update the CUDA version to match your local installation."""
        else
            @error """You are using CUDA $(Base.thisminor(runtime)), but CUDA.jl was precompiled for CUDA $(Base.thisminor(toolkit_version)).
            This is unexpected; please file an issue."""
        end
    end

    # finally, initialize CUDA
    try
        cuInit(0)
    catch err
        _initialization_error[] = "CUDA initialization failed: " * sprint(showerror, err)
        return
    end

    # warn if we're not using an official build of Julia
    official_release = startswith(Base.TAGGED_RELEASE_BANNER, "Official")
    if !official_release
        @warn """You are using a non-official build of Julia. This may cause issues with CUDA.jl.
        Please consider using an official build from https://julialang.org/downloads/."""
    end

    # enable generation of FMA instructions to mimic behavior of nvcc
    LLVM.clopts("-nvptx-fma-level=1")

    # warn about old, deprecated environment variables
    # (messages fixed: the function lives in the CUDA module, i.e.
    # `CUDA.set_runtime_version!`, not `CUDA.jl.set_runtime_version!`)
    if haskey(ENV, "JULIA_CUDA_USE_BINARYBUILDER") && !local_toolkit
        @error """JULIA_CUDA_USE_BINARYBUILDER is deprecated. Call `CUDA.set_runtime_version!` to use a local toolkit."""
        # we do not warn about this when we're already using the new preference,
        # because during the transition clusters will be deploying both mechanisms.
    end
    if haskey(ENV, "JULIA_CUDA_VERSION")
        @error """JULIA_CUDA_VERSION is deprecated. Call `CUDA.set_runtime_version!` to use a different version."""
    end

    if !local_toolkit
        # scan for CUDA libraries that may have been loaded from system paths
        # note that this must cover more than the libraries provided by the
        # runtime JLL, in order to detect possible conditional dependencies.
        runtime_libraries = ["cudart",
                             "nvperf", "nvvm", "nvrtc", "nvJitLink",
                             "cublas", "cupti", "cusparse", "cufft", "curand", "cusolver"]
        for lib in Libdl.dllist()
            # libraries loaded from Julia artifacts are the expected, safe case
            contains(lib, "artifacts") && continue
            # skip driver store directories on Windows - these contain legitimate libraries
            # that are part of the display driver installation (at least on CUDA 13+)
            if Sys.iswindows() && contains(lib, "DriverStore")
                continue
            end
            if any(rtlib -> contains(lib, rtlib), runtime_libraries)
                @warn """CUDA runtime library `$(basename(lib))` was loaded from a system path, `$lib`.
                This may cause errors.
                If you're running under a profiler, this situation is expected. Otherwise,
                ensure that your library path environment variable (e.g., `PATH` on Windows
                or `LD_LIBRARY_PATH` on Linux) does not include CUDA library paths.
                In any other case, please file an issue."""
            end
        end
    end

    # capture the world age so that the compiler infrastructure can be invoked
    # in this world, reusing precompiled native code for typeinf_local etc.
    _initialization_world[] = Base.get_world_counter()
    _initialized[] = true
end
## convenience functions

# TODO: update docstrings
export has_cuda, has_cuda_gpu
# mark `functional` as public (non-exported) API; NOTE(review): `@public` is
# presumably a compat shim for Julia 1.11's `public` keyword — confirm provenance
@public functional
"""
    has_cuda(show_reason::Bool=false)::Bool

Check whether the local system provides an installation of the CUDA driver and runtime.
Use this function if your code loads packages that require CUDA.jl.

When `show_reason` is `true` and CUDA is not functional, an error describing the
initialization failure is thrown instead of returning `false`.
"""
has_cuda(show_reason::Bool=false) = functional(show_reason)
"""
    has_cuda_gpu()::Bool

Check whether the local system provides an installation of the CUDA driver and runtime, and
if it contains a CUDA-capable GPU. See [`has_cuda`](@ref) for more details.

Note that this function initializes the CUDA API in order to check for the number of GPUs.
"""
function has_cuda_gpu(show_reason::Bool=false)
    # only count devices once the driver/runtime availability check has passed
    return has_cuda(show_reason) && length(devices()) > 0
end