-
Notifications
You must be signed in to change notification settings - Fork 269
Expand file tree
/
Copy pathprecompile.jl
More file actions
46 lines (44 loc) · 1.94 KB
/
precompile.jl
File metadata and controls
46 lines (44 loc) · 1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# @profile infrastructure: these entry points are GPU-dependent and can't be executed
# during precompilation, so their signatures are handed to `precompile` explicitly.
for signature in (
        Tuple{typeof(Profile.detect_cupti)},
        Tuple{typeof(Profile.profile_internally), Function},
        Tuple{typeof(Profile.capture), CUPTI.ActivityConfig},
    )
    precompile(signature)
end
using PrecompileTools
@compile_workload begin
    # Run the @profile display path on hand-built results, so no GPU is needed.
    # The show method expects at least two cuCtxSynchronize entries in the host trace
    # to delimit the profiled region, and at least one event between them.
    dummy = let
        # host-side API calls
        host_trace = (
            id    = Int[1, 2, 3, 4],
            start = Float64[0.0, 0.001, 0.002, 0.010],
            stop  = Float64[0.001, 0.002, 0.009, 0.011],
            name  = String[
                "cuCtxSynchronize",
                "cuCtxSynchronize",
                "cuLaunchKernel",
                "cuCtxSynchronize",
            ],
            tid   = Int[1, 1, 1, 1],
        )

        # a single device-side entry; its id matches the cuLaunchKernel host entry
        device_trace = (
            id         = Int[3],
            start      = Float64[0.003],
            stop       = Float64[0.008],
            name       = String["kernel"],
            device     = Int[0],
            context    = Int[1],
            stream     = Int[1],
            grid       = Union{Missing,CUDACore.CuDim3}[CUDACore.CuDim3(1, 1, 1)],
            block      = Union{Missing,CUDACore.CuDim3}[CUDACore.CuDim3(1, 1, 1)],
            registers  = Union{Missing,Int64}[32],
            shared_mem = Union{Missing,@NamedTuple{static::Int64,dynamic::Int64}}[
                (static = 0, dynamic = 0),
            ],
            local_mem  = Union{Missing,@NamedTuple{thread::Int64,total::Int64}}[
                (thread = 0, total = 0),
            ],
            size       = Union{Missing,Int64}[missing],
        )

        # no NVTX entries: every column is present but empty
        nvtx_trace = (
            id     = Int[],
            start  = Float64[],
            type   = Symbol[],
            tid    = Int[],
            name   = Union{Missing,String}[],
            domain = Union{Missing,String}[],
        )

        Profile.ProfileResults(;
            host   = host_trace,
            device = device_trace,
            nvtx   = nvtx_trace,
        )
    end

    # render into devnull: only the compilation matters, the output is discarded
    show(devnull, dummy)
end