Skip to content

Commit 7d7617d

Browse files
authored
Add support for signposts (#300)
1 parent 2764df0 commit 7d7617d

File tree

6 files changed

+98
-60
lines changed

6 files changed

+98
-60
lines changed

docs/src/profiling.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,24 @@ julia> Metal.@profile @metal threads=length(c) vadd(a, b, c);
4646
[ Info: System trace saved to julia_3.trace; open the resulting trace in Instruments
4747
```
4848
49+
It is possible to augment the trace with additional information by using signposts. Similar
50+
to NVTX markers and ranges in CUDA.jl, signpost intervals and events can be used to add
51+
time intervals and points of interest, respectively, to the trace. This can be done by using
52+
the signpost functionality from ObjectiveC.jl:
53+
54+
```julia
55+
using ObjectiveC, .OS
56+
57+
@signpost_interval "My Interval" begin
58+
# code to profile
59+
@signpost_event "My Event"
60+
end
61+
```
62+
63+
For more information, e.g. how to pass additional messages to the signposts, or how to
64+
use a custom logger, consult the ObjectiveC.jl documentation, or the docstrings of the
65+
`@signpost_interval` and `@signpost_event` macros.
66+
4967
## Frame capture
5068
5169
For more details on specific operations, you can use Metal's frame capture feature to

src/Metal.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ using Python_jll
1111
using ObjectFile
1212
using ExprTools: splitdef, combinedef
1313
using Artifacts
14-
using ObjectiveC, .CoreFoundation, .Foundation, .Dispatch
14+
using ObjectiveC, .CoreFoundation, .Foundation, .Dispatch, .OS
1515

1616
if !isdefined(Base, :get_extension)
1717
using Requires: @require

src/compiler/compilation.jl

Lines changed: 66 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -55,52 +55,59 @@ end
5555

5656
# compile to executable machine code
5757
function compile(@nospecialize(job::CompilerJob))
58-
# TODO: on 1.9, this actually creates a context. cache those.
59-
ir, entry = JuliaContext() do ctx
60-
mod, meta = GPUCompiler.compile(:llvm, job)
61-
string(mod), LLVM.name(meta.entry)
62-
end
63-
64-
# generate AIR
65-
air = let
66-
input = Pipe()
67-
output = Pipe()
58+
@signpost_event log=log_compiler() "Compile" "Job=$job"
6859

69-
cmd = `$(LLVMDowngrader_jll.llvm_as()) --bitcode-version=5.0 -o -`
70-
proc = run(pipeline(cmd, stdout=output, stderr=stderr, stdin=input); wait=false)
71-
close(output.in)
72-
73-
writer = @async begin
74-
write(input, ir)
75-
close(input)
60+
@signpost_interval log=log_compiler() "Generate LLVM IR" begin
61+
# TODO: on 1.9, this actually creates a context. cache those.
62+
ir, entry = JuliaContext() do ctx
63+
mod, meta = GPUCompiler.compile(:llvm, job)
64+
string(mod), LLVM.name(meta.entry)
7665
end
77-
reader = @async read(output)
78-
79-
wait(proc)
80-
if !success(proc)
81-
file = tempname(cleanup=false) * ".ll"
82-
write(file, ir)
83-
error("""Compilation to AIR failed; see above for details.
84-
If you think this is a bug, please file an issue and attach $(file)""")
66+
end
67+
68+
@signpost_interval log=log_compiler() "Downgrade to AIR" begin
69+
# generate AIR
70+
air = let
71+
input = Pipe()
72+
output = Pipe()
73+
74+
cmd = `$(LLVMDowngrader_jll.llvm_as()) --bitcode-version=5.0 -o -`
75+
proc = run(pipeline(cmd, stdout=output, stderr=stderr, stdin=input); wait=false)
76+
close(output.in)
77+
78+
writer = @async begin
79+
write(input, ir)
80+
close(input)
81+
end
82+
reader = @async read(output)
83+
84+
wait(proc)
85+
if !success(proc)
86+
file = tempname(cleanup=false) * ".ll"
87+
write(file, ir)
88+
error("""Compilation to AIR failed; see above for details.
89+
If you think this is a bug, please file an issue and attach $(file)""")
90+
end
91+
fetch(reader)
8592
end
86-
fetch(reader)
8793
end
8894

89-
# create a Metal library
90-
image = try
91-
metallib_fun = MetalLibFunction(; name=entry, air_module=air,
92-
air_version=job.config.target.air,
93-
metal_version=job.config.target.metal)
94-
metallib = MetalLib(; functions = [metallib_fun])
95-
96-
image_stream = IOBuffer()
97-
write(image_stream, metallib)
98-
take!(image_stream)
99-
catch err
100-
file = tempname(cleanup=false) * ".air"
101-
write(file, air)
102-
error("""Compilation to Metal library failed; see below for details.
103-
If you think this is a bug, please file an issue and attach $(file)""")
95+
@signpost_interval log=log_compiler() "Create Metal library" begin
96+
image = try
97+
metallib_fun = MetalLibFunction(; name=entry, air_module=air,
98+
air_version=job.config.target.air,
99+
metal_version=job.config.target.metal)
100+
metallib = MetalLib(; functions = [metallib_fun])
101+
102+
image_stream = IOBuffer()
103+
write(image_stream, metallib)
104+
take!(image_stream)
105+
catch err
106+
file = tempname(cleanup=false) * ".air"
107+
write(file, air)
108+
error("""Compilation to Metal library failed; see below for details.
109+
If you think this is a bug, please file an issue and attach $(file)""")
110+
end
104111
end
105112

106113
return (; image, entry)
@@ -109,20 +116,24 @@ end
109116
# link into an executable kernel
110117
@autoreleasepool function link(@nospecialize(job::CompilerJob), compiled;
111118
return_function=false)
112-
dev = current_device()
113-
lib = MTLLibraryFromData(dev, compiled.image)
114-
fun = MTLFunction(lib, compiled.entry)
115-
pipeline_state = try
116-
MTLComputePipelineState(dev, fun)
117-
catch err
118-
isa(err, NSError) || rethrow()
119-
120-
# the back-end compiler likely failed
121-
# XXX: check more accurately? the error domain doesn't help much here
122-
file = tempname(cleanup=false) * ".metallib"
123-
write(file, compiled.image)
124-
error("""Compilation to native code failed; see below for details.
125-
If you think this is a bug, please file an issue and attach $(file)""")
119+
@signpost_event log=log_compiler() "Link" "Job=$job"
120+
121+
@signpost_interval log=log_compiler() "Instantiate compute pipeline" begin
122+
dev = current_device()
123+
lib = MTLLibraryFromData(dev, compiled.image)
124+
fun = MTLFunction(lib, compiled.entry)
125+
pipeline_state = try
126+
MTLComputePipelineState(dev, fun)
127+
catch err
128+
isa(err, NSError) || rethrow()
129+
130+
# the back-end compiler likely failed
131+
# XXX: check more accurately? the error domain doesn't help much here
132+
file = tempname(cleanup=false) * ".metallib"
133+
write(file, compiled.image)
134+
error("""Compilation to native code failed; see below for details.
135+
If you think this is a bug, please file an issue and attach $(file)""")
136+
end
126137
end
127138

128139
# most of the time, we don't need the function object,

src/pool.jl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,15 @@ The storage kwarg controls where the buffer is stored. Possible values are:
5151
Note that `Private` buffers can't be directly accessed from the CPU, therefore you cannot
5252
use this option if you pass a ptr to initialize the memory.
5353
"""
54-
function alloc(dev::Union{MTLDevice,MTLHeap}, bytesize::Integer, args...;
55-
storage, kwargs...)
54+
function alloc(dev::Union{MTLDevice,MTLHeap}, sz::Integer, args...; storage, kwargs...)
55+
@signpost_event log=log_array() "Allocate" "Size=$(Base.format_bytes(sz))"
5656

5757
time = Base.@elapsed begin
58-
buf = @autoreleasepool MTLBuffer(dev, bytesize, args...; storage, kwargs...)
58+
buf = @autoreleasepool MTLBuffer(dev, sz, args...; storage, kwargs...)
5959
end
6060

6161
Base.@atomic alloc_stats.alloc_count + 1
62-
Base.@atomic alloc_stats.alloc_bytes + bytesize
62+
Base.@atomic alloc_stats.alloc_bytes + sz
6363
Base.@atomic alloc_stats.total_time + time
6464

6565
return buf
@@ -73,6 +73,7 @@ This does not protect against double-freeing of the same buffer!
7373
"""
7474
function free(buf::MTLBuffer)
7575
sz::Int = buf.length
76+
@signpost_event log=log_array() "Free" "Size=$(Base.format_bytes(sz))"
7677

7778
time = Base.@elapsed begin
7879
@autoreleasepool unsafe=true release(buf)

src/state.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
export current_device, device!, global_queue, synchronize, device_synchronize
22

3+
# Logger handed to the compiler signposts via `log=log_compiler()`. Calls the `OSLog`
# constructor with "org.juliagpu.metal" and "Compiler" on every invocation (presumably
# the os_log subsystem and category, respectively — confirm against ObjectiveC.jl).
log_compiler() = OSLog("org.juliagpu.metal", "Compiler")
4+
# Convenience method: calls the object returned by `log_compiler()` with `args...`.
log_compiler(args...) = log_compiler()(args...)
5+
# Logger handed to the allocation signposts via `log=log_array()`. Calls the `OSLog`
# constructor with "org.juliagpu.metal" and "Array" on every invocation (presumably
# the os_log subsystem and category, respectively — confirm against ObjectiveC.jl).
log_array() = OSLog("org.juliagpu.metal", "Array")
6+
# Convenience method: calls the object returned by `log_array()` with `args...`.
log_array(args...) = log_array()(args...)
7+
38
"""
49
current_device()::MTLDevice
510

src/utilities.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ function profiled(f)
161161
notification_name = "julia.metal.profile"
162162
folder = profile_dir()
163163
instruments = [
164+
# relevant instruments taken from `xcrun xctrace list instruments`
164165
"GPU",
165166

166167
# CPU
@@ -169,6 +170,8 @@ function profiled(f)
169170
"Metal Application",
170171
"Metal GPU Counters",
171172
"Metal Resource Events",
173+
174+
"os_signpost",
172175
]
173176
cmd = `xctrace record`
174177
for instrument in instruments

0 commit comments

Comments
 (0)