Skip to content

Commit 3c1e9e8

Browse files
tgymnichchristiangnrd
authored andcommitted
Implement @mtlprintf using os_log
1 parent 962e9e1 commit 3c1e9e8

File tree

13 files changed

+597
-14
lines changed

13 files changed

+597
-14
lines changed

docs/src/api/kernel.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,13 @@ MtlThreadGroupArray
5353
MemoryFlags
5454
threadgroup_barrier
5555
simdgroup_barrier
56+
```
57+
58+
## Printing
59+
60+
```@docs
61+
@mtlprintf
62+
@mtlprint
63+
@mtlprintln
64+
@mtlshow
5665
```

docs/src/usage/kernel.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,34 @@ Additional notes:
8484
- Kernels must always return nothing
8585
- Kernels are asynchronous. To synchronize, use the `Metal.@sync` macro.
8686

87+
## Printing
88+
89+
When debugging, it's not uncommon to want to print some values. This is achieved with `@mtlprintf`:
90+
91+
```julia
92+
function gpu_add2_print!(y, x)
93+
index = thread_position_in_grid_1d()
94+
@mtlprintf("thread %d", index)
95+
@inbounds y[i] += x[i]
96+
return nothing
97+
end
98+
99+
A = Metal.ones(Float32, 8);
100+
B = Metal.rand(Float32, 8);
101+
102+
@metal threads=length(A) gpu_add2_print!(A, B)
103+
```
104+
105+
`@mtlprintf` is supported on macOS 15 and later. `@mtlprintf` support most of the format specifiers that `printf`
106+
supports in C with the following exceptions:
107+
- `%n` and `%s` conversion specifiers are not supported
108+
- Default argument promotion applies to arguments of half type which promote to the `double` type
109+
- The format string must be a string literal
110+
111+
Metal places output from `@mtlprintf` into a log buffer. The system only removes the messages from the log buffer when the command buffer completes. When the log buffer becomes full, the system drops all subsequent messages.
112+
113+
See also: `@mtlprint`, `@mtlprintln` and `@mtlshow`
114+
87115
## Other Helpful Links
88116

89117
[Metal Shading Language Specification](https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf)

lib/mtl/MTL.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ include("events.jl")
3434
include("fences.jl")
3535
include("heap.jl")
3636
include("buffer.jl")
37+
include("log_state.jl")
3738
include("command_queue.jl")
3839
include("command_buf.jl")
3940
include("compute_pipeline.jl")

lib/mtl/command_queue.jl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
export MTLCommandQueueDescriptor
2+
3+
@objcwrapper immutable=false MTLCommandQueueDescriptor <: NSObject
4+
5+
@objcproperties MTLCommandQueueDescriptor begin
6+
@autoproperty maxCommandBufferCount::NSUInteger
7+
@autoproperty logState::id{MTLLogState} setter=setLogState
8+
end
9+
10+
function MTLCommandQueueDescriptor()
11+
handle = @objc [MTLCommandQueueDescriptor alloc]::id{MTLCommandQueueDescriptor}
12+
obj = MTLCommandQueueDescriptor(handle)
13+
finalizer(release, obj)
14+
@objc [obj::id{MTLCommandQueueDescriptor} init]::id{MTLCommandQueueDescriptor}
15+
return obj
16+
end
17+
18+
119
export MTLCommandQueue
220

321
# @objcwrapper immutable=false MTLCommandQueue <: NSObject
@@ -8,3 +26,10 @@ function MTLCommandQueue(dev::MTLDevice)
826
finalizer(release, obj)
927
return obj
1028
end
29+
30+
function MTLCommandQueue(dev::MTLDevice, descriptor::MTLCommandQueueDescriptor)
31+
handle = @objc [dev::id{MTLDevice} newCommandQueueWithDescriptor:descriptor::id{MTLCommandQueueDescriptor}]::id{MTLCommandQueue}
32+
obj = MTLCommandQueue(handle)
33+
finalizer(release, obj)
34+
return obj
35+
end

lib/mtl/log_state.jl

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
export MTLLogLevel
2+
3+
@cenum MTLLogLevel::NSInteger begin
4+
MTLLogLevelUndefined = 0
5+
MTLLogLevelDebug = 1
6+
MTLLogLevelInfo = 2
7+
MTLLogLevelNotice = 3
8+
MTLLogLevelError = 4
9+
MTLLogLevelFault = 5
10+
end
11+
12+
export MTLLogStateDescriptor
13+
14+
@objcwrapper immutable=false MTLLogStateDescriptor <: NSObject
15+
16+
@objcproperties MTLLogStateDescriptor begin
17+
@autoproperty level::MTLLogLevel setter=setLevel
18+
@autoproperty bufferSize::NSInteger setter=setBufferSize
19+
end
20+
21+
function MTLLogStateDescriptor()
22+
handle = @objc [MTLLogStateDescriptor alloc]::id{MTLLogStateDescriptor}
23+
obj = MTLLogStateDescriptor(handle)
24+
finalizer(release, obj)
25+
@objc [obj::id{MTLLogStateDescriptor} init]::id{MTLLogStateDescriptor}
26+
return obj
27+
end
28+
29+
30+
export MTLLogState
31+
32+
@objcwrapper MTLLogState <: NSObject
33+
34+
function MTLLogState(dev::MTLDevice, descriptor::MTLLogStateDescriptor)
35+
err = Ref{id{NSError}}(nil)
36+
handle = @objc [dev::id{MTLDevice} newLogStateWithDescriptor:descriptor::id{MTLLogStateDescriptor}
37+
error:err::Ptr{id{NSError}}]::id{MTLLogState}
38+
err[] == nil || throw(NSError(err[]))
39+
MTLLogState(handle)
40+
end

src/Metal.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ include("device/intrinsics/synchronization.jl")
3737
include("device/intrinsics/memory.jl")
3838
include("device/intrinsics/simd.jl")
3939
include("device/intrinsics/atomics.jl")
40+
include("device/intrinsics/output.jl")
4041
include("device/quirks.jl")
4142

4243
# array essentials

src/compiler/compilation.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ function compile(@nospecialize(job::CompilerJob))
104104

105105
@signpost_interval log=log_compiler() "Generate LLVM IR" begin
106106
# TODO: on 1.9, this actually creates a context. cache those.
107-
ir, entry = JuliaContext() do ctx
107+
ir, entry, loggingEnabled = JuliaContext() do ctx
108108
mod, meta = GPUCompiler.compile(:llvm, job)
109-
string(mod), LLVM.name(meta.entry)
109+
string(mod), LLVM.name(meta.entry), haskey(functions(mod), "air.os_log")
110110
end
111111
end
112112

@@ -172,7 +172,7 @@ function compile(@nospecialize(job::CompilerJob))
172172
end
173173
end
174174

175-
return (; ir, air, metallib, entry)
175+
return (; ir, air, metallib, entry, loggingEnabled)
176176
end
177177

178178
# link into an executable kernel
@@ -210,5 +210,7 @@ end
210210
end
211211
end
212212

213-
pipeline_state
213+
# most of the time, we don't need the function object,
214+
# so don't keep it alive unconditionally in GPUCompiler's caches
215+
pipeline_state, compiled.loggingEnabled
214216
end

src/compiler/execution.jl

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ mtlconvert(arg, cce=nothing) = adapt(Adaptor(cce), arg)
161161
struct HostKernel{F,TT}
162162
f::F
163163
pipeline::MTLComputePipelineState
164+
loggingEnabled::Bool
164165
end
165166

166167
const mtlfunction_lock = ReentrantLock()
@@ -186,15 +187,15 @@ function mtlfunction(f::F, tt::TT=Tuple{}; name=nothing, kwargs...) where {F,TT}
186187
cache = compiler_cache(dev)
187188
source = methodinstance(F, tt)
188189
config = compiler_config(dev; name, kwargs...)::MetalCompilerConfig
189-
pipeline = GPUCompiler.cached_compilation(cache, source, config, compile, link)
190+
pipeline, loggingEnabled = GPUCompiler.cached_compilation(cache, source, config, compile, link)
190191

191192
# create a callable object that captures the function instance. we don't need to think
192193
# about world age here, as GPUCompiler already does and will return a different object
193194
h = hash(pipeline, hash(f, hash(tt)))
194195
kernel = get(_kernel_instances, h, nothing)
195196
if kernel === nothing
196197
# create the kernel state object
197-
kernel = HostKernel{F,tt}(f, pipeline)
198+
kernel = HostKernel{F,tt}(f, pipeline, loggingEnabled)
198199
_kernel_instances[h] = kernel
199200
end
200201
return kernel::HostKernel{F,tt}
@@ -275,7 +276,34 @@ end
275276
(threads.width * threads.height * threads.depth) > kernel.pipeline.maxTotalThreadsPerThreadgroup &&
276277
throw(ArgumentError("Number of threads in group ($(threads.width * threads.height * threads.depth)) should not exceed $(kernel.pipeline.maxTotalThreadsPerThreadgroup)"))
277278

278-
cmdbuf = MTLCommandBuffer(queue)
279+
cmdbuf = if kernel.loggingEnabled
280+
if macos_version() < v"15"
281+
@error "Logging is only supported on macOS 15 or higher"
282+
end
283+
284+
if MTLCaptureManager().isCapturing
285+
@error "Logging is not supported while GPU frame capturing"
286+
end
287+
288+
log_state_descriptor = MTLLogStateDescriptor()
289+
log_state_descriptor.level = MTL.MTLLogLevelDebug
290+
log_state = MTLLogState(queue.device, log_state_descriptor)
291+
292+
function log_handler(subSystem, category, logLevel, message)
293+
print(String(NSString(message)))
294+
return nothing
295+
end
296+
297+
block = @objcblock(log_handler, Nothing, (id{NSString}, id{NSString}, NSInteger, id{NSString}))
298+
@objc [log_state::id{MTLLogState} addLogHandler:block::id{NSBlock}]::Nothing
299+
300+
cmdbuf_descriptor = MTLCommandBufferDescriptor()
301+
cmdbuf_descriptor.logState = log_state
302+
MTLCommandBuffer(queue, cmdbuf_descriptor)
303+
else
304+
MTLCommandBuffer(queue)
305+
end
306+
279307
cmdbuf.label = "MTLCommandBuffer($(nameof(kernel.f)))"
280308
cce = MTLComputeCommandEncoder(cmdbuf)
281309
argument_buffers = try

0 commit comments

Comments
 (0)