# Patch summary (two files):
#   lib/mtl/events.jl - add `waitUntilSignaledValue(ev, value, timeoutMS)`, an `@objc` call to
#                       `waitUntilSignaledValue:timeoutMS:` on an `MTLSharedEvent`; the call is
#                       typed as returning `Bool`, and `timeoutMS` defaults to `typemax(UInt64)`.
#   src/state.jl      - add `queue_event(queue)`, which caches one `MTLSharedEvent` per queue in
#                       task-local storage, and change `synchronize` to encode a signal for
#                       `signaledValue + 1` into a fresh command buffer, commit it, and wait on
#                       the event instead of calling `wait_completed(cmdbuf)`.
diff --git a/lib/mtl/events.jl b/lib/mtl/events.jl
index b3b9cb7bf..374f42857 100644
--- a/lib/mtl/events.jl
+++ b/lib/mtl/events.jl
@@ -29,6 +29,10 @@ function MTLSharedEvent(dev::MTLDevice)
     return obj
 end
 
+function waitUntilSignaledValue(ev::MTLSharedEvent, value, timeoutMS=typemax(UInt64))
+    @objc [ev::id{MTLSharedEvent} waitUntilSignaledValue:value::UInt64
+                                  timeoutMS:timeoutMS::UInt64]::Bool
+end
 
 ## shared event handle
 
diff --git a/src/state.jl b/src/state.jl
index 3a0512e52..0782db68f 100644
--- a/src/state.jl
+++ b/src/state.jl
@@ -55,6 +55,17 @@ function global_queue(dev::MTLDevice)
     end::MTLCommandQueue
 end
 
+"""
+    queue_event(queue::MTLCommandQueue)::MTLSharedEvent
+
+Return the `MTLSharedEvent` used to synchronize a queue
+"""
+function queue_event(queue::MTLCommandQueue)
+    get!(task_local_storage(), (:MTLSharedEvent, queue)) do
+        MTLSharedEvent(queue.device)
+    end::MTLSharedEvent
+end
+
 # TODO: Increase performance (currently ~15us)
 """
     synchronize(queue)
@@ -66,9 +77,13 @@ and simply wait for it to be completed. Since command buffers *should* execute i
 First-In-First-Out manner, this synchronizes the GPU.
 """
 @autoreleasepool function synchronize(queue::MTLCommandQueue=global_queue(device()))
+    ev = queue_event(queue)
+    val = ev.signaledValue + 1
     cmdbuf = MTLCommandBuffer(queue)
+    MTL.encode_signal!(cmdbuf, ev, val)
     commit!(cmdbuf)
-    wait_completed(cmdbuf)
+    MTL.waitUntilSignaledValue(ev,val)
+    return
 end
 
 """