- 
                Notifications
    You must be signed in to change notification settings 
- Fork 46
          MTLSharedEvent for synchronization take 2
          #690
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
| Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.diff --git a/lib/mtl/capture.jl b/lib/mtl/capture.jl
index 7d28793f..9f29bdd3 100644
--- a/lib/mtl/capture.jl
+++ b/lib/mtl/capture.jl
@@ -12,13 +12,13 @@ Use [`beginScope()`](@ref) and [`endScope()`](@ref) to set the boundaries for a
 """
 MTLCaptureScope
 
-function MTLCaptureScope(queue::MTLDevice, manager=MTLCaptureManager())
+function MTLCaptureScope(queue::MTLDevice, manager = MTLCaptureManager())
     handle = @objc [manager::id{MTLCaptureManager} newCaptureScopeWithDevice:queue::id{MTLDevice}]::id{MTLCaptureScope}
-    MTLCaptureScope(handle)
+    return MTLCaptureScope(handle)
 end
-function MTLCaptureScope(queue::MTLCommandQueue, manager=MTLCaptureManager())
+function MTLCaptureScope(queue::MTLCommandQueue, manager = MTLCaptureManager())
     handle = @objc [manager::id{MTLCaptureManager} newCaptureScopeWithCommandQueue:queue::id{MTLCommandQueue}]::id{MTLCaptureScope}
-    MTLCaptureScope(handle)
+    return MTLCaptureScope(handle)
 end
 
 # @objcwrapper MTLCaptureScope <: NSObject
@@ -68,7 +68,8 @@ function MTLCaptureDescriptor()
 end
 
 # TODO: Add capture state
-function MTLCaptureDescriptor(obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
+function MTLCaptureDescriptor(
+        obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
                               destination::MTLCaptureDestination;
                               folder::String=nothing)
     desc = MTLCaptureDescriptor()
@@ -119,7 +120,8 @@ end
 
 Start GPU frame capture using the default capture object and specifying capture descriptor parameters directly.
 """
-function startCapture(obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
+function startCapture(
+        obj::Union{MTLDevice, MTLCommandQueue, MTLCaptureScope},
                       destination::MTLCaptureDestination=MTLCaptureDestinationGPUTraceDocument;
                       folder::String=nothing)
     if destination == MTLCaptureDestinationGPUTraceDocument && folder === nothing
diff --git a/lib/mtl/events.jl b/lib/mtl/events.jl
index 374f4285..8d2345ed 100644
--- a/lib/mtl/events.jl
+++ b/lib/mtl/events.jl
@@ -29,9 +29,11 @@ function MTLSharedEvent(dev::MTLDevice)
     return obj
 end
 
-function waitUntilSignaledValue(ev::MTLSharedEvent, value, timeoutMS=typemax(UInt64))
-    @objc [ev::id{MTLSharedEvent} waitUntilSignaledValue:value::UInt64
-                        timeoutMS:timeoutMS::UInt64]::Bool
+function waitUntilSignaledValue(ev::MTLSharedEvent, value, timeoutMS = typemax(UInt64))
+    return @objc [
+        ev::id{MTLSharedEvent} waitUntilSignaledValue:value::UInt64
+        timeoutMS:timeoutMS::UInt64
+    ]::Bool
 end
 
 ## shared event handle
diff --git a/src/state.jl b/src/state.jl
index ffa5f53d..918731c4 100644
--- a/src/state.jl
+++ b/src/state.jl
@@ -61,7 +61,7 @@ end
 Return the `MTLSharedEvent` used to synchronize a queue
 """
 function queue_event(queue::MTLCommandQueue)
-    get!(task_local_storage(), (:MTLSharedEvent, queue)) do
+    return get!(task_local_storage(), (:MTLSharedEvent, queue)) do
         MTLSharedEvent(queue.device)
     end::MTLSharedEvent
 end
diff --git a/test/capturing.jl b/test/capturing.jl
index 9b1da7f5..7dee3cae 100644
--- a/test/capturing.jl
+++ b/test/capturing.jl
@@ -36,9 +36,9 @@ desc.captureObject = dev
 desc.destination = MTL.MTLCaptureDestinationGPUTraceDocument
 @test desc.destination == MTL.MTLCaptureDestinationGPUTraceDocument
 
-# Capture Manager supports destination
-@test !supports_destination(manager, MTL.MTLCaptureDestinationDeveloperTools)
-@test supports_destination(manager, MTL.MTLCaptureDestinationGPUTraceDocument)
+            # Capture Manager supports destination
+            @test !supports_destination(manager, MTL.MTLCaptureDestinationDeveloperTools)
+            @test supports_destination(manager, MTL.MTLCaptureDestinationGPUTraceDocument)
 
 # Output URL
 @test desc.outputURL === nothing
@@ -69,7 +69,7 @@ bufferA = MtlArray{Float32,1,SharedStorage}(undef, tuple(4))
 @test manager.isCapturing == false
 startCapture(manager, desc)
 @test manager.isCapturing
-@test_throws "Capture manager is already capturing." startCapture(manager, desc)
+            @test_throws "Capture manager is already capturing." startCapture(manager, desc)
 Metal.@sync @metal threads=4 tester(bufferA)
 stopCapture(manager)
 @test manager.isCapturing == false | 
f695de3    to
    7804676      
    Compare
  
    There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Metal Benchmarks
| Benchmark suite | Current: 04e52b9 | Previous: 822abde | Ratio | 
|---|---|---|---|
| latency/precompile | 25029149041ns | 24961495792ns | 1.00 | 
| latency/ttfp | 2129357833ns | 2126570334ns | 1.00 | 
| latency/import | 1223676125ns | 1221863250ns | 1.00 | 
| integration/metaldevrt | 836250ns | 920646ns | 0.91 | 
| integration/byval/slices=1 | 1544333ns | 1636875ns | 0.94 | 
| integration/byval/slices=3 | 9111916ns | 8533229ns | 1.07 | 
| integration/byval/reference | 1533291.5ns | 1625125ns | 0.94 | 
| integration/byval/slices=2 | 2568958ns | 2708917ns | 0.95 | 
| kernel/indexing | 568167ns | 674520.5ns | 0.84 | 
| kernel/indexing_checked | 572750ns | 692979.5ns | 0.83 | 
| kernel/launch | 12084ns | 12292ns | 0.98 | 
| kernel/rand | 552417ns | 623458ns | 0.89 | 
| array/construct | 6291ns | 6250ns | 1.01 | 
| array/broadcast | 593666.5ns | 667000ns | 0.89 | 
| array/random/randn/Float32 | 794125ns | 822375ns | 0.97 | 
| array/random/randn!/Float32 | 605666.5ns | 627417ns | 0.97 | 
| array/random/rand!/Int64 | 549958ns | 555042ns | 0.99 | 
| array/random/rand!/Float32 | 464000ns | 588291ns | 0.79 | 
| array/random/rand/Int64 | 774208ns | 806208ns | 0.96 | 
| array/random/rand/Float32 | 645208ns | 649083ns | 0.99 | 
| array/accumulate/Int64/1d | 1255834ns | 1351708ns | 0.93 | 
| array/accumulate/Int64/dims=1 | 1815625ns | 1895167ns | 0.96 | 
| array/accumulate/Int64/dims=2 | 2141166.5ns | 2224583ns | 0.96 | 
| array/accumulate/Int64/dims=1L | 11691792ns | 11807458ns | 0.99 | 
| array/accumulate/Int64/dims=2L | 9648791.5ns | 9845792ns | 0.98 | 
| array/accumulate/Float32/1d | 1100917ns | 1211020.5ns | 0.91 | 
| array/accumulate/Float32/dims=1 | 1542875ns | 1623125ns | 0.95 | 
| array/accumulate/Float32/dims=2 | 1858625ns | 1913375ns | 0.97 | 
| array/accumulate/Float32/dims=1L | 9777021ns | 10039875ns | 0.97 | 
| array/accumulate/Float32/dims=2L | 7226166ns | 7298229.5ns | 0.99 | 
| array/reductions/reduce/Int64/1d | 1280729.5ns | 1341854ns | 0.95 | 
| array/reductions/reduce/Int64/dims=1 | 1078083ns | 1141520.5ns | 0.94 | 
| array/reductions/reduce/Int64/dims=2 | 1176666ns | 1277145.5ns | 0.92 | 
| array/reductions/reduce/Int64/dims=1L | 2020708ns | 2034687.5ns | 0.99 | 
| array/reductions/reduce/Int64/dims=2L | 3404792ns | 3549500ns | 0.96 | 
| array/reductions/reduce/Float32/1d | 964958ns | 1067125ns | 0.90 | 
| array/reductions/reduce/Float32/dims=1 | 815000ns | 883229.5ns | 0.92 | 
| array/reductions/reduce/Float32/dims=2 | 751083ns | 810979.5ns | 0.93 | 
| array/reductions/reduce/Float32/dims=1L | 1330021ns | 1373208.5ns | 0.97 | 
| array/reductions/reduce/Float32/dims=2L | 1755500ns | 1888250ns | 0.93 | 
| array/reductions/mapreduce/Int64/1d | 1310875ns | 1375438ns | 0.95 | 
| array/reductions/mapreduce/Int64/dims=1 | 1079792ns | 1176500ns | 0.92 | 
| array/reductions/mapreduce/Int64/dims=2 | 1189562ns | 1285062.5ns | 0.93 | 
| array/reductions/mapreduce/Int64/dims=1L | 1904562.5ns | 2087041ns | 0.91 | 
| array/reductions/mapreduce/Int64/dims=2L | 3304000ns | 3445167ns | 0.96 | 
| array/reductions/mapreduce/Float32/1d | 985334ns | 1065917ns | 0.92 | 
| array/reductions/mapreduce/Float32/dims=1 | 802875ns | 892125ns | 0.90 | 
| array/reductions/mapreduce/Float32/dims=2 | 758625ns | 809583ns | 0.94 | 
| array/reductions/mapreduce/Float32/dims=1L | 1331812.5ns | 1359000ns | 0.98 | 
| array/reductions/mapreduce/Float32/dims=2L | 1775792ns | 1898271ns | 0.94 | 
| array/private/copyto!/gpu_to_gpu | 634333ns | 654166ns | 0.97 | 
| array/private/copyto!/cpu_to_gpu | 802708ns | 826625ns | 0.97 | 
| array/private/copyto!/gpu_to_cpu | 784458ns | 813104.5ns | 0.96 | 
| array/private/iteration/findall/int | 1571125ns | 1668417ns | 0.94 | 
| array/private/iteration/findall/bool | 1425333.5ns | 1504500ns | 0.95 | 
| array/private/iteration/findfirst/int | 1800541.5ns | 1934521ns | 0.93 | 
| array/private/iteration/findfirst/bool | 1652708ns | 1855687ns | 0.89 | 
| array/private/iteration/scalar | 4217125ns | 4774709ns | 0.88 | 
| array/private/iteration/logical | 2525375ns | 2574750ns | 0.98 | 
| array/private/iteration/findmin/1d | 1875812.5ns | 2064500ns | 0.91 | 
| array/private/iteration/findmin/2d | 1518667ns | 1624500ns | 0.93 | 
| array/private/copy | 568292ns | 550458ns | 1.03 | 
| array/shared/copyto!/gpu_to_gpu | 85125ns | 83333ns | 1.02 | 
| array/shared/copyto!/cpu_to_gpu | 82459ns | 94500ns | 0.87 | 
| array/shared/copyto!/gpu_to_cpu | 82167ns | 82958ns | 0.99 | 
| array/shared/iteration/findall/int | 1576021ns | 1643125ns | 0.96 | 
| array/shared/iteration/findall/bool | 1426479ns | 1516125ns | 0.94 | 
| array/shared/iteration/findfirst/int | 1325084ns | 1457083ns | 0.91 | 
| array/shared/iteration/findfirst/bool | 1308916.5ns | 1419167ns | 0.92 | 
| array/shared/iteration/scalar | 210584ns | 159229.5ns | 1.32 | 
| array/shared/iteration/logical | 2399542ns | 2361416ns | 1.02 | 
| array/shared/iteration/findmin/1d | 1392937.5ns | 1620625ns | 0.86 | 
| array/shared/iteration/findmin/2d | 1518708ns | 1622208ns | 0.94 | 
| array/shared/copy | 253458.5ns | 245750ns | 1.03 | 
| array/permutedims/4d | 2343333.5ns | 2439396ns | 0.96 | 
| array/permutedims/2d | 1146875ns | 1226959ns | 0.93 | 
| array/permutedims/3d | 1666333.5ns | 1746270.5ns | 0.95 | 
| metal/synchronization/stream | 18833ns | 14833ns | 1.27 | 
| metal/synchronization/context | 20042ns | 15334ns | 1.31 | 
This comment was automatically generated by workflow using github-action-benchmark.
| Codecov Report❌ Patch coverage is  
 Additional details and impacted files@@            Coverage Diff             @@
##             main     #690      +/-   ##
==========================================
+ Coverage   80.89%   81.20%   +0.31%     
==========================================
  Files          62       62              
  Lines        2790     2810      +20     
==========================================
+ Hits         2257     2282      +25     
+ Misses        533      528       -5     ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
 | 
63af0ed    to
    2a7c978      
    Compare
  
    | I suspect  The current spinlock is a potential option. Another is maybe ensuring we synchronize after  @maleadt any thoughts? | 
| 
 Seems like the cleaner solution, no? | 
| Unfortunately synchronizing doesn't work. With 14d3d0e locally, when it fails, it usually takes ~90 ms for capture to stop. Should I just use a spinlock? | 
| Yeah, that seems reasonable. Do add some code comments about the reason though. | 
- Default to capturing with MTLCaptureScope - Test improvments - Spinlock until capture is over in `stopCapture`
Add `waitUntilSignaledValue`
14d3d0e    to
    04e52b9      
    Compare
  
    
Not ready to merge as I haven't changed anything since #645 but leaving it up.
I'm considering maybe making it an experimental local preference that's off by default and then adding a special CI to try and catch failures.