Skip to content

Commit 2764df0

Browse files
authored
Use NSAutoreleasePool to clean up memory. (#294)
1 parent 356ee6c commit 2764df0

File tree

10 files changed

+39
-25
lines changed

10 files changed

+39
-25
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ KernelAbstractions = "0.9.1"
3838
LLVM = "6"
3939
LLVMDowngrader_jll = "0.1"
4040
ObjectFile = "0.4"
41-
ObjectiveC = "1.1, 2"
41+
ObjectiveC = "2.1"
4242
Reexport = "1.0"
4343
Requires = "1"
4444
SHA = "0.7"

lib/mtl/MTL.jl

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,12 @@ module MTL
33
using CEnum
44
using ObjectiveC, .Foundation, .Dispatch
55

6+
# Metal APIs generally expect to be running under an autorelease pool.
7+
# In most cases, we handle this in the code calling into the MTL module,
8+
# however, finalizers are out of the caller's control, so we need to
9+
# ensure here already that they are running under an autorelease pool.
10+
release(obj) = @autoreleasepool unsafe=true Foundation.release(obj)
11+
612

713
## source code includes
814

src/compiler/compilation.jl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,8 @@ function compile(@nospecialize(job::CompilerJob))
107107
end
108108

109109
# link into an executable kernel
110-
function link(@nospecialize(job::CompilerJob), compiled; return_function=false)
110+
@autoreleasepool function link(@nospecialize(job::CompilerJob), compiled;
111+
return_function=false)
111112
dev = current_device()
112113
lib = MTLLibraryFromData(dev, compiled.image)
113114
fun = MTLFunction(lib, compiled.entry)

src/compiler/execution.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -259,8 +259,8 @@ end
259259
return argument_buffer
260260
end
261261

262-
function (kernel::HostKernel)(args...; groups=1, threads=1,
263-
queue=global_queue(current_device()))
262+
@autoreleasepool function (kernel::HostKernel)(args...; groups=1, threads=1,
263+
queue=global_queue(current_device()))
264264
groups = MTLSize(groups)
265265
threads = MTLSize(threads)
266266
(groups.width>0 && groups.height>0 && groups.depth>0) ||

src/compiler/reflection.jl

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ function code_agx(io::IO, @nospecialize(func), @nospecialize(types),
3636
code_agx(io, job)
3737
end
3838

39-
function code_agx(io::IO, job::MetalCompilerJob)
39+
@autoreleasepool function code_agx(io::IO, job::MetalCompilerJob)
4040
if !job.config.kernel
4141
error("Can only generate AGX code for kernel functions")
4242
end
@@ -58,7 +58,7 @@ function code_agx(io::IO, job::MetalCompilerJob)
5858
bin = MTLBinaryArchive(current_device(), bin_desc)
5959
add_functions!(bin, pipeline_desc)
6060

61-
code = mktempdir() do dir
61+
mktempdir() do dir
6262
# serialize the archive to a file
6363
binary = joinpath(dir, "kernel.macho")
6464
write(binary, bin)
@@ -82,7 +82,6 @@ function code_agx(io::IO, job::MetalCompilerJob)
8282
first = false
8383
end
8484
end
85-
8685
end
8786

8887
@enum GPUMachineType::UInt32 begin

src/initialization.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ function __init__()
2828
end
2929
end
3030

31-
try
31+
@autoreleasepool try
3232
load_framework("CoreGraphics")
3333
ver = MTL.MTLCompileOptions().languageVersion
3434
@debug "Successfully loaded Metal; targeting v$ver."

src/memory.jl

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,8 +69,10 @@ function Base.unsafe_copyto!(dev::MTLDevice, dst::Ptr{T}, src::MtlPointer{T}, N:
6969
end
7070

7171
# GPU -> GPU
72-
function Base.unsafe_copyto!(dev::MTLDevice, dst::MtlPointer{T}, src::MtlPointer{T}, N::Integer;
73-
queue::MTLCommandQueue=global_queue(dev), async::Bool=false) where T
72+
@autoreleasepool function Base.unsafe_copyto!(dev::MTLDevice, dst::MtlPointer{T},
73+
src::MtlPointer{T}, N::Integer;
74+
queue::MTLCommandQueue=global_queue(dev),
75+
async::Bool=false) where T
7476
cmdbuf = MTLCommandBuffer(queue)
7577
MTLBlitCommandEncoder(cmdbuf) do enc
7678
MTL.append_copy!(enc, dst.buffer, dst.offset, src.buffer, src.offset, N * sizeof(T))
@@ -79,7 +81,8 @@ function Base.unsafe_copyto!(dev::MTLDevice, dst::MtlPointer{T}, src::MtlPointer
7981
async || wait_completed(cmdbuf)
8082
end
8183

82-
function unsafe_fill!(dev::MTLDevice, ptr::MtlPointer{T}, value::Union{UInt8,Int8}, N::Integer) where T
84+
@autoreleasepool function unsafe_fill!(dev::MTLDevice, ptr::MtlPointer{T},
85+
value::Union{UInt8,Int8}, N::Integer) where T
8386
cmdbuf = MTLCommandBuffer(global_queue(dev))
8487
MTLBlitCommandEncoder(cmdbuf) do enc
8588
MTL.append_fillbuffer!(enc, ptr.buffer, value, N * sizeof(T), ptr.offset)

src/pool.jl

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -51,14 +51,11 @@ The storage kwarg controls where the buffer is stored. Possible values are:
5151
Note that `Private` buffers can't be directly accessed from the CPU, therefore you cannot
5252
use this option if you pass a ptr to initialize the memory.
5353
"""
54-
function alloc(dev::Union{MTLDevice,MTLHeap},
55-
bytesize::Integer,
56-
args...;
57-
storage,
58-
kwargs...)
54+
function alloc(dev::Union{MTLDevice,MTLHeap}, bytesize::Integer, args...;
55+
storage, kwargs...)
5956

6057
time = Base.@elapsed begin
61-
buf = MTLBuffer(dev, bytesize, args...; storage, kwargs...)
58+
buf = @autoreleasepool MTLBuffer(dev, bytesize, args...; storage, kwargs...)
6259
end
6360

6461
Base.@atomic alloc_stats.alloc_count + 1
@@ -78,7 +75,7 @@ function free(buf::MTLBuffer)
7875
sz::Int = buf.length
7976

8077
time = Base.@elapsed begin
81-
release(buf)
78+
@autoreleasepool unsafe=true release(buf)
8279
end
8380

8481
Base.@atomic alloc_stats.free_count + 1

src/state.jl

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,10 @@ effectively returns the only system GPU.
1111
function current_device()
1212
get!(task_local_storage(), :MTLDevice) do
1313
dev = MTLDevice(1)
14-
supports_family(dev, MTL.MTLGPUFamilyApple7) || @warn """Metal.jl is only supported on M-series Macs, you may run into issues.
15-
See https://github.com/JuliaGPU/Metal.jl/issues/22 for more details.""" maxlog=1
14+
if !supports_family(dev, MTL.MTLGPUFamilyApple7)
15+
@warn """Metal.jl is only supported on M-series Macs, you may run into issues.
16+
See https://github.com/JuliaGPU/Metal.jl/issues/22 for more details.""" maxlog=1
17+
end
1618
return dev
1719
end::MTLDevice
1820
end
@@ -33,10 +35,14 @@ Return the Metal command queue associated with the current Julia thread.
3335
"""
3436
function global_queue(dev::MTLDevice)
3537
get!(task_local_storage(), (:MTLCommandQueue, dev)) do
36-
queue = MTLCommandQueue(dev)
37-
queue.label = "global_queue($(current_task()))"
38-
global_queues[queue] = nothing
39-
queue
38+
@autoreleasepool begin
39+
# NOTE: MTLCommandQueue itself is manually reference-counted,
40+
# the release pool is for resources used during its construction.
41+
queue = MTLCommandQueue(dev)
42+
queue.label = "global_queue($(current_task()))"
43+
global_queues[queue] = nothing
44+
queue
45+
end
4046
end::MTLCommandQueue
4147
end
4248

@@ -50,7 +56,7 @@ Create a new MTLCommandBuffer from the global command queue, commit it to the qu
5056
and simply wait for it to be completed. Since command buffers *should* execute in a
5157
First-In-First-Out manner, this synchronizes the GPU.
5258
"""
53-
function synchronize(queue::MTLCommandQueue=global_queue(current_device()))
59+
@autoreleasepool function synchronize(queue::MTLCommandQueue=global_queue(current_device()))
5460
cmdbuf = MTLCommandBuffer(queue)
5561
commit!(cmdbuf)
5662
wait_completed(cmdbuf)

test/metal.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
@testset "MTL" begin
2+
@autoreleasepool begin
23

34
@testset "devices" begin
45

@@ -462,3 +463,4 @@ end
462463
# TODO: continue adding tests
463464

464465
end
466+
end

0 commit comments

Comments
 (0)