Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions lib/JLArrays/src/JLArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,16 @@ mutable struct JLArray{T, N} <: AbstractGPUArray{T, N}
check_eltype(T)
maxsize = prod(dims) * sizeof(T)

return GPUArrays.cached_alloc((JLArray, T, dims)) do
ref = GPUArrays.cached_alloc((JLArray, maxsize)) do
data = Vector{UInt8}(undef, maxsize)
ref = DataRef(data) do data
DataRef(data) do data
resize!(data, 0)
end
obj = new{T, N}(ref, 0, dims)
finalizer(unsafe_free!, obj)
return obj
end::JLArray{T, N}
end

obj = new{T, N}(ref, 0, dims)
finalizer(unsafe_free!, obj)
return obj
end

# low-level constructor for wrapping existing data
Expand Down
14 changes: 8 additions & 6 deletions src/host/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,19 @@ end

# per-object state, with a flag to indicate whether the object has been freed.
# this is to support multiple calls to `unsafe_free!` on the same object,
# while only lowering the referene count of the underlying data once.
# while only lowering the reference count of the underlying data once.
mutable struct DataRef{D}
rc::RefCounted{D}
freed::Bool
end

function DataRef(finalizer, data::D) where {D}
rc = RefCounted{D}(data, finalizer, Threads.Atomic{Int}(1))
function DataRef(finalizer, ref::D) where {D}
rc = RefCounted{D}(ref, finalizer, Threads.Atomic{Int}(1))
DataRef{D}(rc, false)
end
DataRef(data; kwargs...) = DataRef(nothing, data; kwargs...)
DataRef(ref; kwargs...) = DataRef(nothing, ref; kwargs...)

Base.sizeof(ref::DataRef) = sizeof(ref.rc[])

function Base.getindex(ref::DataRef)
if ref.freed
Expand All @@ -80,15 +82,15 @@ function Base.copy(ref::DataRef{D}) where {D}
return DataRef{D}(ref.rc, false)
end

function unsafe_free!(ref::DataRef, args...)
function unsafe_free!(ref::DataRef)
if ref.freed
# multiple frees *of the same object* are allowed.
# we should only ever call `release` once per object, though,
# as multiple releases of the underlying data is not allowed.
return
end
ref.freed = true
release(ref.rc, args...)
release(ref.rc)
return
end

Expand Down
60 changes: 36 additions & 24 deletions src/host/alloc_cache.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ end

mutable struct AllocCache
lock::ReentrantLock
busy::Dict{UInt64, Vector{Any}} # hash(key) => GPUArray[]
free::Dict{UInt64, Vector{Any}}
busy::Dict{UInt64, Vector{DataRef}}
free::Dict{UInt64, Vector{DataRef}}

function AllocCache()
cache = new(
Expand All @@ -24,43 +24,51 @@ end
function get_pool!(cache::AllocCache, pool::Symbol, uid::UInt64)
pool = getproperty(cache, pool)
uid_pool = get(pool, uid, nothing)
if uid_pool ≡ nothing
uid_pool = Base.@lock cache.lock pool[uid] = Any[]
if uid_pool === nothing
uid_pool = pool[uid] = DataRef[]
end
return uid_pool
end

function cached_alloc(f, key)
cache = ALLOC_CACHE[]
if cache === nothing
return f()::AbstractGPUArray
return f()::DataRef
end

x = nothing
ref = nothing
uid = hash(key)

busy_pool = get_pool!(cache, :busy, uid)
free_pool = get_pool!(cache, :free, uid)
isempty(free_pool) && (x = f()::AbstractGPUArray)
Base.@lock cache.lock begin
free_pool = get_pool!(cache, :free, uid)

if !isempty(free_pool)
ref = Base.@lock cache.lock pop!(free_pool)
@assert !ref.freed
end
end

if ref === nothing
ref = f()::DataRef

# increase the refcount of the ref to prevent finalizers from freeing it
retain(ref.rc)
end

while !isempty(free_pool) && x ≡ nothing
tmp = Base.@lock cache.lock pop!(free_pool)
# Array was manually freed via `unsafe_free!`.
GPUArrays.storage(tmp).freed && continue
x = tmp
Base.@lock cache.lock begin
busy_pool = get_pool!(cache, :busy, uid)
push!(busy_pool, ref)
end

x ≡ nothing && (x = f()::AbstractGPUArray)
Base.@lock cache.lock push!(busy_pool, x)
return x
return ref
end

function free_busy!(cache::AllocCache)
for uid in cache.busy.keys
busy_pool = get_pool!(cache, :busy, uid)
isempty(busy_pool) && continue
Base.@lock cache.lock begin
for uid in keys(cache.busy)
busy_pool = get_pool!(cache, :busy, uid)
isempty(busy_pool) && continue

Base.@lock cache.lock begin
free_pool = get_pool!(cache, :free, uid)
append!(free_pool, busy_pool)
empty!(busy_pool)
Expand All @@ -71,14 +79,18 @@ end

function unsafe_free!(cache::AllocCache)
Base.@lock cache.lock begin
for (_, pool) in cache.busy
for pool in values(cache.busy)
isempty(pool) || error(
"Invalidating allocations cache that's currently in use. " *
"Invalidating inside `@cached` is not allowed."
)
end
for (_, pool) in cache.free
map(unsafe_free!, pool)
for pool in values(cache.free), ref in pool
# release our hold on the underlying data
release(ref.rc)

# early-release the reference
unsafe_free!(ref)
end
empty!(cache.free)
end
Expand Down
52 changes: 44 additions & 8 deletions test/testsuite/alloc_cache.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
if AT <: AbstractGPUArray
cache = GPUArrays.AllocCache()

# first allocation populates the cache
T, dims = Float32, (1, 2, 3)
GPUArrays.@cached cache begin
x1 = AT(zeros(T, dims))
Expand All @@ -10,34 +11,69 @@
key = first(keys(cache.free))
@test length(cache.free[key]) == 1
@test length(cache.busy[key]) == 0
@test x1 === cache.free[key][1]
@test cache.free[key][1] === GPUArrays.storage(x1)

# Second allocation hits cache.
# second allocation hits the cache
GPUArrays.@cached cache begin
x2 = AT(zeros(T, dims))
# Does not hit the cache.

# explicitly uncached ones don't
GPUArrays.@uncached x_free = AT(zeros(T, dims))
end
@test sizeof(cache) == sizeof(T) * prod(dims)
key = first(keys(cache.free))
@test length(cache.free[key]) == 1
@test length(cache.busy[key]) == 0
@test x2 === cache.free[key][1]
@test cache.free[key][1] === GPUArrays.storage(x2)
@test x_free !== x2

# Third allocation is of different shape - allocates.
dims = (2, 2)
# compatible shapes should also hit the cache
dims = (3, 2, 1)
GPUArrays.@cached cache begin
x3 = AT(zeros(T, dims))
end
@test sizeof(cache) == sizeof(T) * prod(dims)
key = first(keys(cache.free))
@test length(cache.free[key]) == 1
@test length(cache.busy[key]) == 0
@test cache.free[key][1] === GPUArrays.storage(x3)

# as should compatible eltypes
T = Int32
GPUArrays.@cached cache begin
x4 = AT(zeros(T, dims))
end
@test sizeof(cache) == sizeof(T) * prod(dims)
key = first(keys(cache.free))
@test length(cache.free[key]) == 1
@test length(cache.busy[key]) == 0
@test cache.free[key][1] === GPUArrays.storage(x4)

# different shapes should trigger a new allocation
dims = (2, 2)
GPUArrays.@cached cache begin
x5 = AT(zeros(T, dims))

# we're allowed to early free arrays, which shouldn't release the underlying data
GPUArrays.unsafe_free!(x5)
end
_keys = collect(keys(cache.free))
key2 = _keys[findfirst(i -> i != key, _keys)]
@test length(cache.free[key]) == 1
@test length(cache.free[key2]) == 1
@test x3 === cache.free[key2][1]
@test cache.free[key2][1] === GPUArrays.storage(x5)

# Freeing all memory held by cache.
# freeing all memory held by cache should free all allocations
@test !GPUArrays.storage(x1).freed
@test GPUArrays.storage(x5).freed
@test GPUArrays.storage(x5).rc.count[] == 1 # the ref appears freed, but the data isn't
@test !GPUArrays.storage(x_free).freed
GPUArrays.unsafe_free!(cache)
@test sizeof(cache) == 0
@test GPUArrays.storage(x1).freed
@test GPUArrays.storage(x1).rc.count[] == 0
@test GPUArrays.storage(x5).freed
@test GPUArrays.storage(x5).rc.count[] == 0
@test !GPUArrays.storage(x_free).freed
end
end
Loading