-
Notifications
You must be signed in to change notification settings - Fork 90
Add caching allocator interface #576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
28 commits
Select commit
Hold shift + click to select a range
12cad62
Implement allocation cache
pxl-th c6f128f
Correctly fetch underlying storage
pxl-th c2f32e1
Add cache sizeof
pxl-th 44e8990
Allow bulk-freeing arrays instead of caching them
pxl-th 5ce044d
Add docs
pxl-th 9776578
Add tests
pxl-th c5032ad
Update docs
pxl-th 99a8171
Update docs & disable test for now
pxl-th ad828df
Use array type instead of KA backend & allow arbitrary keys
pxl-th e601f17
Minor cleanups
pxl-th ba1941a
Remove 'free_immediately' param
pxl-th 166254f
Limit caching allocator tests to AbstractGPUArray
pxl-th 01d6abc
Fix tests for 1.10
pxl-th 41bb06d
Runic formatting.
maleadt b2df4c5
Simplify
pxl-th 3ffca03
Cleanup
pxl-th 96af44c
Update src/host/alloc_cache.jl
pxl-th cf5fda2
Update src/host/alloc_cache.jl
pxl-th 36ced83
Update src/host/alloc_cache.jl
pxl-th c98bfa4
Update docs
pxl-th 63ffeae
Rename enable to cached
pxl-th 972b386
Rename.
maleadt 9960b52
Simplify back-end interface.
maleadt 7e32124
Apply suggestions from code review
maleadt e579824
Strip down cache from array type
pxl-th cdc2543
Add return stmt
pxl-th 8734a35
Improve docs.
maleadt a83a527
Remove duplicate gitignore.
maleadt File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| using ..GPUArrays | ||
|
|
||
| @static if VERSION < v"1.11" | ||
| using ScopedValues | ||
| else | ||
| using Base.ScopedValues | ||
| end | ||
|
|
||
"""
    AllocCache()

Container for cached GPU allocations.

Arrays are grouped by `hash(key)` into two dictionaries of pools: `busy` holds arrays that
were handed out by `cached_alloc` and not yet released, `free` holds arrays available for
reuse. `lock` guards mutation of the pools.

A finalizer is registered that calls `unsafe_free!` on the cache.
"""
mutable struct AllocCache
    lock::ReentrantLock
    busy::Dict{UInt64, Vector{Any}} # hash(key) => GPUArray[]
    free::Dict{UInt64, Vector{Any}}

    function AllocCache()
        busy = Dict{UInt64, Vector{Any}}()
        free = Dict{UInt64, Vector{Any}}()
        cache = new(ReentrantLock(), busy, free)
        # Register cleanup of the cached arrays for when the cache is collected.
        finalizer(unsafe_free!, cache)
        return cache
    end
end
|
|
||
"""
    get_pool!(cache::AllocCache, pool::Symbol, uid::UInt64)

Return the vector of arrays stored under `uid` in the pool dictionary of `cache` selected by
`pool` (the field name, `:busy` or `:free`). If `uid` has no entry yet, an empty `Any[]` is
inserted and returned.

The lookup and the insertion are performed as one step while holding `cache.lock`, so a
concurrent insertion by another task can neither be observed half-way nor be overwritten.
`cache.lock` is a `ReentrantLock`, so calling this while already holding the lock is fine.
"""
function get_pool!(cache::AllocCache, pool::Symbol, uid::UInt64)
    pools = getproperty(cache, pool)
    return Base.@lock cache.lock get!(() -> Any[], pools, uid)
end
|
|
||
"""
    cached_alloc(f, key)

Return an array for the allocation identified by `key`, reusing a cached one when possible.

If no cache is active (`ALLOC_CACHE[]` is `nothing`), the result of `f()` is returned and
nothing is recorded. Otherwise the free pool for `hash(key)` is searched for an array whose
storage has not been freed; if none is found, `f()` is called to allocate a new one. The
returned array is pushed onto the matching busy pool.
"""
function cached_alloc(f, key)
    cache = ALLOC_CACHE[]
    cache === nothing && return f()

    uid = hash(key)
    busy_pool = get_pool!(cache, :busy, uid)
    free_pool = get_pool!(cache, :free, uid)

    # Check for and take a cached array under a single lock acquisition, so that another
    # task cannot empty `free_pool` between the `isempty` check and the `pop!`.
    x = Base.@lock cache.lock begin
        found = nothing
        while found === nothing && !isempty(free_pool)
            candidate = pop!(free_pool)
            # Array was manually freed via `unsafe_free!`; drop it and keep looking.
            GPUArrays.storage(candidate).freed && continue
            found = candidate
        end
        found
    end

    # Nothing reusable was cached: allocate. This is done without holding the lock.
    x === nothing && (x = f())
    Base.@lock cache.lock push!(busy_pool, x)
    return x
end
|
|
||
"""
    free_busy!(cache::AllocCache)

Move every array from the busy pools of `cache` to the free pool with the same key, leaving
all busy pools empty. Returns `nothing`.

The whole transfer runs under `cache.lock`.
"""
function free_busy!(cache::AllocCache)
    Base.@lock cache.lock begin
        # Iterate the dictionary's live entries. The internal `keys` field of a `Dict` is
        # its raw slot storage and also contains slots that hold no entry.
        for (uid, busy_pool) in cache.busy
            isempty(busy_pool) && continue
            # May insert into `cache.free`, never into `cache.busy`, so the dictionary
            # being iterated is not modified.
            free_pool = get_pool!(cache, :free, uid)
            append!(free_pool, busy_pool)
            empty!(busy_pool)
        end
    end
    return
end
|
|
||
# Free every array held in the `free` pools of `cache` and drop those pools.
#
# Raises an error if any busy pool is non-empty, i.e. the cache is invalidated while arrays
# handed out by it have not been released yet. All work is done while holding `cache.lock`.
function unsafe_free!(cache::AllocCache)
    lock(cache.lock)
    try
        if any(!isempty, values(cache.busy))
            error(
                "Invalidating allocations cache that's currently in use. " *
                    "Invalidating inside `@cached` is not allowed."
            )
        end
        for cached_arrays in values(cache.free)
            foreach(unsafe_free!, cached_arrays)
        end
        empty!(cache.free)
    finally
        unlock(cache.lock)
    end
    return
end
|
|
||
# Total `sizeof` of all arrays tracked by `cache`, free and busy pools alike, as a `UInt64`.
# The pools are traversed while holding `cache.lock`.
function Base.sizeof(cache::AllocCache)
    return Base.@lock cache.lock begin
        total = UInt64(0)
        for pools in (cache.free, cache.busy)
            for arrays in values(pools)
                total += sum(sizeof, arrays; init = UInt64(0))
            end
        end
        total
    end
end
|
|
||
# Print a one-line summary: number of free and busy arrays and the total cached size.
# The counts and the size are gathered under a single acquisition of `cache.lock`.
function Base.show(io::IO, cache::AllocCache)
    n_arrays(pools) = sum(length, values(pools); init = 0)
    sz, n_free, n_busy = Base.@lock cache.lock begin
        (sizeof(cache), n_arrays(cache.free), n_arrays(cache.busy))
    end
    return print(io, "AllocCache(n_free=$n_free, n_busy=$n_busy, sizeof=$(Base.format_bytes(sz)))")
end
|
|
||
| const ALLOC_CACHE = ScopedValue{Union{Nothing, AllocCache}}(nothing) | ||
|
|
||
| """ | ||
| @cached(cache, expr) | ||
|
|
||
| Evaluate `expr` using allocations cache `cache`. | ||
|
|
||
| When GPU memory is allocated during the execution of `expr`, `cache` will first be checked. | ||
| If no memory is available in the cache, a new allocation will be requested. | ||
|
|
||
| After the execution of `expr`, all allocations made under the scope of `@cached` will be | ||
| cached within `cache` for future use. This is useful to avoid relying on GC to free GPU | ||
| memory in time. | ||
|
|
||
| Once `cache` goes out of scope, or when the user calls `unsafe_free!` on it, all cached | ||
| allocations will be freed. | ||
|
|
||
| # Example | ||
|
|
||
| In the following example, each iteration of the for-loop requires 8 GiB of GPU memory. | ||
| Without caching those allocations, significant pressure would be put on the GC, resulting | ||
| in high memory usage and latency. By using the allocator cache, the memory usage is stable: | ||
|
|
||
| ```julia | ||
| cache = GPUArrays.AllocCache() | ||
| for i in 1:1000 | ||
| GPUArrays.@cached cache begin | ||
| sin.(CUDA.rand(Float32, 1024^3)) | ||
| end | ||
| end | ||
|
|
||
| # optionally: free the memory now, instead of waiting for the GC to collect `cache` | ||
| GPUArrays.unsafe_free!(cache) | ||
| ``` | ||
|
|
||
| See [`@uncached`](@ref). | ||
| """ | ||
macro cached(cache, expr)
    # Run `expr` with the cache installed, and always hand the busy arrays back to the free
    # pools afterwards, even when `expr` throws. Otherwise they would stay marked as busy
    # and a later `unsafe_free!(cache)` would error.
    #
    # A raw `:tryfinally` expression is used instead of a `try ... finally ... end` block
    # because the latter introduces a new local scope, which would stop variables assigned
    # inside `expr` from being visible after the macro call.
    body = :(@with $(esc(ALLOC_CACHE)) => cache $(esc(expr)))
    cleanup = :(free_busy!(cache))
    return quote
        # Evaluate the user's cache expression exactly once, so the cache that is installed
        # and the cache that is released are guaranteed to be the same object.
        cache = $(esc(cache))
        $(Expr(:tryfinally, body, cleanup))
    end
end
|
|
||
| """ | ||
| @uncached(expr) | ||
|
|
||
| Evaluate expression `expr` without using the allocation cache. This is useful to call from within | ||
| `@cached` to avoid caching some allocations, e.g., because they can be returned out of the | ||
| `@cached` scope. | ||
| """ | ||
macro uncached(expr)
    # Run `expr` with the scoped allocation cache set to `nothing`, so `cached_alloc`
    # falls through to a plain allocation.
    body = esc(expr)
    return :(@with $(esc(ALLOC_CACHE)) => nothing $body)
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
@testsuite "alloc cache" (AT, eltypes) -> begin
    # The caching allocator is only exercised for GPU array types.
    if AT <: AbstractGPUArray
        cache = GPUArrays.AllocCache()

        T = Float32
        dims = (1, 2, 3)
        nbytes = sizeof(T) * prod(dims)

        # First allocation: nothing cached yet, the array ends up in the free pool.
        GPUArrays.@cached cache begin
            x1 = AT(zeros(T, dims))
        end
        @test sizeof(cache) == nbytes
        key = first(keys(cache.free))
        @test length(cache.free[key]) == 1
        @test length(cache.busy[key]) == 0
        @test x1 === cache.free[key][1]

        # Second allocation hits cache.
        GPUArrays.@cached cache begin
            x2 = AT(zeros(T, dims))
            # Does not hit the cache.
            GPUArrays.@uncached x_free = AT(zeros(T, dims))
        end
        @test sizeof(cache) == nbytes
        key = first(keys(cache.free))
        @test length(cache.free[key]) == 1
        @test length(cache.busy[key]) == 0
        @test x2 === cache.free[key][1]
        @test x_free !== x2

        # Third allocation is of different shape - allocates.
        dims = (2, 2)
        GPUArrays.@cached cache begin
            x3 = AT(zeros(T, dims))
        end
        other_keys = filter(!=(key), collect(keys(cache.free)))
        key2 = first(other_keys)
        @test length(cache.free[key]) == 1
        @test length(cache.free[key2]) == 1
        @test x3 === cache.free[key2][1]

        # Freeing all memory held by cache.
        GPUArrays.unsafe_free!(cache)
        @test sizeof(cache) == 0
    end
end
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.