Commit d21d384

Perf optimization: Eliminate allocations from cache-hit case. (#10)
* Perf optimization: Eliminate allocations from cache-hit case.
  - Outline a nested function that was being allocated as a closure.
  - Convert a get! + do-closure into a get() with a MISS default value, so that the side-effect variable `is_first_task` is no longer boxed.
* Add a test for no allocations on cache-hit!
* Remove unnecessary type conversion.
1 parent e6e4c3d commit d21d384
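The heart of the change: in Julia, a local variable that is assigned inside a closure, as `is_first_task` is inside the `do`-block passed to `get!`, gets wrapped in a heap-allocated `Core.Box`, and that box is allocated whenever the closure is constructed, even on the cache-hit path where the closure body never runs. Replacing `get!` + `do`-block with a plain `get` and a sentinel value removes the closure entirely. A minimal standalone sketch of the before/after pattern (hypothetical names, not the package's code; the package's actual sentinel is `CACHE_MISS`):

    const MISS = typemin(Int)  # hypothetical sentinel, assumed never to be a stored value

    # Before: `is_first` is assigned both outside and inside the do-closure, so
    # Julia wraps it in a Core.Box; constructing the closure allocates that box
    # even when `key` is already present and the closure is never called.
    function lookup_boxed!(d::Dict{Int,Int}, key::Int)
        is_first = false
        v = get!(d, key) do
            is_first = true
            0
        end
        return v, is_first
    end

    # After: get() with a default value needs no closure, so `is_first` stays
    # an ordinary local and the hit path performs no allocation.
    function lookup_unboxed!(d::Dict{Int,Int}, key::Int)
        is_first = false
        v = get(d, key, MISS)
        if v === MISS
            is_first = true
            v = 0
            d[key] = v
        end
        return v, is_first
    end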

File tree

2 files changed (+46, -23 lines)

src/MultiThreadedCaches.jl

Lines changed: 27 additions & 22 deletions
@@ -137,24 +137,6 @@ function Base.get!(func::Base.Callable, cache::MultiThreadedCache{K,V}, key) whe
         end
     end
     # If not, we need to check the base cache.
-    # When we're done, call this function to set the result
-    @inline function _store_result!(v::V; test_haskey::Bool)
-        # Set the value into thread-local cache for the supplied key.
-        # Note that we must perform two separate get() and setindex!() calls, for
-        # concurrency-safety, in case the dict has been mutated by another task in between.
-        # TODO: For 100% concurrency-safety, we maybe want to lock around the get() above
-        # and the setindex!() here.. it's probably fine without it, but needs considering.
-        # Currently this is relying on get() and setindex!() having no yields.
-        Base.@lock tlock begin
-            if test_haskey
-                if !haskey(tcache, key)
-                    tcache[key] = v
-                end
-            else
-                tcache[key] = v
-            end
-        end
-    end
 
     # Even though we're using Thread-local caches, we still need to lock during
     # construction to prevent multiple tasks redundantly constructing the same object,
@@ -170,9 +152,14 @@ function Base.get!(func::Base.Callable, cache::MultiThreadedCache{K,V}, key) whe
         if value_or_miss !== CACHE_MISS
             return value_or_miss::V
         end
-        future = get!(cache.base_cache_futures, key) do
+        future_or_miss = get(cache.base_cache_futures, key, CACHE_MISS)
+        future = if future_or_miss === CACHE_MISS
             is_first_task = true
-            Channel{V}(1)
+            ch = Channel{V}(1)
+            cache.base_cache_futures[key] = ch
+            ch::Channel{V}
+        else
+            future_or_miss::Channel{V}
         end
     end
     if is_first_task
@@ -210,7 +197,7 @@ function Base.get!(func::Base.Callable, cache::MultiThreadedCache{K,V}, key) whe
             delete!(cache.base_cache_futures, key)
         end
         # Store the result in this thread-local dictionary.
-        _store_result!(v, test_haskey=false)
+        _store_result!(tcache, tlock, key, v, test_haskey=false)
         # Return v to any other Tasks that were blocking on this key.
         put!(future, v)
         return v
@@ -220,11 +207,29 @@ function Base.get!(func::Base.Callable, cache::MultiThreadedCache{K,V}, key) whe
         v = fetch(future)
         # Store the result in our original thread-local cache (if another Task hasn't) set
         # it already.
-        _store_result!(v, test_haskey=true)
+        _store_result!(tcache, tlock, key, v, test_haskey=true)
         return v
     end
 end
 
+# Set the value into thread-local cache for the supplied key.
+@inline function _store_result!(tcache, tlock, key, v; test_haskey::Bool)
+    # Note that we must perform two separate get() and setindex!() calls, for
+    # concurrency-safety, in case the dict has been mutated by another task in between.
+    # TODO: For 100% concurrency-safety, we maybe want to lock around the get() above
+    # and the setindex!() here.. it's probably fine without it, but needs considering.
+    # Currently this is relying on get() and setindex!() having no yields.
+    Base.@lock tlock begin
+        if test_haskey
+            if !haskey(tcache, key)
+                tcache[key] = v
+            end
+        else
+            tcache[key] = v
+        end
+    end
+end
+
 function Base.show(io::IO, cache::MultiThreadedCache{K,V}) where {K,V}
     # Contention optimization: don't hold the lock while printing, since that could block
     # for an arbitrarily long time. Instead, print the data to an intermediate buffer first.
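Outlining the nested `_store_result!` works the same way: as a nested function it captured `tcache`, `tlock`, and `key` from the enclosing `get!`, so per the commit message a closure object was being allocated on every call; as a top-level function those values arrive as plain arguments and nothing is captured. To observe the box from the sketch above (assuming the hypothetical `lookup_boxed!` / `lookup_unboxed!` definitions), one option is:

    using InteractiveUtils  # for @code_warntype

    const d = Dict(1 => 2)

    # A `Core.Box` among the printed locals shows that the captured
    # variable was heap-allocated.
    @code_warntype lookup_boxed!(d, 1)

    # Steady-state allocation check on the hit path: the first call
    # compiles the method, the second one is the measurement.
    lookup_unboxed!(d, 1)
    @allocated lookup_unboxed!(d, 1)  # expected: 0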

test/MultiThreadedCaches.jl

Lines changed: 19 additions & 1 deletion
@@ -120,7 +120,8 @@ end
 
     # Test that all Tasks saw the same exception thrown.
     close(exceptions)
-    @test all_equal(collect(exceptions))
+    output = collect(exceptions)
+    @test all_equal(output)
 
     # Test that after throwing an exception during a get! function, the cache is still
     # robust and working as intended.
@@ -142,6 +143,23 @@ end
     display(cache)
 end
 
+populate!(c,x) = get!(c, x) do
+    UInt64(2)
+end
+
+@testset "no allocations for cache-hit" begin
+    cache = MultiThreadedCache{Int64, Int64}()
+    init_cache!(cache)
+
+    # Populate the cache
+    populate!(cache, 10)
+
+    @test @allocated(populate!(cache, 10)) == 0
+    @test @allocated(populate!(cache, 10)) == 0
+
+    populate!(cache, 11)
+    @test @allocated(populate!(cache, 11)) == 0
+end
 
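A detail worth noting in the new testset: the initial `populate!(cache, 10)` both fills the cache and compiles the code path, so the subsequent `@allocated` checks measure only the steady-state hit path; measuring a not-yet-compiled call would also count allocations from compilation. A standalone illustration of that warm-up pattern (hypothetical function, not from this test suite):

    sumto(n) = sum(1:n)   # allocation-free once compiled
    sumto(10)             # warm-up call: trigger compilation before measuring
    @assert @allocated(sumto(10)) == 0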
