Skip to content

Commit ab8813e

Browse files
authored
Add option to store LRU cache as mmapped arrays (#203)
* add option to store LRU cache as mmapped arrays * Relax mmap compat * add mmap option to cache function
1 parent 2a48da7 commit ab8813e

File tree

3 files changed

+41
-28
lines changed

3 files changed

+41
-28
lines changed

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,17 @@ version = "0.4.6"
55

66
[deps]
77
LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
8+
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
89
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
910

1011
[compat]
1112
Aqua = "0.8"
1213
LRUCache = "1"
13-
julia = "1.9"
14+
Mmap = "1"
1415
OffsetArrays = "1"
1516
Statistics = "1.9"
1617
Test = "1.9"
18+
julia = "1.9"
1719

1820
[extras]
1921
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"

src/cached.jl

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,27 @@
1-
1+
import Mmap
22
# Force disk any abstractarray into a different chunking pattern.
33
# This is useful in `zip` and other operations that can iterate
44
# over multiple arrays with different patterns.
55

66
"""
77
CachedDiskArray <: AbstractDiskArray
88
9-
CachedDiskArray(A::AbstractArray; maxsize=1000)
9+
CachedDiskArray(A::AbstractArray; maxsize=1000, mmap=false)
1010
1111
Wrap some disk array `A` with a caching mechanism that will
1212
keep chunks up to a total of `maxsize` megabytes, dropping
13-
the least used chunks when `maxsize` is exceeded.
13+
the least used chunks when `maxsize` is exceeded. If `mmap` is
14+
set to `true`, cached chunks will not be kept in RAM but Mmapped
15+
to temporary files.
1416
"""
1517
struct CachedDiskArray{T,N,A<:AbstractArray{T,N},C} <: ChunkTiledDiskArray{T,N}
1618
parent::A
1719
cache::C
20+
mmap::Bool
1821
end
19-
function CachedDiskArray(A::AbstractArray{T,N}; maxsize=1000) where {T,N}
22+
function CachedDiskArray(A::AbstractArray{T,N}; maxsize=1000, mmap=false) where {T,N}
2023
by(x) = sizeof(x) ÷ 1_000_000 # In Megabytes
21-
CachedDiskArray(A, LRU{ChunkIndex{N,OffsetChunks},OffsetArray{T,N,Array{T,N}}}(; by, maxsize))
24+
CachedDiskArray(A, LRU{ChunkIndex{N,OffsetChunks},OffsetArray{T,N,Array{T,N}}}(; by, maxsize),mmap)
2225
end
2326

2427
Base.parent(A::CachedDiskArray) = A.parent
@@ -32,6 +35,11 @@ function getchunk(A::CachedDiskArray, i::ChunkIndex)
3235
get!(A.cache, i) do
3336
inds = eachchunk(A)[i.I]
3437
chunk = parent(A)[inds...]
38+
if A.mmap
39+
mmappedarray = Mmap.mmap(tempname(),Array{eltype(chunk),ndims(chunk)},size(chunk),shared=false)
40+
copyto!(mmappedarray, chunk)
41+
chunk = mmappedarray
42+
end
3543
wrapchunk(chunk, inds)
3644
end
3745
end
@@ -40,11 +48,11 @@ Base.getindex(A::CachedDiskArray, i::ChunkIndex{N,OneBasedChunks}) where {N} = p
4048

4149

4250
"""
43-
cache(A::AbstractArray; maxsize=1000)
51+
cache(A::AbstractArray; maxsize=1000, mmap=false)
4452
4553
Wrap internal disk arrays with `CachedDiskArray`.
4654
4755
This function is intended to be extended by packages that want to
4856
re-wrap the disk array afterwards, such as YAXArrays.jl or Rasters.jl.
4957
"""
50-
cache(A::AbstractArray; maxsize=1000) = CachedDiskArray(A; maxsize)
58+
cache(A::AbstractArray; maxsize=1000, mmap=false) = CachedDiskArray(A; maxsize, mmap)

test/runtests.jl

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -909,26 +909,29 @@ end
909909
end
910910

911911
@testset "Cached arrays" begin
912-
M = (1:300) * (1:1200)'
913-
A = cat(M, M, M, M; dims=3)
914-
ch = ChunkedDiskArray(A, (128, 128, 2))
915-
ca = DiskArrays.CachedDiskArray(ch; maxsize=5)
916-
# Read the original
917-
@test sum(ca) == sum(ca)
918-
length(ca.cache)
919-
920-
ca = DiskArrays.cache(ch; maxsize=5)
921-
@test sum(ca) == sum(ca)
922-
923-
@test ca[:, :, 1] == A[:, :, 1]
924-
@test ca[:, :, 2] == A[:, :, 2]
925-
@test ca[:, :, 2] == A[:, :, 3]
926-
@test ca[:, :, 2] == A[:, :, 4]
927-
@test ca[:, 1, 1] == ch[:, 1, 1]
928-
@test ca[:, 2, 1] == ch[:, 2, 1]
929-
@test ca[:, 3, 1] == ch[:, 3, 1]
930-
@test ca[:, 200, 1] == ch[:, 200, 1]
931-
@test ca[200, :, 1] == ch[200, :, 1]
912+
913+
for mm in (false, true)
914+
M = (1:300) * (1:1200)'
915+
A = cat(M, M, M, M; dims=3)
916+
ch = ChunkedDiskArray(A, (128, 128, 2))
917+
ca = DiskArrays.CachedDiskArray(ch; maxsize=5, mmap=mm)
918+
# Read the original
919+
@test sum(ca) == sum(ca)
920+
length(ca.cache)
921+
922+
ca = DiskArrays.cache(ch; maxsize=5)
923+
@test sum(ca) == sum(ca)
924+
925+
@test ca[:, :, 1] == A[:, :, 1]
926+
@test ca[:, :, 2] == A[:, :, 2]
927+
@test ca[:, :, 2] == A[:, :, 3]
928+
@test ca[:, :, 2] == A[:, :, 4]
929+
@test ca[:, 1, 1] == ch[:, 1, 1]
930+
@test ca[:, 2, 1] == ch[:, 2, 1]
931+
@test ca[:, 3, 1] == ch[:, 3, 1]
932+
@test ca[:, 200, 1] == ch[:, 200, 1]
933+
@test ca[200, :, 1] == ch[200, :, 1]
934+
end
932935
end
933936

934937
@testset "Range subset identification" begin

0 commit comments

Comments
 (0)