Skip to content

Commit 5da88b3

Browse files
authored
Add ChunkTiledDiskArray abstract type (#179)
* expose base type for tiled diskarrays
* change supertype of cacheddiskarray
* bump version
1 parent 7011da5 commit 5da88b3

File tree

4 files changed

+38
-30
lines changed

4 files changed

+38
-30
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
name = "DiskArrays"
22
uuid = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
33
authors = ["Fabian Gans <[email protected]>"]
4-
version = "0.4.4"
4+
version = "0.4.5"
55

66
[deps]
77
LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"

src/cached.jl

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Wrap some disk array `A` with a caching mechanism that will
1212
keep chunks up to a total of `maxsize` megabytes, dropping
1313
the least used chunks when `maxsize` is exceeded.
1414
"""
15-
struct CachedDiskArray{T,N,A<:AbstractArray{T,N},C} <: AbstractDiskArray{T,N}
15+
struct CachedDiskArray{T,N,A<:AbstractArray{T,N},C} <: ChunkTiledDiskArray{T,N}
1616
parent::A
1717
cache::C
1818
end
@@ -23,33 +23,21 @@ end
2323

2424
Base.parent(A::CachedDiskArray) = A.parent
2525
Base.size(A::CachedDiskArray) = size(parent(A))
26-
# These could be more efficient with memory in some cases, but this is simple
27-
readblock!(A::CachedDiskArray, data, I...) = _readblock_cached!(A, data, I...)
28-
readblock!(A::CachedDiskArray, data, I::AbstractVector...) = _readblock_cached!(A, data, I...)
2926
# TODO we need to invalidate caches when we write
3027
# writeblock!(A::CachedDiskArray, data, I...) = writeblock!(parent(A), data, I...)
3128

3229
haschunks(A::CachedDiskArray) = haschunks(parent(A))
3330
eachchunk(A::CachedDiskArray) = eachchunk(parent(A))
34-
35-
function _readblock_cached!(A::CachedDiskArray{T,N}, data, I...) where {T,N}
36-
chunks = eachchunk(A)
37-
chunk_inds = findchunk.(chunks.chunks, I)
38-
data_offset = OffsetArray(data,map(i->first(i)-1,I)...)
39-
foreach(CartesianIndices(chunk_inds)) do ci
40-
chunkindex = ChunkIndex(ci,offset=true)
41-
chunk = get!(A.cache, chunkindex) do
42-
res = parent(A)[chunkindex]
43-
res
44-
end
45-
inner_indices = map(axes(chunk),axes(data_offset)) do ax1, ax2
46-
max(first(ax1),first(ax2)):min(last(ax1),last(ax2))
47-
end
48-
for ii in CartesianIndices(inner_indices)
49-
data_offset[ii] = chunk[ii]
50-
end
31+
function getchunk(A::CachedDiskArray, i::ChunkIndex)
32+
get!(A.cache, i) do
33+
inds = eachchunk(A)[i.I]
34+
chunk = parent(A)[inds...]
35+
wrapchunk(chunk, inds)
5136
end
5237
end
38+
Base.getindex(A::CachedDiskArray, i::ChunkIndex{N,OffsetChunks}) where {N} = getchunk(A, i)
39+
Base.getindex(A::CachedDiskArray, i::ChunkIndex{N,OneBasedChunks}) where {N} = parent(getchunk(A, i))
40+
5341

5442
"""
5543
cache(A::AbstractArray; maxsize=1000)

src/chunks.jl

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,8 +272,7 @@ haschunks(x) = Unchunked()
272272

273273
struct OffsetChunks end
274274
struct OneBasedChunks end
275-
wrapchunk(::OneBasedChunks, x, _) = x
276-
wrapchunk(::OffsetChunks, x, inds) = OffsetArray(x, inds...)
275+
wrapchunk(x, inds) = OffsetArray(x, inds...)
277276

278277
"""
279278
ChunkIndex{N}
@@ -288,6 +287,9 @@ end
288287
function ChunkIndex(i::CartesianIndex; offset=false)
289288
return ChunkIndex(i, offset ? OffsetChunks() : OneBasedChunks())
290289
end
290+
"Removes the offset from a ChunkIndex"
291+
nooffset(i::ChunkIndex) = ChunkIndex(i.I, OneBasedChunks())
292+
291293
ChunkIndex(i::Integer...; offset=false) = ChunkIndex(CartesianIndex(i); offset)
292294

293295
"""
@@ -336,6 +338,26 @@ function estimate_chunksize(s, si)
336338
return floor(Int, default_chunk_size[] * 1e6 / si / sbefore)
337339
end
338340
end
341+
cs = clamp.(cs, 1, s)
339342
return GridChunks(s, cs)
340343
end
341344

345+
346+
347+
abstract type ChunkTiledDiskArray{T,N} <: AbstractDiskArray{T,N} end
348+
Base.size(a::ChunkTiledDiskArray) = arraysize_from_chunksize.(eachchunk(a).chunks)
349+
function DiskArrays.readblock!(A::ChunkTiledDiskArray{T,N}, data, I...) where {T,N}
350+
chunks = eachchunk(A)
351+
chunk_inds = DiskArrays.findchunk.(chunks.chunks, I)
352+
data_offset = OffsetArray(data, map(i -> first(i) - 1, I)...)
353+
foreach(CartesianIndices(chunk_inds)) do ci
354+
chunkindex = DiskArrays.ChunkIndex(ci, offset=true)
355+
chunk = A[chunkindex]
356+
inner_indices = map(axes(chunk), axes(data_offset)) do ax1, ax2
357+
max(first(ax1), first(ax2)):min(last(ax1), last(ax2))
358+
end
359+
for ii in CartesianIndices(inner_indices)
360+
data_offset[ii] = chunk[ii]
361+
end
362+
end
363+
end

src/diskarray.jl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -346,12 +346,10 @@ macro implement_getindex(t)
346346
t = esc(t)
347347
quote
348348
Base.getindex(a::$t, i...) = getindex_disk(a, i...)
349-
350-
function Base.getindex(a::$t, i::ChunkIndex)
351-
cs = eachchunk(a)
352-
inds = cs[i.I]
353-
return wrapchunk(i.chunktype, a[inds...], inds)
354-
end
349+
@inline Base.getindex(a::$t, i::ChunkIndex{<:Any,OneBasedChunks}) =
350+
a[eachchunk(a)[i.I]...]
351+
@inline Base.getindex(a::$t, i::ChunkIndex{<:Any,OffsetChunks}) =
352+
wrapchunk(a[nooffset(i)], eachchunk(a)[i.I])
355353
function DiskArrays.ChunkIndices(a::$t; offset=false)
356354
return ChunkIndices(
357355
map(s->1:s,size(eachchunk(a))), offset ? OffsetChunks() : OneBasedChunks()

0 commit comments

Comments
 (0)