Skip to content

Commit 77c310c

Browse files
felixcremerFelix Cremerasinghvi17
authored
Add mockchunks function (#237)
* Add rechunk function. This function could then be overloaded by wrapping packages. * Improve docstring of rechunk * Rename RechunkedDiskArray to MockChunkedDiskArray and rechunk to mockchunk * Update src/rechunk.jl Co-authored-by: Anshul Singhvi <[email protected]> * Update src/rechunk.jl Co-authored-by: Anshul Singhvi <[email protected]> * Update src/rechunk.jl Co-authored-by: Anshul Singhvi <[email protected]> * Rename rechunk.jl to mockchunks.jl * Include the correct file * Add deprecation for RechunkedDiskArray --------- Co-authored-by: Felix Cremer <[email protected]> Co-authored-by: Anshul Singhvi <[email protected]>
1 parent cb1c522 commit 77c310c

File tree

4 files changed

+57
-40
lines changed

4 files changed

+57
-40
lines changed

src/DiskArrays.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ include("mapreduce.jl")
2929
include("permute.jl")
3030
include("reshape.jl")
3131
include("subarray.jl")
32-
include("rechunk.jl")
32+
include("mockchunks.jl")
3333
include("cat.jl")
3434
include("generator.jl")
3535
include("zip.jl")

src/mockchunks.jl

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
"""
    MockChunkedDiskArray <: AbstractDiskArray

    MockChunkedDiskArray(parent::AbstractArray, chunks::GridChunks)

Wrap `parent` so that it reports `chunks` as its chunk pattern, whatever the
real chunk pattern of `parent` is.

The data stays in `parent`; only the advertised chunk pattern differs. This helps
in `zip` and other operations that iterate over several arrays whose chunk
patterns differ.
"""
struct MockChunkedDiskArray{
    T,N,A<:AbstractArray{T,N},C<:GridChunks
} <: AbstractDiskArray{T,N}
    # Wrapped array that holds the data.
    parent::A
    # Chunk pattern this wrapper reports in place of the parent's own.
    chunks::C
end
16+
"""
    mockchunks(data::AbstractDiskArray, chunks)

Return a wrapper around `data` that reports the chunk pattern given by `chunks`.

Note that this will not change the chunking of the underlying data itself, it will
just make the data "look" like it had a different chunking. If you need a persistent
on-disk representation of this chunking, save the resulting array.

The `chunks` argument can take one of the following forms:

- a [`DiskArrays.GridChunks`](@ref) object
- a tuple specifying the chunk size along each dimension, like `(10, 10, 1)` for a
  3-D array

# Throws
- `DimensionMismatch` if a tuple is given whose length differs from `ndims(data)`.
"""
mockchunks(data::AbstractDiskArray, chunks::GridChunks) = MockChunkedDiskArray(data, chunks)

# Tuple form advertised by the docstring: turn per-dimension chunk sizes into a
# `GridChunks` covering `data`, then reuse the method above.
function mockchunks(data::AbstractDiskArray, chunks::Tuple{Vararg{Integer}})
    length(chunks) == ndims(data) || throw(
        DimensionMismatch(
            "got $(length(chunks)) chunk sizes for an array with $(ndims(data)) dimensions",
        ),
    )
    return mockchunks(data, GridChunks(data, chunks))
end
31+
32+
# Expose the wrapped array, and report its size as the wrapper's size.
function Base.parent(A::MockChunkedDiskArray)
    return A.parent
end

function Base.size(A::MockChunkedDiskArray)
    return size(parent(A))
end
34+
35+
# DiskArrays interface
36+
37+
# The wrapper always reports itself as chunked, and its chunk pattern is the one
# stored in the wrapper rather than anything derived from the parent.
function haschunks(::MockChunkedDiskArray)
    return Chunked()
end

function eachchunk(A::MockChunkedDiskArray)
    return A.chunks
end
39+
40+
# These could be more efficient with memory in some cases, but this is simple.
# Both read methods forward to `_readblock_mockchunked`; they differ only in signature.
# NOTE(review): the `AbstractVector...` method is presumably there to avoid a dispatch
# ambiguity with a generic method defined elsewhere — confirm.
function readblock!(A::MockChunkedDiskArray, data, I...)
    return _readblock_mockchunked(A, data, I...)
end

function readblock!(A::MockChunkedDiskArray, data, I::AbstractVector...)
    return _readblock_mockchunked(A, data, I...)
end

# Writes are handed straight to the parent array.
function writeblock!(A::MockChunkedDiskArray, data, I...)
    return writeblock!(parent(A), data, I...)
end
45+
46+
# Shared read path for the `readblock!` methods: fill `data` with the part of the
# parent array selected by `I`.
function _readblock_mockchunked(A, data, I...)
    source = parent(A)
    # Handle non disk arrays that may be chunked for e.g. chunked `zip`
    haschunks(source) isa Chunked || return copyto!(data, view(source, I...))
    # Chunked parents are read through their own `readblock!`.
    return readblock!(source, data, I...)
end
54+
55+
# Keep the old name `RechunkedDiskArray` usable as a deprecated binding for
# `MockChunkedDiskArray`.
Base.@deprecate_binding RechunkedDiskArray MockChunkedDiskArray

src/rechunk.jl

Lines changed: 0 additions & 38 deletions
This file was deleted.

src/zip.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ function DiskZip(As::AbstractArray...)
3030
return DiskZip(As)
3131
else
3232
rechunked = map(As) do A
33-
RechunkedDiskArray(A, chunks)
33+
MockChunkedDiskArray(A, chunks)
3434
end
3535
return DiskZip(rechunked)
3636
end

0 commit comments

Comments
 (0)