Skip to content

Commit dd2d770

Browse files
authored
add PaddedDiskArray and pad (#219)
* add PaddedDiskArray and pad * fix pad * test and bugfix _pad_offset * bugfixes * call getindex not readblock
1 parent 0892642 commit dd2d770

File tree

4 files changed

+157
-1
lines changed

4 files changed

+157
-1
lines changed

src/DiskArrays.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ include("generator.jl")
3030
include("zip.jl")
3131
include("show.jl")
3232
include("cached.jl")
33+
include("pad.jl")
3334

3435
# The all-in-one macro
3536

src/chunks.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ struct RegularChunks <: ChunkVector
6161
arraysize::Int
6262
function RegularChunks(chunksize::Integer, offset::Integer, arraysize::Integer)
6363
chunksize > 0 || throw(ArgumentError("Chunk sizes must be strictly positive"))
64-
-1 < offset < chunksize || throw(ArgumentError("Offsets must be positive and smaller than the chunk size"))
64+
-1 < offset < chunksize || throw(ArgumentError("Offsets must be positive and smaller than the chunk size, got offset: $offset and chunk size: $chunksize"))
6565
arraysize >= 0 || throw(ArgumentError("Negative axis lengths are not allowed"))
6666
new(Int(chunksize), Int(offset), Int(arraysize))
6767
end

src/pad.jl

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""
2+
PaddedDiskArray <: AbstractDiskArray
3+
4+
PaddedDiskArray(A, padding; fill=zero(eltype(A)))
5+
6+
An `AbstractDiskArray` that adds padding to the edges of the parent array.
7+
This can help changing chunk offsets or padding a larger than memory array
8+
before windowing operations.
9+
10+
# Arguments
11+
12+
- `A`: The parent disk array.
13+
- `padding`: A tuple of `Int` lower and upper padding tuples, one for each dimension.
14+
15+
# Keywords
16+
17+
- `fill=zero(eltype(A))`: The value to pad the array with.
18+
"""
19+
struct PaddedDiskArray{T,N,A<:AbstractArray{T,N},C,F<:T} <: AbstractDiskArray{T,N}
20+
parent::A
21+
padding::NTuple{N,Tuple{Int,Int}}
22+
fill::F
23+
chunks::C
24+
end
25+
function PaddedDiskArray(A::AbstractArray{T,N}, padding::NTuple{N,Tuple{Int,Int}};
26+
fill=zero(eltype(A)),
27+
) where {T,N}
28+
map(padding) do (l, u)
29+
(l < 0 || u < 0) && throw(ArgumentError("Padding must be non-negative"))
30+
end
31+
chunks = GridChunks(map(_pad_offset, eachchunk(A).chunks, padding))
32+
PaddedDiskArray(A, padding, fill, chunks)
33+
end
34+
35+
function _pad_offset(c::RegularChunks, (low, high))
36+
chunksize = c.chunksize
37+
# Handle lower padding larger than chunksize
38+
offset = if low == 0
39+
c.offset
40+
else
41+
c.offset - low + chunksize * (div(low - 1, chunksize) + 1)
42+
end
43+
size = c.arraysize + low + high
44+
return RegularChunks(chunksize, offset, size)
45+
end
46+
function _pad_offset(c::IrregularChunks, (low, high))
47+
nlowchunks = Int(low > 0)
48+
nhighchunks = Int(high > 0)
49+
offsets = Vector{Int}(undef, length(c.offsets) + nlowchunks + nhighchunks)
50+
# First offset is always zero
51+
offsets[begin] = 0
52+
# Increase original offsets by lower padding
53+
for (i, o) in enumerate(c.offsets)
54+
offsets[i + nlowchunks] = o + low
55+
end
56+
# Add offset for start of upper padding
57+
if nhighchunks > 0
58+
offsets[end] = offsets[end-1] + high
59+
end
60+
return IrregularChunks(offsets)
61+
end
62+
63+
Base.parent(A::PaddedDiskArray) = A.parent
64+
function Base.size(A::PaddedDiskArray)
65+
map(size(parent(A)), A.padding) do s, (low, high)
66+
s + low + high
67+
end
68+
end
69+
70+
haschunks(A::PaddedDiskArray) = haschunks(parent(A))
71+
eachchunk(A::PaddedDiskArray) = A.chunks
72+
73+
readblock!(A::PaddedDiskArray, data, I::AbstractRange...) =
74+
_readblock_padded(A, data, I...)
75+
writeblock!(A::PaddedDiskArray, data, I...) =
76+
throw(ArgumentError("Cannot write to a PaddedDiskArray"))
77+
78+
function _readblock_padded(A, data, I::AbstractRange...)
79+
data .= A.fill
80+
Ipadded = map(I, A.padding) do i, (low, high)
81+
i .- low
82+
end
83+
fs = map(axes(parent(A)), Ipadded) do a, ip
84+
searchsortedfirst(ip, first(a))
85+
end
86+
ls = map(axes(parent(A)), Ipadded) do a, ip
87+
searchsortedlast(ip, last(a))
88+
end
89+
return if all(map(<=, fs, ls))
90+
Idata = map(:, fs, ls)
91+
Iparent = map(getindex, Ipadded, Idata)
92+
data[Idata...] .= parent(A)[Iparent...]
93+
else
94+
# No overlap, don't read
95+
data
96+
end
97+
end
98+
99+
"""
100+
101+
pad(A, padding; fill=zero(eltype(A)))
102+
103+
Pad any `AbstractArray` with fill values, updating chunk patterns.
104+
105+
# Arguments
106+
107+
- `A`: The parent disk array.
108+
- `padding`: A tuple of `Int` lower and upper padding tuples, one for each dimension.
109+
110+
# Keywords
111+
112+
- `fill=zero(eltype(A))`: The value to pad the array with.
113+
"""
114+
pad(A::AbstractArray{<:Any,N}, padding::NTuple{N,Tuple{Int,Int}}; kw...) where N =
115+
PaddedDiskArray(A, padding; kw...)

test/runtests.jl

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,46 @@ end
973973
end
974974
end
975975

976+
@testset "Padded disk arrays" begin
977+
M = (1:100) * (1:120)'
978+
A = cat(M, 2M, 3M, 4M; dims=3)
979+
ch = ChunkedDiskArray(A, (128, 128, 2))
980+
pa = DiskArrays.pad(ch, ((10, 20), (30, 40), (1, 2)); fill=999)
981+
@test size(pa) == (130, 190, 7)
982+
# All outside
983+
@test all(==(999), pa[1:10, 1:30, 1:1])
984+
# All inside
985+
@test pa[11:20, 31:40, 3:4] == ch[1:10, 1:10, 2:3]
986+
@testset "Overlapping lower" begin
987+
regions = pa[10:20, 30:40, 1:2]
988+
testdata = copy(regions) .= 999
989+
testdata[2:11, 2:11, 2:2] .= ch[1:10, 1:10, 1:1]
990+
@test regions == testdata
991+
end
992+
@testset "Overlapping upper" begin
993+
regions = pa[101:120, 141:160, 5:7]
994+
testdata = copy(regions) .= 999
995+
testdata[1:10, 1:10, 1:1] .= ch[91:100, 111:120, 4:4]
996+
@test regions == testdata
997+
end
998+
@testset "offsets larger than chunks" begin
999+
pa1 = DiskArrays.pad(ch, ((201, 147), (390, 33), (1, 2)); fill=1)
1000+
@test sum(pa1) == prod(size(pa1)) + sum(ch) - prod(size(ch))
1001+
end
1002+
@testset "_pad_offset" begin
1003+
c1 = DiskArrays.RegularChunks(10, 0, 100)
1004+
@test DiskArrays._pad_offset(c1, (5, 2)) == DiskArrays.RegularChunks(10, 5, 107)
1005+
@test DiskArrays._pad_offset(c1, (30, 2)) == DiskArrays.RegularChunks(10, 0, 132)
1006+
@test DiskArrays._pad_offset(c1, (10, 10)) == DiskArrays.RegularChunks(10, 0, 120)
1007+
@test DiskArrays._pad_offset(c1, (0, 0)) == c1
1008+
1009+
c2 = DiskArrays.IrregularChunks(chunksizes = [10, 10, 20, 30, 40])
1010+
#The following test would assume padding ends up in a separate chunk:
1011+
@test DiskArrays._pad_offset(c2, (5, 5)) == DiskArrays.IrregularChunks(chunksizes = [5, 10, 10, 20, 30, 40, 5])
1012+
@test DiskArrays._pad_offset(c2, (0, 0)) == c2
1013+
end
1014+
end
1015+
9761016
@testset "Range subset identification" begin
9771017
inds = [1, 2, 2, 3, 5, 6, 7, 10, 10]
9781018
readranges, offsets = DiskArrays.find_subranges_sorted(inds, false)

0 commit comments

Comments
 (0)