Skip to content

Commit 240b404

Browse files
committed
last fixes
1 parent c0aac92 commit 240b404

File tree

3 files changed

+96
-22
lines changed

3 files changed

+96
-22
lines changed

src/cat.jl

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,30 +18,54 @@ struct ConcatDiskArray{T,N,P,C,HC} <: AbstractDiskArray{T,N}
1818
chunks::C
1919
haschunks::HC
2020
end
21-
function ConcatDiskArray(arrays::AbstractArray{<:AbstractArray{<:Any,N},M}) where {N,M}
22-
T = mapreduce(eltype, promote_type, init=eltype(first(arrays)), arrays)
2321

22+
# Build a ConcatDiskArray from a grid of tiles whose container eltype is a
# `Union` of an array type and `Missing`.
#
# The element type of the result is `Union{Missing,E}` where `E` is the eltype
# of the non-missing tile type. The rank of the grid and the rank of the tile
# type are passed on as `Val`s to `_ConcatDiskArray`.
#
# NOTE(review): `Union{<:AbstractArray,Missing}` is a `UnionAll` used as an
# invariant type parameter, so this method presumably does not match inputs
# such as `Matrix{Union{Missing,Matrix{Int}}}`, which would then be handled by
# the generic `ConcatDiskArray(::AbstractArray)` method - confirm, and consider
# `AbstractArray{<:Union{Missing,AbstractArray}}` if this fast path is wanted.
function ConcatDiskArray(arrays::AbstractArray{Union{<:AbstractArray,Missing}})
    tiletype = Base.nonmissingtype(eltype(arrays))
    T = Union{Missing,eltype(tiletype)}
    gridrank = Val(ndims(arrays))
    tilerank = Val(ndims(tiletype))
    return _ConcatDiskArray(arrays, T, gridrank, tilerank)
end
29+
# Build a ConcatDiskArray from a grid whose container eltype is an array type.
#
# Element type and tile rank are taken from the container's eltype, so no tile
# has to be inspected; the grid rank is `ndims(arrays)`.
function ConcatDiskArray(arrays::AbstractArray{<:AbstractArray})
    tiletype = eltype(arrays)
    T = eltype(tiletype)
    return _ConcatDiskArray(arrays, T, Val(ndims(arrays)), Val(ndims(tiletype)))
end
35+
# Generic fallback: build a ConcatDiskArray from a grid whose entries are
# inspected one by one; entries may be arrays or `missing`.
#
# Walks over all entries to determine
#   * the common `ndims` of the non-missing tiles (stays -1 if there are none),
#   * the promoted element type, which includes `Missing` as soon as one entry
#     is `missing`.
# Throws an `ArgumentError` when two non-missing tiles differ in `ndims`.
function ConcatDiskArray(arrays::AbstractArray)
    tilerank = -1
    T = Union{}
    for tile in arrays
        if ismissing(tile)
            T = promote_type(Missing, T)
            continue
        end
        if tilerank != -1 && ndims(tile) != tilerank
            throw(ArgumentError("All arrays to concatenate must have equal ndims"))
        end
        tilerank = ndims(tile)
        T = promote_type(eltype(tile), T)
    end
    return _ConcatDiskArray(arrays, T, Val(ndims(arrays)), Val(tilerank))
end
47+
48+
49+
# Bring the grid of tiles and the tiles themselves to a common rank `D` and
# hand over to the rank-`D` constructor helper.
#
# Arguments
#   arrays   grid of tiles (entries may be `missing`)
#   T        element type of the resulting ConcatDiskArray
#   Val{N}   rank of the grid, i.e. `ndims(arrays)`
#   Val{M}   rank of the individual tiles
#
# The common rank is `D = max(N, M)`. When the ranks differ the grid is
# reshaped to `D` dimensions by appending singleton dimensions. This matters
# for `M > N` (e.g. a vector of matrices): `arraysize_and_startinds` extends
# each tile size to the rank of the grid with `extenddims`, which can only
# append entries, so the grid must have at least as many dimensions as a tile.
# For `N > M` the reshape keeps the grid shape, as before.
function _ConcatDiskArray(arrays, T, ::Val{N}, ::Val{M}) where {N,M}
    D = max(N, M)
    if N == M
        arrays1 = arrays
    else
        newshape = extenddims(size(arrays), ntuple(_ -> 1, D), 1)
        arrays1 = reshape(arrays, newshape)
    end
    return _ConcatDiskArray(arrays1::AbstractArray, T, Val(D))
end
60+
# Assemble the ConcatDiskArray for a grid `arrays1` that already has the final
# rank `D`: compute start indices and sizes of the tiles, derive the combined
# chunking, and wrap everything in the struct with element type `T`.
function _ConcatDiskArray(arrays1::AbstractArray, T, ::Val{D}) where {D}
    offsets, extents = arraysize_and_startinds(arrays1)
    tilechunks = concat_chunksize(arrays1)
    chunkinfo = Chunked(batchstrategy(tilechunks))
    CDA = ConcatDiskArray{T,D,typeof(arrays1),typeof(tilechunks),typeof(chunkinfo)}
    return CDA(arrays1, offsets, extents, tilechunks, chunkinfo)
end
39-
function ConcatDiskArray(arrays::AbstractArray)
40-
# Validate array eltype and dimensionality
41-
all(a -> ndims(a) == ndims(first(arrays)), arrays) ||
42-
error("Arrays don't have the same dimensions")
43-
return error("Should not be reached")
44-
end
68+
4569
# extenddims(a, b, fillval)
#
# Return tuple `a` padded at the end with `fillval` until it has the same
# length as tuple `b`; `a` is returned unchanged when the lengths already
# agree. Only the length of `b` is used, never its values.
#
# Throws an `ArgumentError` when `a` is longer than `b`: padding can only grow
# `a`, so without this check the recursion would never reach the equal-length
# method.
function extenddims(a::Tuple{Vararg{Any,N}}, b::Tuple{Vararg{Any,M}}, fillval) where {N,M}
    N > M && throw(ArgumentError(
        "cannot extend a tuple of length $N to the shorter length $M"))
    return extenddims((a..., fillval), b, fillval)
end
# Equal lengths: nothing left to pad.
extenddims(a::Tuple{Vararg{Any,N}}, _::Tuple{Vararg{Any,N}}, _) where {N} = a
4771

@@ -51,6 +75,7 @@ function arraysize_and_startinds(arrays1)
5175
sizes = map(i -> zeros(Int, i), size(arrays1))
5276
for i in CartesianIndices(arrays1)
5377
ai = arrays1[i]
78+
ismissing(ai) && continue
5479
sizecur = extenddims(size(ai), size(arrays1), 1)
5580
foreach(sizecur, i.I, sizes) do si, ind, sizeall
5681
if sizeall[ind] == 0
@@ -62,6 +87,9 @@ function arraysize_and_startinds(arrays1)
6287
end
6388
end
6489
r = map(sizes) do sizeall
90+
#Replace missing sizes with size 1
91+
replace!(sizeall, 0 => 1)
92+
#Add starting 1
6593
pushfirst!(sizeall, 1)
6694
for i in 2:length(sizeall)
6795
sizeall[i] = sizeall[i-1] + sizeall[i]
@@ -80,13 +108,24 @@ function readblock!(a::ConcatDiskArray, aout, inds::AbstractUnitRange...)
80108
# Find affected blocks and indices in blocks
81109
_concat_diskarray_block_io(a, inds...) do outer_range, array_range, I
82110
vout = view(aout, outer_range...)
83-
readblock!(a.parents[I], vout, array_range...)
111+
if ismissing(I)
112+
vout .= missing
113+
else
114+
readblock!(a.parents[I], vout, array_range...)
115+
end
84116
end
85117
end
86118
# Write the block `aout` into the index ranges `inds` of a ConcatDiskArray.
#
# The write is split per affected tile by `_concat_diskarray_block_io`. For a
# tile that is `missing` nothing is written; a warning is logged if the data
# destined for that tile contains anything other than `missing`.
function writeblock!(a::ConcatDiskArray, aout, inds::AbstractUnitRange...)
    return _concat_diskarray_block_io(a, inds...) do outer_range, array_range, I
        data = view(aout, outer_range...)
        if ismissing(I)
            all(ismissing, data) ||
                @warn "Trying to write data to missing array tile, skipping write"
            return nothing
        end
        return writeblock!(a.parents[I], data, array_range...)
    end
end
92131

@@ -101,17 +140,24 @@ function _concat_diskarray_block_io(f, a::ConcatDiskArray, inds...)
101140
end
102141
map(CartesianIndices(blockinds)) do cI
103142
myar = a.parents[cI]
104-
mysize = extenddims(size(myar), cI.I, 1)
143+
size_inferred = map(a.startinds, size(a), cI.I) do si, sa, ii
144+
ii == length(si) ? sa - si[ii] + 1 : si[ii+1] - si[ii]
145+
end
146+
mysize = extenddims(size_inferred, cI.I, 1)
105147
array_range = map(cI.I, a.startinds, mysize, inds) do ii, si, ms, indstoread
106148
max(first(indstoread) - si[ii] + 1, 1):min(last(indstoread) - si[ii] + 1, ms)
107149
end
108150
outer_range = map(cI.I, a.startinds, array_range, inds) do ii, si, ar, indstoread
109151
(first(ar)+si[ii]-first(indstoread)):(last(ar)+si[ii]-first(indstoread))
110152
end
111153
#Shorten array range to shape of actual array
112-
array_range = map((i, j) -> j, size(myar), array_range)
154+
array_range = map((i, j) -> j, size_inferred, array_range)
113155
outer_range = fix_outerrangeshape(outer_range, array_range)
114-
f(outer_range, array_range, cI)
156+
if ismissing(myar)
157+
f(outer_range, array_range, missing)
158+
else
159+
f(outer_range, array_range, cI)
160+
end
115161
end
116162
end
117163
# Two-argument entry point: forwards to the three-argument method with an
# empty tuple as its first argument.
function fix_outerrangeshape(outer_range, array_range)
    return fix_outerrangeshape((), outer_range, array_range)
end
@@ -126,6 +172,7 @@ function concat_chunksize(parents)
126172
newchunks = map(s -> Vector{Union{RegularChunks,IrregularChunks}}(undef, s), size(parents))
127173
for i in CartesianIndices(parents)
128174
array = parents[i]
175+
ismissing(array) && continue
129176
chunks = eachchunk(array)
130177
foreach(chunks.chunks, i.I, newchunks) do c, ind, newc
131178
if !isassigned(newc, ind)

src/diskarray.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ function readblock!(a::AbstractArray, aout, r...)
2828
if isdisk(a)
2929
@warn "Using fallback readblock! for array $(typeof(a)). This should not happen but there should be a custom implementation."
3030
end
31-
copyto!(aout, CartesianIndices(aout), a, CartesianIndices(r))
31+
aout .= view(a, CartesianIndices(r))
3232
end
3333

3434
"""

test/runtests.jl

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,33 @@ end
491491
@test slic isa Vector{Float64}
492492
@test slic == Float64[1, 2, 3, 4, 1, 2, 3, 4]
493493
end
494+
495+
@testset "Concat DiskArray with missing tiles" begin
    # 2x2 grid of tiles whose lower-right tile is missing
    upperleft = zeros(Int, 3, 4)
    lowerleft = ones(Int, 2, 4)
    upperright = fill(2, 3, 5)
    hole = fill(missing, 2, 5)
    tiles = reshape([upperleft, lowerleft, upperright, missing], 2, 2)
    aconc = DiskArrays.ConcatDiskArray(tiles)
    expected = [upperleft upperright; lowerleft hole]
    @test all(isequal.(aconc[:, :], expected))
    @test all(isequal.(aconc[3:4, 4:6], expected[3:4, 4:6]))
    gridchunks = DiskArrays.eachchunk(aconc)
    @test gridchunks.chunks[1] == [1:3, 4:5]
    @test gridchunks.chunks[2] == [1:4, 5:9]

    # 1x1x100 grid of 100x50 layers; every fifth layer and the first one are missing
    layer = ones(100, 50)
    layers = [rem(i.I[3], 5) == 0 ? missing : layer for i in CartesianIndices((1, 1, 100))]
    layers[1] = missing
    a_conc = DiskArrays.ConcatDiskArray(layers)
    layerchunks = eachchunk(a_conc)
    @test layerchunks.chunks[1] == [1:100]
    @test layerchunks.chunks[2] == [1:50]
    @test layerchunks.chunks[3] === DiskArrays.RegularChunks(1, 0, 100)

    @test all(isequal.(a_conc[2, 2, 1:5], [missing, 1.0, 1.0, 1.0, missing]))
    @test all(isequal.(a_conc[end, end, 95:100], [missing, 1.0, 1.0, 1.0, 1.0, missing]))
end
494521
end
495522

496523
@testset "Broadcast with length 1 and 0 final dim" begin
@@ -979,7 +1006,7 @@ end
9791006
@testset "Padded disk arrays" begin
9801007
M = (1:100) * (1:120)'
9811008
A = cat(M, 2M, 3M, 4M; dims=3)
982-
ch = ChunkedDiskArray(A, (128, 128, 2))
1009+
ch = ChunkedDiskArray(A, (128, 128, 2))
9831010
pa = DiskArrays.pad(ch, ((10, 20), (30, 40), (1, 2)); fill=999)
9841011
@test size(pa) == (130, 190, 7)
9851012
# All outside
@@ -1009,9 +1036,9 @@ end
10091036
@test DiskArrays._pad_offset(c1, (10, 10)) == DiskArrays.RegularChunks(10, 0, 120)
10101037
@test DiskArrays._pad_offset(c1, (0, 0)) == c1
10111038

1012-
c2 = DiskArrays.IrregularChunks(chunksizes = [10, 10, 20, 30, 40])
1039+
c2 = DiskArrays.IrregularChunks(chunksizes=[10, 10, 20, 30, 40])
10131040
#The following test would assume padding ends up in a separate chunk:
1014-
@test DiskArrays._pad_offset(c2, (5, 5)) == DiskArrays.IrregularChunks(chunksizes = [5, 10, 10, 20, 30, 40, 5])
1041+
@test DiskArrays._pad_offset(c2, (5, 5)) == DiskArrays.IrregularChunks(chunksizes=[5, 10, 10, 20, 30, 40, 5])
10151042
@test DiskArrays._pad_offset(c2, (0, 0)) == c2
10161043
end
10171044
end
@@ -1054,10 +1081,10 @@ end
10541081
end
10551082

10561083
@testset "identity function" begin
    a = ChunkedDiskArray(1:10 .> 3; chunksize=(3,))
    # The captured trace output of each reduction must not mention "DiskGenerator"
    for fname in (:sum, :prod, :all, :any, :minimum, :maximum, :count)
        @eval out = @capture_out @trace $fname($a) DiskArrays
        @test !occursin("DiskGenerator", out)
    end
    @test length(a) == count(a) + count(!, a)
end

0 commit comments

Comments
 (0)