Skip to content

Commit 5d913ff

Browse files
committed
faster chunk estimation
1 parent a88bbf3 commit 5d913ff

File tree

1 file changed

+19
-12
lines changed

1 file changed

+19
-12
lines changed

src/cat.jl

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -22,18 +22,16 @@ function ConcatDiskArray(arrays::AbstractArray{<:AbstractArray{<:Any,N},M}) wher
2222
T = mapreduce(eltype, promote_type, init=eltype(first(arrays)), arrays)
2323

2424
if N > M
25-
newshape = extenddims(size(arrays), size(first(arrays)))
26-
@show newshape
25+
newshape = extenddims(size(arrays), size(first(arrays)),1)
2726
arrays1 = reshape(arrays, newshape)
2827
D = N
2928
else
3029
arrays1 = arrays
3130
D = M
3231
end
3332
startinds, sizes = arraysize_and_startinds(arrays1)
34-
@show startinds, sizes
3533

36-
chunks = concat_chunksize(D, arrays1)
34+
chunks = concat_chunksize(arrays1)
3735
hc = Chunked(batchstrategy(chunks))
3836

3937
return ConcatDiskArray{T,D,typeof(arrays1),typeof(chunks),typeof(hc)}(arrays1, startinds, sizes, chunks, hc)
@@ -44,8 +42,8 @@ function ConcatDiskArray(arrays::AbstractArray)
4442
error("Arrays don't have the same dimensions")
4543
return error("Should not be reached")
4644
end
47-
extenddims(a::NTuple{N,Int},b::NTuple{M,Int}) where {N,M} = extenddims((a...,1), b)
48-
extenddims(a::NTuple{N,Int},b::NTuple{N,Int}) where {N} = a
45+
extenddims(a::NTuple{N},b::NTuple{M},fillval) where {N,M} = extenddims((a...,fillval), b, fillval)
46+
extenddims(a::NTuple{N},_::NTuple{N},_) where {N} = a
4947

5048
Base.size(a::ConcatDiskArray) = a.size
5149

@@ -113,15 +111,24 @@ function _concat_diskarray_block_io(f, a::ConcatDiskArray, inds...)
113111
end
114112
end
115113

116-
function concat_chunksize(N, parents)
117-
oldchunks = map(eachchunk, parents)
118-
newchunks = ntuple(N) do i
119-
sliceinds = Base.setindex(ntuple(_ -> 1, N), :, i)
120-
v = map(c -> c.chunks[i], oldchunks[sliceinds...])
114+
function concat_chunksize(parents)
115+
newchunks = map(s->Vector{Union{RegularChunks, IrregularChunks}}(undef, s) ,size(parents))
116+
for i in CartesianIndices(parents)
117+
array = parents[i]
118+
chunks = eachchunk(array)
119+
foreach(chunks.chunks,i.I,newchunks) do c, ind, newc
120+
if !isassigned(newc, ind)
121+
newc[ind] = c
122+
elseif c != newc[ind]
123+
throw(ArgumentError("Chunk sizes don't forma grid"))
124+
end
125+
end
126+
end
127+
newchunks = map(newchunks) do v
121128
init = RegularChunks(approx_chunksize(first(v)), 0, 0)
122129
reduce(mergechunks, v; init=init)
123130
end
124-
131+
extenddims(newchunks, size(parents), RegularChunks(1,0,1))
125132
return GridChunks(newchunks...)
126133
end
127134

0 commit comments

Comments
 (0)