Skip to content

Commit 4160df4

Browse files
Allow vector-based indices (#181)
* Allow vector-based indices * Handle discontinuous integer indices * Allow integer indices, now handled by dispatch * Allow BitVectors * Add tentative tests * Define chunks of vector subset through run-length encoding * Allow AbstractVector views --------- Co-authored-by: Fabian Gans <[email protected]>
1 parent 5b73f67 commit 4160df4

File tree

3 files changed

+57
-9
lines changed

3 files changed

+57
-9
lines changed

src/chunks.jl

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,36 @@ findchunk(r::RegularChunks, i::Int) = div(i + r.offset - 1, r.cs) + 1
7171

7272
subsetchunks(r, subs) = subsetchunks_fallback(r, subs)
7373

74+
"""
75+
chunk_rle(chunks,vec)
76+
77+
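Compute a run-length encoding of the chunks accessed by `vec`: for each run of consecutive indices in `vec` that fall into the same chunk of `chunks`, record the length of that run. The resulting run lengths are used to determine the chunk sizes of an index subset that is not sorted.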
78+
79+
"""
80+
function chunk_rle(chunks, vec)
81+
out = Int[]
82+
currentchunk = -1
83+
for i in vec
84+
nextchunk = findchunk(chunks, i)
85+
if nextchunk == currentchunk
86+
out[end] += 1
87+
else
88+
push!(out, 1)
89+
currentchunk = nextchunk
90+
end
91+
end
92+
out
93+
end
94+
95+
"""
96+
subsetchunks(r, subs::BitVector)
97+
98+
Identify chunks from indices which may be discontinuous.
99+
"""
100+
function subsetchunks(r, subs::BitVector)
101+
return subsetchunks(r, findall(subs))
102+
end
103+
74104
approx_chunksize(r::RegularChunks) = r.cs
75105
grid_offset(r::RegularChunks) = r.offset
76106
max_chunksize(r::RegularChunks) = r.cs
@@ -155,7 +185,8 @@ function subsetchunks_fallback(r, subs)
155185
elseif issorted(subs; rev=true)
156186
true
157187
else
158-
throw(ArgumentError("Can only subset chunks for sorted indices"))
188+
rle = chunk_rle(r, subs)
189+
return chunktype_from_chunksizes(rle)
159190
end
160191
cs = zeros(Int, length(r))
161192
for i in subs
@@ -342,8 +373,6 @@ function estimate_chunksize(s, si)
342373
return GridChunks(s, cs)
343374
end
344375

345-
346-
347376
abstract type ChunkTiledDiskArray{T,N} <: AbstractDiskArray{T,N} end
348377
Base.size(a::ChunkTiledDiskArray) = arraysize_from_chunksize.(eachchunk(a).chunks)
349378
function DiskArrays.readblock!(A::ChunkTiledDiskArray{T,N}, data, I...) where {T,N}

src/subarray.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ end
2525
eachchunk(a::SubDiskArray) = eachchunk_view(haschunks(a.v.parent), a.v)
2626
function eachchunk_view(::Chunked, vv)
2727
pinds = parentindices(vv)
28-
if any(ind->!isa(ind,Union{Int,AbstractRange,Colon}),pinds)
29-
throw(ArgumentError("Unable to determine chunksize of non-range views."))
28+
if any(ind->!isa(ind,Union{Int,AbstractRange,Colon,AbstractVector{<:Integer}}),pinds)
29+
throw(ArgumentError("Unable to determine chunksize for view of type $(typeof.(pinds))."))
3030
end
3131
iomit = findints(pinds)
3232
chunksparent = eachchunk(parent(vv))

test/runtests.jl

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ end
266266
@test subsetchunks(r1, [28, 27, 19, 17, 10, 7]) == [1:3, 4:5, 6:6]
267267
@test subsetchunks(r1, [1, 2, 3]) == [1:3]
268268
@test subsetchunks(r1, [1, 2, 3, 10, 11]) == [1:3, 4:5]
269-
@test_throws ArgumentError subsetchunks(r1, [3, 4, 5, 1])
269+
@test subsetchunks(r1, [3, 4, 11, 13, 1]) == [1:2,3:4,5:5]
270270

271271
r2 = IrregularChunks(; chunksizes=[3, 3, 4, 3, 3, 4])
272272
@test subsetchunks(r2, 1:20) == r2
@@ -577,6 +577,25 @@ end
577577
i = Bool[1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1]
578578
u = UnchunkedDiskArray(rand(275,305,36))
579579
@test u[i,1,1][1] == u[findfirst(i),1,1]
580+
581+
# Test for #180
582+
test_arr = rand(100,100,100)
583+
a180 = AccessCountDiskArray(test_arr; chunksize=(10,10,20))
584+
585+
idx = [rand(1:100)]
586+
@test all(a180[idx, :, :] .== test_arr[idx, :, :])
587+
@test all(a180[:, idx, :] .== test_arr[:, idx, :])
588+
@test all(a180[:, :, idx] .== test_arr[:, :, idx])
589+
590+
sel = findall(rand(100) .<= 0.5)
591+
@test all(a180[:, sel, :] .== test_arr[:, sel, :])
592+
@test all(a180[sel, :, :] .== test_arr[sel, :, :])
593+
@test all(a180[sel, :, sel] .== test_arr[sel, :, sel])
594+
595+
bit_sel = rand(100) .> 0.5
596+
@test all(a180[bit_sel, :, :] .== test_arr[bit_sel, :, :])
597+
@test all(a180[:, bit_sel, :] .== test_arr[:, bit_sel, :])
598+
@test all(a180[:, bit_sel, bit_sel] .== test_arr[:, bit_sel, bit_sel])
580599
end
581600

582601

@@ -712,10 +731,10 @@ end
712731

713732
@test collect(reverse(a_disk)) == reverse(a)
714733
@test reverse(view(a_disk, :, 1)) == reverse(a[:, 1])
715-
@test_broken reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
734+
@test reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
716735
# These cases previously failed with `ArgumentError: Can only subset chunks for sorted indices`
717-
@test_broken reverse(view(a_disk, :, 1), 5) == reverse(a[:, 1], 5)
718-
@test_broken reverse(view(a_disk, :, 1), 5, 10) == reverse(a[:, 1], 5, 10)
736+
@test reverse(view(a_disk, :, 1), 5) == reverse(a[:, 1], 5)
737+
@test reverse(view(a_disk, :, 1), 5, 10) == reverse(a[:, 1], 5, 10)
719738
@test collect(reverse(a_disk)) == collect(reverse(a_disk; dims=:)) ==
720739
collect(reverse(a_disk; dims=(1, 2))) == reverse(a)
721740
@test collect(reverse(a_disk; dims=2)) == reverse(a; dims=2)

0 commit comments

Comments
 (0)