Skip to content

Commit c817c5f

Browse files
authored
Allow indexing with nd arrays of integers and CartesianIndex (#151)
* Allow indexing with nd arrays of integers and CartesianIndex
* simplify
1 parent a48787c commit c817c5f

File tree

4 files changed: 71 additions (+71) and 32 deletions (-32).

src/array.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ end
7676
_reverse1(a) = _reverse(a, 1)
7777
function _reverse1(a, start::Int, stop::Int)
7878
inds = [firstindex(a):start-1; stop:-1:start; stop+1:lastindex(a)]
79-
@show inds
8079
return view(a, inds)
8180
end
8281

src/batchgetindex.jl

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ end
6767
has_chunk_gap(cs,ids) = true
6868

6969
#Compute the number of possible indices in the hyperrectangle
70-
span(v::AbstractVector{<:Integer}) = 1 -(-(extrema(v)...))
71-
function span(v::AbstractVector{CartesianIndex{N}}) where N
70+
span(v::AbstractArray{<:Integer}) = 1 -(-(extrema(v)...))
71+
function span(v::AbstractArray{CartesianIndex{N}}) where N
7272
minind,maxind = extrema(v)
7373
prod((maxind-minind+oneunit(minind)).I)
7474
end
@@ -78,7 +78,7 @@ function span(v::AbstractArray{Bool})
7878
end
7979
#The number of indices to actually be read
8080
numind(v::AbstractArray{Bool}) = sum(v)
81-
numind(v::Union{AbstractVector{<:Integer},AbstractVector{<:CartesianIndex}})=length(v)
81+
numind(v::Union{AbstractArray{<:Integer},AbstractArray{<:CartesianIndex}})=length(v)
8282

8383
function is_sparse_index(ids; density_threshold = 0.5)
8484
indexdensity = numind(ids) / span(ids)
@@ -91,16 +91,17 @@ function process_index(i, cs, strategy::Union{ChunkRead,SubRanges})
9191
end
9292

9393

94-
function process_index(i::AbstractVector{<:Integer}, cs, ::ChunkRead)
94+
function process_index(i::AbstractArray{<:Integer,N}, cs, ::ChunkRead) where N
9595
csnow = first(cs)
96-
chunksdict = Dict{Int,Vector{Pair{Int,Int}}}()
96+
chunksdict = Dict{Int,Vector{Pair{Int,CartesianIndex{N}}}}()
9797
# Look for affected chunks
98-
for (outindex,dataindex) in enumerate(i)
98+
for outindex in CartesianIndices(i)
99+
dataindex = i[outindex]
99100
cI = findchunk(csnow,dataindex)
100101
a = get!(()->Pair{Int,Int}[],chunksdict,cI)
101102
push!(a,(dataindex=>outindex))
102103
end
103-
tempinds,datainds,outinds = Tuple{Vector{Int}}[], Tuple{UnitRange{Int}}[], Tuple{Vector{Int}}[]
104+
tempinds,datainds,outinds = Tuple{Vector{Int}}[], Tuple{UnitRange{Int}}[], Tuple{Vector{CartesianIndex{N}}}[]
104105
maxtempind = -1
105106
for (cI,a) in chunksdict
106107
dataind = extrema(first,a)
@@ -110,7 +111,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, ::ChunkRead)
110111
push!(tempinds, (tempind,))
111112
maxtempind = max(maxtempind,maximum(tempind))
112113
end
113-
(length(i),), ((maxtempind),), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
114+
size(i), ((maxtempind),), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
114115
end
115116

116117
function find_subranges_sorted(inds,allow_steprange=false)
@@ -154,9 +155,17 @@ function find_subranges_sorted(inds,allow_steprange=false)
154155
rangelist, outputinds
155156
end
156157

158+
#For index arrays >1D we need to store the cartesian indices in the sort
159+
#perm result
160+
function mysortperm(i)
161+
p = collect(vec(CartesianIndices(i)))
162+
sort!(p;by=Base.Fix1(getindex,i))
163+
p
164+
end
165+
mysortperm(i::AbstractVector) = sortperm(i)
157166
##Implement NCDatasets behavior of splitting list of indices into ranges
158-
function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
159-
if issorted(i)
167+
function process_index(i::AbstractArray{<:Integer,N}, cs, s::SubRanges) where N
168+
if i isa AbstractVector && issorted(i)
160169
rangelist, outputinds = find_subranges_sorted(i,allow_steprange(s))
161170
datainds = tuple.(rangelist)
162171
tempinds = map(rangelist,outputinds) do rl,oi
@@ -168,7 +177,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
168177
tempsize = maximum(length(rangelist))
169178
(length(i),), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
170179
else
171-
p = sortperm(i)
180+
p = mysortperm(i)
172181
i_sorted = view(i,p)
173182
rangelist, outputinds = find_subranges_sorted(i_sorted,allow_steprange(s))
174183
datainds = tuple.(rangelist)
@@ -181,7 +190,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
181190
(view(p,oi),)
182191
end
183192
tempsize = maximum(length(rangelist))
184-
(length(i),), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
193+
size(i), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
185194
end
186195
end
187196
function process_index(i::AbstractArray{Bool,N}, cs, cr::ChunkRead) where N

src/diskarray.jl

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,22 +87,15 @@ _resolve_indices(::Tuple{}, ::Tuple{}, output_size, temp_sizes, output_indices,
8787
#No dimension left in array, only singular indices allowed
8888
function _resolve_indices(::Tuple{}, i, output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
8989
inow = first(i)
90-
if inow isa Integer
91-
inow == 1 || throw(ArgumentError("Trailing indices must be 1"))
92-
_resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
93-
elseif inow isa AbstractVector
94-
(length(inow) == 1 && first(inow) == 1) || throw(ArgumentError("Trailing indices must be 1"))
95-
output_size = (output_size..., 1)
96-
output_indices = (output_indices..., 1)
97-
_resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
98-
else
99-
throw(ArgumentError("Trailing indices must be 1"))
100-
end
90+
(length(inow) == 1 && only(inow) == 1) || throw(ArgumentError("Trailing indices must be 1"))
91+
output_size = (output_size..., size(inow)...)
92+
output_indices = (output_indices..., size(inow)...)
93+
_resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
10194
end
10295
#Still dimensions left, but no indices available
10396
function _resolve_indices(cs, ::Tuple{}, output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
10497
csnow = first(cs)
105-
arraysize_from_chunksize(csnow) == 1 || throw(ArgumentError("Wrong indexing"))
98+
arraysize_from_chunksize(csnow) == 1 || throw(ArgumentError("Indices can only be omitted for trailing singleton dimensions"))
10699
data_indices = (data_indices..., 1:1)
107100
temp_sizes = (temp_sizes..., 1)
108101
temp_indices = (temp_indices..., 1)
@@ -120,9 +113,9 @@ end
120113
function process_index(i::AbstractUnitRange, cs)
121114
(length(i),), (length(i),), (Colon(),), (Colon(),), (i,), Base.tail(cs)
122115
end
123-
function process_index(i::AbstractVector{<:Integer}, cs, ::NoBatch)
116+
function process_index(i::AbstractArray{<:Integer}, cs, ::NoBatch)
124117
indmin, indmax = extrema(i)
125-
(length(i),), ((indmax - indmin + 1),), (Colon(),), ((i .- (indmin - 1)),), (indmin:indmax,), Base.tail(cs)
118+
size(i), ((indmax - indmin + 1),), map(_->Colon(),size(i)), ((i .- (indmin - 1)),), (indmin:indmax,), Base.tail(cs)
126119
end
127120
function process_index(i::AbstractArray{Bool,N}, cs, ::NoBatch) where {N}
128121
csnow, csrem = splitcs(i, cs)
@@ -133,22 +126,23 @@ function process_index(i::AbstractArray{Bool,N}, cs, ::NoBatch) where {N}
133126
tempinds = view(i, range.(indmin, indmax)...)
134127
(sum(i),), tempsize, (Colon(),), (tempinds,), range.(indmin, indmax), csrem
135128
end
136-
function process_index(i::AbstractVector{<:CartesianIndex{N}}, cs, ::NoBatch) where {N}
129+
function process_index(i::AbstractArray{<:CartesianIndex{N}}, cs, ::NoBatch) where {N}
137130
csnow, csrem = splitcs(i, cs)
138131
s = arraysize_from_chunksize.(csnow)
139132
cindmin, cindmax = extrema(view(CartesianIndices(s), i))
140133
indmin, indmax = cindmin.I, cindmax.I
141134
tempsize = indmax .- indmin .+ 1
142135
tempoffset = cindmin - oneunit(cindmin)
143136
tempinds = i .- tempoffset
144-
(length(i),), tempsize, (Colon(),), (tempinds,), range.(indmin, indmax), csrem
137+
outinds = map(_->Colon(),size(i))
138+
size(i), tempsize, outinds, (tempinds,), range.(indmin, indmax), csrem
145139
end
146140
function process_index(i::CartesianIndices{N}, cs, ::NoBatch) where {N}
147141
_, csrem = splitcs(i, cs)
148142
cols = map(_ -> Colon(), i.indices)
149143
length.(i.indices), length.(i.indices), cols, cols, i.indices, csrem
150144
end
151-
splitcs(i::AbstractVector{<:CartesianIndex}, cs) = splitcs(first(i).I, (), cs)
145+
splitcs(i::AbstractArray{<:CartesianIndex}, cs) = splitcs(first(i).I, (), cs)
152146
splitcs(i::AbstractArray{Bool}, cs) = splitcs(size(i), (), cs)
153147
splitcs(i::CartesianIndices, cs) = splitcs(i.indices, (), cs)
154148
splitcs(_, cs) = (first(cs),), Base.tail(cs)

test/runtests.jl

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ end
446446
@testset "Getindex/Setindex with vectors" begin
447447
a = AccessCountDiskArray(reshape(1:20, 4, 5, 1); chunksize=(4, 1, 1))
448448
@test a[:, [1, 4], 1] == trueparent(a)[:, [1, 4], 1]
449-
@test_broken getindex_count(a) == 2
449+
@test getindex_count(a) == 1
450450
coords = CartesianIndex.([(1, 1, 1), (3, 1, 1), (2, 4, 1), (4, 4, 1)])
451451
@test a[coords] == trueparent(a)[coords]
452452
@test_broken getindex_count(a) == 4
@@ -497,6 +497,8 @@ end
497497
a_inner = rand(100)
498498
inds_sorted = [1,1,1,3,5,6,6,7,10,13,16,16,19,20]
499499
inds_unsorted = [7, 5, 1, 16, 1, 10, 20, 6, 19, 1, 13, 6, 3, 16]
500+
inds_sorted_matrix = reshape(inds_sorted,7,2)
501+
inds_unsorted_matrix = reshape(inds_unsorted,7,2)
500502
a = AccessCountDiskArray(a_inner,chunksize=(10,),batchstrategy=DiskArrays.ChunkRead(NoStepRange(),0.5));
501503
b1 = a[inds_sorted];
502504
@test b1 == a_inner[inds_sorted]
@@ -505,6 +507,17 @@ end
505507
b2 = a[inds_unsorted]
506508
@test b2 == a_inner[inds_unsorted]
507509
@test getindex_log(a) == [(1:20,)]
510+
empty!(a.getindex_log)
511+
b3 = a[inds_sorted_matrix];
512+
@test size(b3) == size(a_inner[inds_sorted_matrix])
513+
@test b3 == a_inner[inds_sorted_matrix]
514+
@test getindex_log(a) == [(1:20,)]
515+
empty!(a.getindex_log)
516+
b4 = a[inds_unsorted_matrix]
517+
@test b4 == a_inner[inds_unsorted_matrix]
518+
@test getindex_log(a) == [(1:20,)]
519+
empty!(a.getindex_log)
520+
508521

509522
a = AccessCountDiskArray(a_inner,chunksize=(5,),batchstrategy=DiskArrays.ChunkRead(CanStepRange(),0.8));
510523
b1 = a[inds_sorted];
@@ -514,6 +527,14 @@ end
514527
b2 = a[inds_unsorted]
515528
@test b2 == a_inner[inds_unsorted]
516529
@test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
530+
empty!(a.getindex_log)
531+
b3 = a[inds_sorted_matrix];
532+
@test b3 == a_inner[inds_sorted_matrix]
533+
@test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
534+
empty!(a.getindex_log)
535+
b4 = a[inds_unsorted_matrix]
536+
@test b4 == a_inner[inds_unsorted_matrix]
537+
@test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
517538

518539

519540
a = AccessCountDiskArray(a_inner,chunksize=(10,),batchstrategy=DiskArrays.SubRanges(CanStepRange(),0.5));
@@ -533,6 +554,14 @@ end
533554
b2 = a[inds_unsorted]
534555
@test b2 == a_inner[inds_unsorted]
535556
@test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
557+
empty!(a.getindex_log)
558+
b3 = a[inds_sorted_matrix];
559+
@test b3 == a_inner[inds_sorted_matrix]
560+
@test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
561+
empty!(a.getindex_log)
562+
b4 = a[inds_unsorted_matrix]
563+
@test b4 == a_inner[inds_unsorted_matrix]
564+
@test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
536565

537566
a = AccessCountDiskArray(a_inner,chunksize=(5,),batchstrategy=DiskArrays.SubRanges(NoStepRange(),0.8));
538567
b1 = a[inds_sorted];
@@ -542,6 +571,14 @@ end
542571
b2 = a[inds_unsorted]
543572
@test b2 == a_inner[inds_unsorted]
544573
@test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
574+
empty!(a.getindex_log)
575+
b3 = a[inds_sorted_matrix];
576+
@test b3 == a_inner[inds_sorted_matrix]
577+
@test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
578+
empty!(a.getindex_log)
579+
b4 = a[inds_unsorted_matrix]
580+
@test b4 == a_inner[inds_unsorted_matrix]
581+
@test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
545582
end
546583

547584
@testset "generator" begin
@@ -580,7 +617,7 @@ end
580617

581618
@test collect(reverse(a_disk)) == reverse(a)
582619
@test reverse(view(a_disk, :, 1)) == reverse(a[:, 1])
583-
@test reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
620+
@test_broken reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
584621
# ERROR: ArgumentError: Can only subset chunks for sorted indices
585622
@test_broken reverse(view(a_disk, :, 1), 5) == reverse(a[:, 1], 5)
586623
@test_broken reverse(view(a_disk, :, 1), 5, 10) == reverse(a[:, 1], 5, 10)

0 commit comments

Comments
 (0)