Allow indexing with nd arrays of integers and CartesianIndex (#151)

meggart · web-flow · commit c817c5f00430 · 2024-02-21T16:24:58.000+01:00
* Allow indexing with nd arrays of integers and cart

* simplify
diff --git a/src/array.jl b/src/array.jl
@@ -76,7 +76,6 @@ end
 _reverse1(a) = _reverse(a, 1)
 function _reverse1(a, start::Int, stop::Int) 
     inds = [firstindex(a):start-1; stop:-1:start; stop+1:lastindex(a)]
-    @show inds
     return view(a, inds)
 end
 
diff --git a/src/batchgetindex.jl b/src/batchgetindex.jl
@@ -67,8 +67,8 @@ end
 has_chunk_gap(cs,ids) = true
 
 #Compute the number of possible indices in the hyperrectangle
-span(v::AbstractVector{<:Integer}) = 1 -(-(extrema(v)...))
-function span(v::AbstractVector{CartesianIndex{N}}) where N
+span(v::AbstractArray{<:Integer}) = 1 -(-(extrema(v)...))
+function span(v::AbstractArray{CartesianIndex{N}}) where N
     minind,maxind = extrema(v)
     prod((maxind-minind+oneunit(minind)).I)
 end
@@ -78,7 +78,7 @@ function span(v::AbstractArray{Bool})
 end
 #The number of indices to actually be read
 numind(v::AbstractArray{Bool}) = sum(v)
-numind(v::Union{AbstractVector{<:Integer},AbstractVector{<:CartesianIndex}})=length(v)
+numind(v::Union{AbstractArray{<:Integer},AbstractArray{<:CartesianIndex}})=length(v)
 
 function is_sparse_index(ids; density_threshold = 0.5)
     indexdensity = numind(ids) / span(ids)
@@ -91,16 +91,17 @@ function process_index(i, cs, strategy::Union{ChunkRead,SubRanges})
 end
 
 
-function process_index(i::AbstractVector{<:Integer}, cs, ::ChunkRead)
+function process_index(i::AbstractArray{<:Integer,N}, cs, ::ChunkRead) where N
     csnow = first(cs)
-    chunksdict = Dict{Int,Vector{Pair{Int,Int}}}()
+    chunksdict = Dict{Int,Vector{Pair{Int,CartesianIndex{N}}}}()
     # Look for affected chunks
-    for (outindex,dataindex) in enumerate(i)
+    for outindex in CartesianIndices(i)
+        dataindex = i[outindex]
         cI = findchunk(csnow,dataindex)
         a = get!(()->Pair{Int,Int}[],chunksdict,cI)
         push!(a,(dataindex=>outindex))
     end
-    tempinds,datainds,outinds = Tuple{Vector{Int}}[], Tuple{UnitRange{Int}}[], Tuple{Vector{Int}}[]
+    tempinds,datainds,outinds = Tuple{Vector{Int}}[], Tuple{UnitRange{Int}}[], Tuple{Vector{CartesianIndex{N}}}[]
     maxtempind = -1
     for (cI,a) in chunksdict
         dataind = extrema(first,a)
@@ -110,7 +111,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, ::ChunkRead)
         push!(tempinds, (tempind,))
         maxtempind = max(maxtempind,maximum(tempind))
     end
-    (length(i),), ((maxtempind),), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
+    size(i), ((maxtempind),), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
 end
 
 function find_subranges_sorted(inds,allow_steprange=false)
@@ -154,9 +155,17 @@ function find_subranges_sorted(inds,allow_steprange=false)
     rangelist, outputinds
 end
 
+#For index arrays >1D we need to store the cartesian indices in the sort
+#perm result
+function mysortperm(i)
+    p = collect(vec(CartesianIndices(i)))
+    sort!(p;by=Base.Fix1(getindex,i))
+    p
+end
+mysortperm(i::AbstractVector) = sortperm(i)
 ##Implement NCDatasets behavior of splitting list of indices into ranges
-function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
-    if issorted(i)
+function process_index(i::AbstractArray{<:Integer,N}, cs, s::SubRanges) where N
+    if i isa AbstractVector && issorted(i)
         rangelist, outputinds = find_subranges_sorted(i,allow_steprange(s))
         datainds = tuple.(rangelist)
         tempinds = map(rangelist,outputinds) do rl,oi
@@ -168,7 +177,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
         tempsize = maximum(length(rangelist))
         (length(i),), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
     else
-        p = sortperm(i)
+        p = mysortperm(i)
         i_sorted = view(i,p)
         rangelist, outputinds = find_subranges_sorted(i_sorted,allow_steprange(s))
         datainds = tuple.(rangelist)
@@ -181,7 +190,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
             (view(p,oi),)
         end
         tempsize = maximum(length(rangelist))
-        (length(i),), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
+        size(i), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
     end
 end
 function process_index(i::AbstractArray{Bool,N}, cs, cr::ChunkRead) where N
diff --git a/src/diskarray.jl b/src/diskarray.jl
@@ -87,22 +87,15 @@ _resolve_indices(::Tuple{}, ::Tuple{}, output_size, temp_sizes, output_indices,
 #No dimension left in array, only singular indices allowed
 function _resolve_indices(::Tuple{}, i, output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
     inow = first(i)
-    if inow isa Integer
-        inow == 1 || throw(ArgumentError("Trailing indices must be 1"))
-        _resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
-    elseif inow isa AbstractVector
-        (length(inow) == 1 && first(inow) == 1) || throw(ArgumentError("Trailing indices must be 1"))
-        output_size = (output_size..., 1)
-        output_indices = (output_indices..., 1)
-        _resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
-    else
-        throw(ArgumentError("Trailing indices must be 1"))
-    end
+    (length(inow) == 1 && only(inow) == 1) || throw(ArgumentError("Trailing indices must be 1"))
+    output_size = (output_size..., size(inow)...)
+    output_indices = (output_indices..., size(inow)...)
+    _resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
 end
 #Still dimensions left, but no indices available
 function _resolve_indices(cs, ::Tuple{}, output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
     csnow = first(cs)
-    arraysize_from_chunksize(csnow) == 1 || throw(ArgumentError("Wrong indexing"))
+    arraysize_from_chunksize(csnow) == 1 || throw(ArgumentError("Indices can only be omitted for trailing singleton dimensions"))
     data_indices = (data_indices..., 1:1)
     temp_sizes = (temp_sizes..., 1)
     temp_indices = (temp_indices..., 1)
@@ -120,9 +113,9 @@ end
 function process_index(i::AbstractUnitRange, cs)
     (length(i),), (length(i),), (Colon(),), (Colon(),), (i,), Base.tail(cs)
 end
-function process_index(i::AbstractVector{<:Integer}, cs, ::NoBatch)
+function process_index(i::AbstractArray{<:Integer}, cs, ::NoBatch)
     indmin, indmax = extrema(i)
-    (length(i),), ((indmax - indmin + 1),), (Colon(),), ((i .- (indmin - 1)),), (indmin:indmax,), Base.tail(cs)
+    size(i), ((indmax - indmin + 1),), map(_->Colon(),size(i)), ((i .- (indmin - 1)),), (indmin:indmax,), Base.tail(cs)
 end
 function process_index(i::AbstractArray{Bool,N}, cs, ::NoBatch) where {N}
     csnow, csrem = splitcs(i, cs)
@@ -133,22 +126,23 @@ function process_index(i::AbstractArray{Bool,N}, cs, ::NoBatch) where {N}
     tempinds = view(i, range.(indmin, indmax)...)
     (sum(i),), tempsize, (Colon(),), (tempinds,), range.(indmin, indmax), csrem
 end
-function process_index(i::AbstractVector{<:CartesianIndex{N}}, cs, ::NoBatch) where {N}
+function process_index(i::AbstractArray{<:CartesianIndex{N}}, cs, ::NoBatch) where {N}
     csnow, csrem = splitcs(i, cs)
     s = arraysize_from_chunksize.(csnow)
     cindmin, cindmax = extrema(view(CartesianIndices(s), i))
     indmin, indmax = cindmin.I, cindmax.I
     tempsize = indmax .- indmin .+ 1
     tempoffset = cindmin - oneunit(cindmin)
     tempinds = i .- tempoffset
-    (length(i),), tempsize, (Colon(),), (tempinds,), range.(indmin, indmax), csrem
+    outinds = map(_->Colon(),size(i))
+    size(i), tempsize, outinds, (tempinds,), range.(indmin, indmax), csrem
 end
 function process_index(i::CartesianIndices{N}, cs, ::NoBatch) where {N}
     _, csrem = splitcs(i, cs)
     cols = map(_ -> Colon(), i.indices)
     length.(i.indices), length.(i.indices), cols, cols, i.indices, csrem
 end
-splitcs(i::AbstractVector{<:CartesianIndex}, cs) = splitcs(first(i).I, (), cs)
+splitcs(i::AbstractArray{<:CartesianIndex}, cs) = splitcs(first(i).I, (), cs)
 splitcs(i::AbstractArray{Bool}, cs) = splitcs(size(i), (), cs)
 splitcs(i::CartesianIndices, cs) = splitcs(i.indices, (), cs)
 splitcs(_, cs) = (first(cs),), Base.tail(cs)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -446,7 +446,7 @@ end
 @testset "Getindex/Setindex with vectors" begin
     a = AccessCountDiskArray(reshape(1:20, 4, 5, 1); chunksize=(4, 1, 1))
     @test a[:, [1, 4], 1] == trueparent(a)[:, [1, 4], 1]
-    @test_broken getindex_count(a) == 2
+    @test getindex_count(a) == 1
     coords = CartesianIndex.([(1, 1, 1), (3, 1, 1), (2, 4, 1), (4, 4, 1)])
     @test a[coords] == trueparent(a)[coords]
     @test_broken getindex_count(a) == 4
@@ -497,6 +497,8 @@ end
     a_inner = rand(100)
     inds_sorted = [1,1,1,3,5,6,6,7,10,13,16,16,19,20]
     inds_unsorted = [7, 5, 1, 16, 1, 10, 20, 6, 19, 1, 13, 6, 3, 16]
+    inds_sorted_matrix = reshape(inds_sorted,7,2)
+    inds_unsorted_matrix = reshape(inds_unsorted,7,2)
     a = AccessCountDiskArray(a_inner,chunksize=(10,),batchstrategy=DiskArrays.ChunkRead(NoStepRange(),0.5));
     b1 = a[inds_sorted];
     @test b1 == a_inner[inds_sorted]
@@ -505,6 +507,17 @@ end
     b2 = a[inds_unsorted]
     @test b2 == a_inner[inds_unsorted]
     @test getindex_log(a) == [(1:20,)]
+    empty!(a.getindex_log)
+    b3 = a[inds_sorted_matrix];
+    @test size(b3) == size(a_inner[inds_sorted_matrix])
+    @test b3 == a_inner[inds_sorted_matrix]
+    @test getindex_log(a) == [(1:20,)]
+    empty!(a.getindex_log)
+    b4 = a[inds_unsorted_matrix]
+    @test b4 == a_inner[inds_unsorted_matrix]
+    @test getindex_log(a) == [(1:20,)]
+    empty!(a.getindex_log)
+
     
     a = AccessCountDiskArray(a_inner,chunksize=(5,),batchstrategy=DiskArrays.ChunkRead(CanStepRange(),0.8));
     b1 = a[inds_sorted];
@@ -514,6 +527,14 @@ end
     b2 = a[inds_unsorted]
     @test b2 == a_inner[inds_unsorted]
     @test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
+    empty!(a.getindex_log)
+    b3 = a[inds_sorted_matrix];
+    @test b3 == a_inner[inds_sorted_matrix]
+    @test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
+    empty!(a.getindex_log)
+    b4 = a[inds_unsorted_matrix]
+    @test b4 == a_inner[inds_unsorted_matrix]
+    @test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
     
     
     a = AccessCountDiskArray(a_inner,chunksize=(10,),batchstrategy=DiskArrays.SubRanges(CanStepRange(),0.5));
@@ -533,6 +554,14 @@ end
     b2 = a[inds_unsorted]
     @test b2 == a_inner[inds_unsorted]
     @test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
+    empty!(a.getindex_log)
+    b3 = a[inds_sorted_matrix];
+    @test b3 == a_inner[inds_sorted_matrix]
+    @test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
+    empty!(a.getindex_log)
+    b4 = a[inds_unsorted_matrix]
+    @test b4 == a_inner[inds_unsorted_matrix]
+    @test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
     
     a = AccessCountDiskArray(a_inner,chunksize=(5,),batchstrategy=DiskArrays.SubRanges(NoStepRange(),0.8));
     b1 = a[inds_sorted];
@@ -542,6 +571,14 @@ end
     b2 = a[inds_unsorted]
     @test b2 == a_inner[inds_unsorted]
     @test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
+    empty!(a.getindex_log)
+    b3 = a[inds_sorted_matrix];
+    @test b3 == a_inner[inds_sorted_matrix]
+    @test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
+    empty!(a.getindex_log)
+    b4 = a[inds_unsorted_matrix]
+    @test b4 == a_inner[inds_unsorted_matrix]
+    @test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
     end
 
 @testset "generator" begin
@@ -580,7 +617,7 @@ end
 
     @test collect(reverse(a_disk)) == reverse(a)
     @test reverse(view(a_disk, :, 1)) == reverse(a[:, 1])
-    @test reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
+    @test_broken reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
     # ERROR: ArgumentError: Can only subset chunks for sorted indices
     @test_broken reverse(view(a_disk, :, 1), 5) == reverse(a[:, 1], 5)
     @test_broken reverse(view(a_disk, :, 1), 5, 10) == reverse(a[:, 1], 5, 10)