Merge pull request #40 from JuliaArrays/teh/more_indexing

mbauman · web-flow · commit 6ace8150b698 · 2016-09-18T11:45:04.000-05:00
Indexing, equality, and dimension-changing operations
diff --git a/src/AxisArrays.jl b/src/AxisArrays.jl
@@ -1,5 +1,6 @@
 module AxisArrays
 
+using Base: tail
 using RangeArrays, Iterators, IntervalSets, Compat
 using Compat.view
 
diff --git a/src/core.jl b/src/core.jl
@@ -8,6 +8,8 @@ else
     using Base: @pure
 end
 
+typealias Symbols Tuple{Symbol,Vararg{Symbol}}  # tuple of one or more Symbols
+
 @doc """
 Type-stable axis-specific indexing and identification with a
 parametric type.
@@ -51,7 +53,7 @@ immutable Axis{name,T}
 end
 # Constructed exclusively through Axis{:symbol}(...) or Axis{1}(...)
 @compat (::Type{Axis{name}}){name,T}(I::T=()) = Axis{name,T}(I)
-@compat Base.:(==){name,T}(A::Axis{name,T}, B::Axis{name,T}) = A.val == B.val
+@compat Base.:(==){name}(A::Axis{name}, B::Axis{name}) = A.val == B.val  # names must match; the value types may differ
 Base.hash{name}(A::Axis{name}, hx::UInt) = hash(A.val, hash(name, hx))
 axistype{name,T}(::Axis{name,T}) = T
 axistype{name,T}(::Type{Axis{name,T}}) = T
@@ -61,8 +63,12 @@ Base.getindex(A::Axis, i...) = A.val[i...]
 Base.unsafe_getindex(A::Axis, i...) = Base.unsafe_getindex(A, i...)
 Base.eltype{_,T}(::Type{Axis{_,T}}) = eltype(T)
 Base.size(A::Axis) = size(A.val)
+Base.indices(A::Axis) = indices(A.val)  # forwarded to the wrapped axis values
+Base.indices(A::Axis, d) = indices(A.val, d)  # same, for a single dimension d
 Base.length(A::Axis) = length(A.val)
 @compat (A::Axis{name}){name}(i) = Axis{name}(i)
+Base.convert{name,T}(::Type{Axis{name,T}}, ax::Axis{name,T}) = ax  # already of the requested type: returned as is
+Base.convert{name,T}(::Type{Axis{name,T}}, ax::Axis{name}) = Axis{name}(convert(T, ax.val))  # same name: convert the values to T and re-wrap
 
 @doc """
 An AxisArray is an AbstractArray that wraps another AbstractArray and
@@ -95,11 +101,12 @@ AxisArray(A::AbstractArray, vectors::AbstractVector...)
 * `A::AbstractArray` : the wrapped array data
 * `axes` or `names` or `vectors` : dimensional information for the wrapped array
 
-The dimensional information may be passed in one of three ways and is entirely
-optional. When the axis name or value is missing for a dimension, a default is
-substituted. The default axis names for dimensions `(1, 2, 3, 4, 5, ...)` are
-`(:row, :col, :page, :dim_4, :dim_5, ...)`. The default axis values are the
-integer unit ranges: `1:size(A, d)` for each missing dimension `d`.
+The dimensional information may be passed in one of three ways and is
+entirely optional. When the axis name or value is missing for a
+dimension, a default is substituted. The default axis names for
+dimensions `(1, 2, 3, 4, 5, ...)` are `(:row, :col, :page, :dim_4,
+:dim_5, ...)`. The default axis values are `indices(A, d)` for each
+missing dimension `d`.
 
 ### Indexing
 
@@ -166,12 +173,12 @@ AxisArray(A::AbstractArray, axs::Axis...) = AxisArray(A, axs)
         push!(ax.args, :(axs[$i]))
     end
     for i=L+1:N
-        push!(ax.args, :(Axis{_defaultdimname($i)}(1:size(A, $i))))
+        push!(ax.args, :(Axis{_defaultdimname($i)}(indices(A, $i))))
     end
     quote
         for i = 1:length(axs)
             checkaxis(axs[i].val)
-            if length(axs[i].val) != size(A, i)
+            if _length(axs[i].val) != _size(A, i)
                 throw(ArgumentError("the length of each axis must match the corresponding size of data"))
             end
         end
@@ -183,7 +190,7 @@ AxisArray(A::AbstractArray, axs::Axis...) = AxisArray(A, axs)
 end
 # Simple non-type-stable constructors to specify just the name or axis values
 AxisArray(A::AbstractArray) = AxisArray(A, ()) # Disambiguation
-AxisArray(A::AbstractArray, names::Symbol...)         = AxisArray(A, ntuple(i->Axis{names[i]}(1:size(A, i)), length(names)))
+AxisArray(A::AbstractArray, names::Symbol...)         = AxisArray(A, map((name,ind)->Axis{name}(ind), names, indices(A)))  # each name is paired with the entry of indices(A) at the same position
 AxisArray(A::AbstractArray, vects::AbstractVector...) = AxisArray(A, ntuple(i->Axis{_defaultdimname(i)}(vects[i]), length(vects)))
 
 # Axis definitions
@@ -214,47 +221,103 @@ end
 Base.size(A::AxisArray) = size(A.data)
 Base.size(A::AxisArray, Ax::Axis) = size(A.data, axisdim(A, Ax))
 Base.size{Ax<:Axis}(A::AxisArray, ::Type{Ax}) = size(A.data, axisdim(A, Ax))
+Base.indices(A::AxisArray) = indices(A.data)  # forwarded to the wrapped array
+Base.indices(A::AxisArray, Ax::Axis) = indices(A.data, axisdim(A, Ax))  # axis instance: dimension looked up with axisdim
+Base.indices{Ax<:Axis}(A::AxisArray, ::Type{Ax}) = indices(A.data, axisdim(A, Ax))  # same, with the axis given as a type
 Base.linearindexing(A::AxisArray) = Base.linearindexing(A.data)
 Base.convert{T,N}(::Type{Array{T,N}}, A::AxisArray{T,N}) = convert(Array{T,N}, A.data)
 # Similar is tricky. If we're just changing the element type, it can stay as an
 # AxisArray. But if we're changing dimensions, there's no way it can know how
 # to keep track of the axes, so just punt and return a regular old Array.
 # TODO: would it feel more consistent to return an AxisArray without any axes?
-Base.similar{T}(A::AxisArray{T})                = (d = similar(A.data, T); AxisArray(d, A.axes))
-Base.similar{T}(A::AxisArray{T}, S::Type)       = (d = similar(A.data, S); AxisArray(d, A.axes))
-Base.similar{T}(A::AxisArray{T}, S::Type, ::Tuple{}) = (d = similar(A.data, S); AxisArray(d, A.axes))
-Base.similar{T}(A::AxisArray{T}, dims::Int)         = similar(A, T, (dims,))
-Base.similar{T}(A::AxisArray{T}, dims::Int...)      = similar(A, T, dims)
-Base.similar{T}(A::AxisArray{T}, dims::Tuple{Vararg{Int}}) = similar(A, T, dims)
-Base.similar{T}(A::AxisArray{T}, S::Type, dims::Int...)    = similar(A.data, S, dims)
-Base.similar{T}(A::AxisArray{T}, S::Type, dims::Tuple{Vararg{Int}}) = similar(A.data, S, dims)
+Base.similar{S}(A::AxisArray, ::Type{S})       = (d = similar(A.data, S); AxisArray(d, A.axes))  # new element type S, axes of A reused
+Base.similar{S,N}(A::AxisArray, ::Type{S}, dims::Dims{N}) = similar(A.data, S, dims)  # explicit dims: returns similar(A.data, ...) directly, without axes
 # If, however, we pass Axis objects containing the new axis for that dimension,
 # we can return a similar AxisArray with an appropriately modified size
-Base.similar{T}(A::AxisArray{T}, axs::Axis...) = similar(A, T, axs)
-Base.similar{T}(A::AxisArray{T}, S::Type, axs::Axis...) = similar(A, S, axs)
-@generated function Base.similar{T,N}(A::AxisArray{T,N}, S::Type, axs::Tuple{Vararg{Axis}})
-    sz = Expr(:tuple)
+Base.similar{T}(A::AxisArray{T}, ax1::Axis, axs::Axis...) = similar(A, T, (ax1, axs...))  # no element type given: use A's element type T
+Base.similar{S}(A::AxisArray, ::Type{S}, ax1::Axis, axs::Axis...) = similar(A, S, (ax1, axs...))  # collect the axes into a tuple for the tuple method
+@generated function Base.similar{T,S,N}(A::AxisArray{T,N}, ::Type{S}, axs::Tuple{Axis,Vararg{Axis}})  # axs holds at least one Axis
-    sz = Expr(:tuple)
+    inds = Expr(:tuple)  # collects one index-range expression per dimension
     ax = Expr(:tuple)
     for d=1:N
-        push!(sz.args, :(size(A, Axis{$d})))
+        push!(inds.args, :(indices(A, Axis{$d})))  # default: A's own index range for dimension d
         push!(ax.args, :(axes(A, Axis{$d})))
     end
     to_delete = Int[]
     for i=1:length(axs.parameters)
         a = axs.parameters[i]
         d = axisdim(A, a)
         axistype(a) <: Tuple{} && push!(to_delete, d)
-        sz.args[d] = :(length(axs[$i].val))
+        inds.args[d] = :(indices(axs[$i].val, 1))  # supplied axis: use the index range of its values instead
         ax.args[d] = :(axs[$i])
     end
     sort!(to_delete)
-    deleteat!(sz.args, to_delete)
+    deleteat!(inds.args, to_delete)  # remove dimensions whose supplied axis has value type Tuple{}
     deleteat!(ax.args, to_delete)
     quote
-        d = similar(A.data, S, $sz)
+        d = similar(A.data, S, $inds)  # allocate from the assembled index ranges
         AxisArray(d, $ax)
     end
 end
+
+function Base.permutedims(A::AxisArray, perm)  # perm: dimension numbers or axis names (see the permutation methods)
+    p = permutation(perm, axisnames(A))  # resolve perm against A's axis names
+    AxisArray(permutedims(A.data, p), axes(A)[[p...]])  # permute the data and reorder the axes tuple with the same p
+end
+permutation(to::Union{AbstractVector{Int},Tuple{Int,Vararg{Int}}}, from::Symbols) = to  # integer input is returned unchanged
+
+"""
+    permutation(to, from) -> p
+
+Calculate the permutation of labels in `from` to produce the order in
+`to`. Entries of `to` that are missing in `from` receive an index of 0;
+entries of `from` that are missing in `to` have their indices appended
+to the end, so `p` is as long as the longer of `to` and `from`. Having
+missing entries on both sides at once throws an `ArgumentError`.
+"""
+function permutation(to::Symbols, from::Symbols)
+    n = length(to)
+    nf = length(from)
+    li = linearindices(from)
+    d = Dict(from[i]=>i for i in li)  # label => position in `from`
+    covered = similar(dims->falses(length(li)), li)  # one flag per position of `from`, set when `to` names it
+    ind = Array(Int, max(n, nf))  # result buffer, written by the two loops below
+    for (i,toi) in enumerate(to)
+        j = get(d, toi, 0)  # 0 when `toi` does not occur in `from`
+        ind[i] = j
+        if j != 0
+            covered[j] = true
+        end
+    end
+    k = n  # slots after the first n receive positions of `from` not named in `to`
+    for i in li
+        if !covered[i]
+            d[from[i]] != i && throw(ArgumentError("$(from[i]) is a duplicated argument"))  # the Dict maps this label to a different position
+            k += 1
+            k > nf && throw(ArgumentError("no incomplete containment allowed in $to and $from"))  # k may not exceed length(from)
+            ind[k] = i
+        end
+    end
+    ind
+end
+
+function Base.squeeze(A::AxisArray, dims::Dims)  # dims: tuple of dimension numbers to remove
+    keepdims = setdiff(1:ndims(A), dims)  # dimensions that are not being squeezed
+    AxisArray(squeeze(A.data, dims), axes(A)[keepdims])  # squeeze the data and keep only the axes of the remaining dimensions
+end
+# This version is type-stable
+function Base.squeeze{Ax<:Axis}(A::AxisArray, ::Type{Ax})  # the axis to remove is given as a type
+    dim = axisdim(A, Ax)  # dimension of A that Ax refers to
+    AxisArray(squeeze(A.data, dim), dropax(Ax, axes(A)...))  # dropax omits the axes whose name matches Ax
+end
+
+@inline dropax(ax, ax1, axs...) = (ax1, dropax(ax, axs...)...)  # fallback: keep ax1 and recurse on the remaining axes
+@inline dropax{name}(ax::Axis{name}, ax1::Axis{name}, axs...) = dropax(ax, axs...)  # ax1 has the same name as ax: omit it
+@inline dropax{name}(ax::Type{Axis{name}}, ax1::Axis{name}, axs...) = dropax(ax, axs...)  # same, with ax given as the type Axis{name}
+@inline dropax{name,T}(ax::Type{Axis{name,T}}, ax1::Axis{name}, axs...) = dropax(ax, axs...)  # same, with ax given as the type Axis{name,T}
+dropax(ax) = ()  # end of recursion: no axes left
+
+
 # A simple display method to include axis information. It might be nice to
 # eventually display the axis labels alongside the data array, but that is
 # much more difficult.
@@ -356,3 +419,10 @@ function checkaxis(::Type{Categorical}, ax)
         push!(seen, elt)
     end
 end
+
+_length(A::AbstractArray) = length(linearindices(A))  # arrays: element count taken from linearindices
+_length(A) = length(A)  # anything else: plain length
+_size(A::AbstractArray) = map(length, indices(A))  # arrays: length of each entry of indices(A)
+_size(A) = size(A)  # anything else: plain size
+_size(A::AbstractArray, d) = length(indices(A, d))  # arrays: length of the index range of dimension d
+_size(A, d) = size(A, d)  # anything else: plain size along d
diff --git a/src/indexing.jl b/src/indexing.jl
@@ -78,7 +78,7 @@ end
             push!(newaxes, :($(Axis{names[d]})(A.axes[$d].val[J[$d]])))
         elseif I[d] <: AbstractArray
             for i=1:ndims(I[d])
-                push!(newaxes, :($(Axis{Symbol(names[d], "_", i)})(1:size(I[$d], $i))))
+                push!(newaxes, :($(Axis{Symbol(names[d], "_", i)})(indices(I[$d], $i))))
             end
         end
     end
@@ -198,6 +198,10 @@ end
             push!(ex.args, :(I[$i]))
         elseif I[i] <: AbstractArray{Bool}
             push!(ex.args, :(find(I[$i])))
+        elseif I[i] <: CartesianIndex
+            for j = 1:length(I[i])
+                push!(ex.args, :(I[$i][$j]))
+            end
         elseif i <= length(Ax.parameters)
             push!(ex.args, :(axisindexes(A.axes[$i], I[$i])))
         else
diff --git a/src/sortedvector.jl b/src/sortedvector.jl
@@ -63,6 +63,7 @@ Base.getindex(v::SortedVector, idx::AbstractVector) =
 Base.length(v::SortedVector) = length(v.data)
 Base.size(v::SortedVector) = size(v.data)
 Base.size(v::SortedVector, i) = size(v.data, i)
+Base.indices(v::SortedVector) = indices(v.data)  # forwarded to the wrapped data
 
 axistrait(::SortedVector) = Dimensional
 checkaxis(::SortedVector) = nothing
diff --git a/test/REQUIRE b/test/REQUIRE
@@ -0,0 +1 @@
+OffsetArrays
diff --git a/test/core.jl b/test/core.jl
@@ -26,6 +26,15 @@ C = similar(A, 0)
 D = similar(A)
 @test size(A) == size(D)
 @test eltype(A) == eltype(D)
+@test axisnames(permutedims(A, (2,1,3))) == (:col, :row, :page)
+@test axisnames(permutedims(A, (2,3,1))) == (:col, :page, :row)
+@test axisnames(permutedims(A, (3,2,1))) == (:page, :col, :row)
+@test axisnames(permutedims(A, (3,1,2))) == (:page, :row, :col)
+for perm in ((:col, :row, :page), (:col, :page, :row),
+             (:page, :col, :row), (:page, :row, :col),
+             (:row, :page, :col), (:row, :col, :page))
+    @test axisnames(permutedims(A, perm)) == perm
+end
 # Test modifying a particular axis
 E = similar(A, Float64, Axis{:col}(1:2))
 @test size(E) == (2,2,4)
@@ -87,6 +96,14 @@ A = AxisArray(reshape(1:16, 2,2,2,2), .5:.5:1)
 @test axisnames(A) == (:row,:col,:page,:dim_4)
 VERSION >= v"0.5.0-dev" && @inferred(axisnames(A))
 @test axisvalues(A) == (.5:.5:1, 1:2, 1:2, 1:2)
+A = AxisArray([0]', :x, :y)
+@test axisnames(squeeze(A, 1)) == (:y,)
+@test axisnames(squeeze(A, 2)) == (:x,)
+@test axisnames(squeeze(A, (1,2))) == axisnames(squeeze(A, (2,1))) == ()
+@test axisnames(@inferred(squeeze(A, Axis{:x}))) == (:y,)
+@test axisnames(@inferred(squeeze(A, Axis{:x,UnitRange{Int}}))) == (:y,)
+@test axisnames(@inferred(squeeze(A, Axis{:y}))) == (:x,)
+@test axisnames(@inferred(squeeze(squeeze(A, Axis{:x}), Axis{:y}))) == ()
 
 # Test axisdim
 @test_throws ArgumentError AxisArray(reshape(1:24, 2,3,4),
@@ -107,13 +124,25 @@ A = AxisArray(reshape(1:24, 2,3,4),
 @test @inferred(axes(A, Axis{:x})) == @inferred(axes(A, Axis{:x}())) == Axis{:x}(.1:.1:.2)
 @test @inferred(axes(A, Axis{:y})) == @inferred(axes(A, Axis{:y}())) == Axis{:y}(1//10:1//10:3//10)
 @test @inferred(axes(A, Axis{:z})) == @inferred(axes(A, Axis{:z}())) == Axis{:z}(["a", "b", "c", "d"])
+@test axes(A, 2) == Axis{:y}(1//10:1//10:3//10)
 
 @test Axis{:col}(1) == Axis{:col}(1)
 @test Axis{:col}(1) != Axis{:com}(1)
+@test Axis{:x}(1:3) == Axis{:x}(Base.OneTo(3))
 @test hash(Axis{:col}(1)) == hash(Axis{:col}(1.0))
 @test hash(Axis{:row}()) != hash(Axis{:col}())
+@test hash(Axis{:x}(1:3)) == hash(Axis{:x}(Base.OneTo(3)))
 @test AxisArrays.axistype(Axis{1}(1:2)) == typeof(1:2)
+@test AxisArrays.axistype(Axis{1,UInt32}) == UInt32
 @test axisnames(Axis{1}, Axis{2}, Axis{3}) == (1,2,3)
+@test Axis{:row}(2:7)[4] == 5
+@test eltype(Axis{:row}(1.0:1.0:3.0)) == Float64
+@test size(Axis{:row}(2:7)) === (6,)
+@test indices(Axis{:row}(2:7)) === (Base.OneTo(6),)
+@test indices(Axis{:row}(-1:1), 1) === Base.OneTo(3)
+@test length(Axis{:col}(-1:2)) === 4
+@test AxisArrays.axisname(Axis{:foo}(1:2)) == :foo
+@test AxisArrays.axisname(Axis{:foo})      == :foo
 
 # Test Timetype axis construction
 dt, vals = DateTime(2010, 1, 2, 3, 40), randn(5,2)
@@ -123,3 +152,16 @@ A = AxisArray(vals, Axis{:Timestamp}(dt-Dates.Hour(2):Dates.Hour(1):dt+Dates.Hou
 
 # Simply run the display method to ensure no stupid errors
 @compat show(IOBuffer(),MIME("text/plain"),A)
+
+# With unconventional indices
+import OffsetArrays  # `import` rather than `using`, because OffsetArrays has a deprecation for `..`
+A = AxisArray(OffsetArrays.OffsetArray([5,3,4], -1:1), :x)
+@test axes(A) == (Axis{:x}(-1:1),)
+@test A[-1] == 5
+A[0] = 12
+@test A.data[0] == 12
+@test indices(A) == (-1:1,)
+@test linearindices(A) == -1:1
+A = AxisArray(OffsetArrays.OffsetArray(rand(4,5), -1:2, 5:9), :x, :y)
+@test indices(A) == (-1:2, 5:9)
+@test linearindices(A) == 1:20
diff --git a/test/indexing.jl b/test/indexing.jl
@@ -81,3 +81,9 @@ A = AxisArray([1:100 -1:-1:-100], .1:.1:10.0, [:c1, :c2])
 @test A[atindex(-0.5..0.5, [25, 35]), :c1] == [20:30 30:40]
 @test_throws BoundsError A[atindex(-0.5..0.5, 5), :c1]
 @test_throws BoundsError A[atindex(-0.5..0.5, [5, 15, 25]), :]
+
+# Indexing with CartesianIndex{0}
+A = AxisArray(reshape(1:15, 3, 5), :x, :y)
+@test A[2,2,CartesianIndex(())] == 5
+@test A[2,CartesianIndex(()),2] == 5
+@test A[CartesianIndex(()),2,2] == 5
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,6 +1,8 @@
 using AxisArrays
 using Base.Test, Compat
 
+@test isempty(detect_ambiguities(AxisArrays, Base, Core))
+
 include("core.jl")
 include("intervals.jl")
 include("indexing.jl")