bugs, tests

Michael Abbott · Michael Abbott · commit 58792753c692 · 2019-06-08T18:46:59.000+02:00
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,12 @@
+language: julia
+os:  # operating systems in the build matrix
+  - linux
+  - osx
+julia:  # Julia versions in the build matrix
+  - 1.0
+  - 1.1
+  - nightly
+
+matrix:
+  allow_failures:
+  - julia: nightly  # nightly is still run, but listed under allow_failures
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # SliceMap.jl
 
-It would be nice if [Flux](https://github.com/FluxML/Flux.jl) worked with `mapslices`, 
+It would be nice if [Flux](https://github.com/FluxML/Flux.jl) / [Zygote](https://github.com/FluxML/Zygote.jl) worked with `mapslices`, 
 or with something generalising that. This package has some quick attempts:
 
 ```julia
@@ -23,7 +23,8 @@ Zygote.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]      # Zygote.forward
 Zygote.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
 ```
 
-These are a bit faster than `mapslices` too:
+These are a bit faster than `mapslices` too, although storing all the backward functions, 
+which is what `mapcols` does, seems not to be so quick:
 
 ```julia
 using BenchmarkTools
@@ -47,7 +48,7 @@ Of course `mapslices()` does things other than columns of matrices.
 Most of which can be done better with `eachslice()` and `reduce(hcat,...)`, 
 maybe with some thought one could just write gradients for those...
 
-Perhaps this is done. The views of `eachcol()` have quite inefficient gradients, 
+Perhaps this is done, at least for Zygote. The views of `eachcol()` have quite inefficient gradients, 
 because for each `view()` they make a fresh `zero(A)`, but `collecteachcol()` is efficient:
 
 ```julia
@@ -73,6 +74,25 @@ ten = rand(1:9, 3,10,2)
 Zygote.gradient(m -> sum(sin, @cast zed[i,j,k] := fun(m[i,:,k])[j]  nolazy), ten)[1]
 ```
 
+The function `slicemap(f, A; dims)` uses these slice/glue functions, 
+so you do not have to write index notation. 
+
 Issues about mapslices:
 * https://github.com/FluxML/Zygote.jl/issues/92
 * https://github.com/FluxML/Flux.jl/issues/741
+
+Other packages which define gradients of possible interest:
+* https://github.com/GiggleLiu/LinalgBackwards.jl
+* https://github.com/mcabbott/ArrayAllez.jl
+
+I added some tests: 
+[![Build Status](https://travis-ci.org/mcabbott/SliceMap.jl.svg?branch=master)](https://travis-ci.org/mcabbott/SliceMap.jl)
+
+<!--
+AD packages this could perhaps support, quite the zoo:
+* https://github.com/invenia/Nabla.jl
+* https://github.com/dfdx/Yota.jl
+* https://github.com/denizyuret/AutoGrad.jl
+* https://github.com/Roger-luo/YAAD.jl
+* And perhaps one day, just https://github.com/JuliaDiff/ChainRules.jl
+-->
diff --git a/src/SliceMap.jl b/src/SliceMap.jl
@@ -51,22 +51,20 @@ end
 Like `mapcols()` but for rows.
 """
 maprows(f::Function, M::AbstractMatrix, args...) =
-    reduce(vcat, [ surerow(f(col, args...)) for col in eachrow(M) ])
-
-surerow(x) = transpose(surevec(x))
+    reduce(vcat, [ transpose(surevec(f(col, args...))) for col in eachrow(M) ])  # note: `col` iterates over rows of `M`; each result is transposed, then stacked vertically
 
 maprows(f::Function, M::TrackedMatrix, args...) = track(maprows, f, M, args...)
 
 @grad maprows(f::Function, M::AbstractMatrix, args...) =
-    ∇maprows([ Tracker.forward(x -> surerow(f(x, args...)), row) for row in eachrow(data(M)) ], args)
+    ∇maprows([ Tracker.forward(x -> surevec(f(x, args...)), row) for row in eachrow(data(M)) ], args)  # no transpose here: ∇maprows transposes the forward results itself
 
 @adjoint maprows(f::Function, M::AbstractMatrix, args...) =
-    ∇maprows([ Zygote.forward(x -> surerow(f(x, args...)), row) for row in eachrow(M) ], args)
+    ∇maprows([ Zygote.forward(x -> surevec(f(x, args...)), row) for row in eachrow(M) ], args)  # same scheme as the Tracker rule above, but built with Zygote.forward
 
 function ∇maprows(forwards, args)
-    reduce(vcat, data.(first.(forwards))), Δ -> begin
+    reduce(vcat, map(transpose∘data∘first, forwards)), Δ -> begin  # forward value: first element of each pair, transposed, then stacked vertically
         rows = [ data(last(fwd)(Δrow)[1]) for (fwd, Δrow) in zip(forwards, eachrow(data(Δ))) ]
-        (nothing, reduce(vcat, rows), map(_->nothing, args)...)
+        (nothing, reduce(vcat, transpose.(rows)), map(_->nothing, args)...)  # one entry per maprows argument: `nothing` for `f`, stacked transposed row gradients for `M`, `nothing` for each extra arg
     end
 end
 
@@ -265,7 +263,10 @@ end
 """
     slicemap(f, A; dims) ≈ mapslices(f, A; dims)
 
-Like `mapcols()`, but for any slice. Gradient is for Zygote only.
+Like `mapcols()`, but for any slice. The function `f` must preserve shape,
+e.g. if `dims=(2,4)` then `f` must map matrices to matrices.
+
+The gradient is for Zygote only.
 """
 function slicemap(f::Function, A::AbstractArray{T,N}, args...; dims) where {T,N}
     code = ntuple(d -> d in dims ? (:) : (*), N)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,9 +1,102 @@
 
 using SliceMap
 using Test
+using ForwardDiff, Tracker, Zygote, TensorCast
 
-@testset "nothing" begin
+Zygote.refresh()
 
-    @test true
+@testset "columns" begin  # vector -> vector function applied to every column
+
+    mat = rand(1:9, 3,10)  # 3×10 matrix of random integers in 1:9
+    fun(x) = 2 .+ x.^2
+    res = mapslices(fun, mat, dims=1)  # reference result from mapslices
+
+    @test res ≈ mapcols(fun, mat)
+    @test res ≈ MapCols{3}(fun, mat)
+
+    grad = ForwardDiff.gradient(m -> sum(sin, mapslices(fun, m, dims=1)), mat)  # reference gradient
+
+    @test grad ≈ Tracker.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]
+    @test grad ≈ Tracker.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
+
+    @test grad ≈ Zygote.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]
+    @test grad ≈ Zygote.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
+
+    tcm(mat) = @cast out[i,j] := fun(mat[:,j])[i]  # the same column map written in index notation
+    @test res ≈ tcm(mat)
+    @test grad ≈ Zygote.gradient(m -> sum(sin, tcm(m)), mat)[1]
+
+end
+@testset "columns, scalar" begin  # same checks as "columns", with a function returning a scalar
+
+    mat = rand(1:9, 3,10)  # 3×10 matrix of random integers in 1:9
+    fun(x) = sum(x) # different function! vector -> scalar
+    res = mapslices(fun, mat, dims=1)  # reference result from mapslices
+
+    @test res ≈ mapcols(fun, mat)
+    @test res ≈ MapCols{3}(fun, mat)
+
+    grad = ForwardDiff.gradient(m -> sum(sin, mapslices(fun, m, dims=1)), mat)  # reference gradient
+
+    @test grad ≈ Tracker.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]
+    @test grad ≈ Tracker.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
+
+    @test grad ≈ Zygote.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]
+    @test grad ≈ Zygote.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
+
+    tcm3(mat) = @cast out[_,j] := fun(mat[:,j]) # changed here too: `_` in place of the first index, since `fun` returns a scalar
+    @test res ≈ tcm3(mat)
+    @test grad ≈ Zygote.gradient(m -> sum(sin, tcm3(m)), mat)[1]
+
+end
+@testset "columns, matrix" begin  # same checks as "columns", with a function returning a matrix
+
+    mat = rand(1:9, 3,10)  # 3×10 matrix of random integers in 1:9
+    fun(x) = x .* x' # different function! vector -> matrix
+    res = mapslices(vec∘fun, mat, dims=1)  # the reference flattens each matrix result with `vec`
+
+    @test res ≈ mapcols(fun, mat)
+    @test res ≈ MapCols{3}(fun, mat)
+
+    grad = ForwardDiff.gradient(m -> sum(sin, mapslices(vec∘fun, m, dims=1)), mat)  # reference gradient
+
+    @test grad ≈ Tracker.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]
+    @test grad ≈ Tracker.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
+
+    @test grad ≈ Zygote.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]
+    @test grad ≈ Zygote.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
+
+    tcm4(mat) = @cast out[i⊗i′,j] := fun(mat[:,j])[i,i′] i:3, i′:3 # changed here too: the two result indices are combined into one
+    @test res ≈ tcm4(mat)
+    @test grad ≈ Zygote.gradient(m -> sum(sin, tcm4(m)), mat)[1]
+
+end
+@testset "rows" begin  # maprows checked against mapslices with dims=2
+
+    mat = randn(4,5)
+    fun(x) = 2 .+ x.^2 ./ sum(x)  # NOTE(review): sum(x) of randn data can be close to zero — confirm the ≈ checks are not flaky
+
+    res = mapslices(fun, mat, dims=2)  # reference result from mapslices
+    @test res ≈ maprows(fun, mat)
+
+    grad = ForwardDiff.gradient(m -> sum(sin, mapslices(fun, m, dims=2)), mat)  # reference gradient
+    @test grad ≈ Tracker.gradient(m -> sum(sin, maprows(fun, m)), mat)[1]
+    @test grad ≈ Zygote.gradient(m -> sum(sin, maprows(fun, m)), mat)[1]
+
+    tcm2(mat) = @cast out[i,j] := fun(mat[i,:])[j]  # the same row map written in index notation
+    @test res ≈ tcm2(mat)
+    @test grad ≈ Zygote.gradient(m -> sum(sin, tcm2(m)), mat)[1]
+
+end
+@testset "slices" begin  # slicemap checked against mapslices on a 4-dimensional array
+
+    ten = randn(3,4,5,2)
+    fun(x) = sqrt(3) .+ x.^3 ./ (sum(x)^2)
+    res = mapslices(fun, ten, dims=3)  # reference result from mapslices
+
+    @test res ≈ slicemap(fun, ten, dims=3)
+
+    grad = ForwardDiff.gradient(x -> sum(sin, slicemap(fun, x, dims=3)), ten)  # note: the reference gradient differentiates slicemap itself, not mapslices
+    @test grad ≈ Zygote.gradient(x -> sum(sin, slicemap(fun, x, dims=3)), ten)[1]
 
 end