
Commit 47e3b52

Author: Michael Abbott
Commit message: day one
0 parents  commit 47e3b52

File tree

4 files changed: +217 −0 lines


Project.toml

18 additions, 0 deletions
```toml
name = "SliceMap"
uuid = "82cb661a-3f19-5665-9e27-df437c7e54c8"
authors = ["Michael Abbott"]
version = "0.1.0"

[deps]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
TensorCast = "02d47bb6-7ce6-556a-be16-bb1710789e2b"
Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
WeightedArrays = "379a43df-f81c-573e-83a6-069eb6c11a71"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
```

README.md

50 additions, 0 deletions
# SliceMap.jl

It would be nice if [Flux](https://github.com/FluxML/Flux.jl) worked with `mapslices`,
or with something generalising that. This package has some quick attempts:

```julia
mat = rand(1:99, 3,10)
fun(x) = 2 .+ x.^2
mapslices(fun, mat, dims=1)

using SliceMap

mapcols(fun, mat)    # eachcol(m)
MapCols{3}(fun, mat) # reinterpret(SArray,...)

using Tracker, Zygote, ForwardDiff

ForwardDiff.gradient(m -> sum(sin, mapslices(fun, m, dims=1)), mat)

Tracker.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]    # Tracker.forward per slice
Tracker.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1] # ForwardDiff on slices

# Zygote.gradient(m -> sum(sin, mapslices(fun, m, dims=1)), mat)
Zygote.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]     # Zygote.forward
Zygote.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1]
```
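Both `mapcols` and `MapCols{3}` are meant to reproduce the `mapslices` result above exactly, which gives a quick way to sanity-check them:

```julia
mapcols(fun, mat)    == mapslices(fun, mat, dims=1)  # expected true
MapCols{3}(fun, mat) == mapslices(fun, mat, dims=1)  # expected true
```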

These are a bit faster than `mapslices` too:

```julia
using BenchmarkTools

mat1k = rand(3,1000);

@btime mapslices(fun, $mat1k, dims=1)  # 1.017 ms
@btime mapcols(fun, $mat1k)            # 399.016 μs
@btime MapCols{3}(fun, $mat1k)         #  46.733 μs
@btime MapCols(fun, $mat1k)            #  59.471 μs, without the size parameter

@btime ForwardDiff.gradient(m -> sum(sin, mapslices(fun, m, dims=1)), $mat1k);  # 372.705 ms
@btime Tracker.gradient(m -> sum(sin, mapcols(fun, m)), $mat1k);                #  70.203 ms
@btime Tracker.gradient(m -> sum(sin, MapCols{3}(fun, m)), $mat1k);             # 255.032 μs
@btime Zygote.gradient(m -> sum(sin, mapcols(fun, m)), $mat1k);                 #  20.018 ms
@btime Zygote.gradient(m -> sum(sin, MapCols{3}(fun, m)), $mat1k);              # 354.112 μs
```

Of course `mapslices()` does things other than columns of matrices, most of which can be done
better with `eachslice()` and `reduce(hcat, ...)`; with some thought one could perhaps just
write gradients for those (see the sketch below).

Or write them for the slice/glue functions in [TensorCast](https://github.com/mcabbott/TensorCast.jl),
which now does some `mapslices`-like things (and will soon do many more) by chaining such functions.
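For instance, here is a rough sketch of that `eachslice` + `reduce(hcat, ...)` pattern, applied to rows instead of columns; `maprows` is a hypothetical helper, not something this commit provides:

```julia
mat = rand(1:99, 3, 10)
fun(x) = 2 .+ x.^2

# map a vector-valued function over the rows, then glue the results back together
maprows(f, m) = permutedims(reduce(hcat, [f(r) for r in eachslice(m, dims=1)]))

maprows(fun, mat) == mapslices(fun, mat, dims=2)  # expected true
```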

src/SliceMap.jl

140 additions, 0 deletions
```julia
module SliceMap

export MapCols, mapcols

#========== Reverse, Eachslice ==========#

"""
    mapcols(f, M::Matrix, args...) = reduce(hcat, [f(col, args...) for col in eachcol(M)])

When `M::TrackedMatrix`, it saves the backward function for each slice.
"""
mapcols(f::Function, M::Matrix, args...) =
    reduce(hcat, [ rvec(f(col, args...)) for col in eachcol(M) ])

using Tracker
using Tracker: TrackedMatrix, track, @grad, data

mapcols(f::Function, M::TrackedMatrix, args...) = track(mapcols, f, M, args...)

@grad function mapcols(f::Function, M::TrackedMatrix, args...)
    res = [ Tracker.forward(x -> rvec(f(x, args...)), col) for col in eachcol(data(M)) ]
    fwd = reduce(hcat, data.(first.(res)))
    function back(Δ)
        cols = [ data((last(res[c]))(Δcol)[1]) for (c, Δcol) in enumerate(eachcol(data(Δ))) ]
        ∇M = reduce(hcat, cols)
        (nothing, ∇M, map(_->nothing, args)...)
    end
    fwd, back
end

using Zygote

Zygote.@adjoint function mapcols(f::Function, M::Matrix, args...)
    res = [ Zygote.forward(x -> rvec(f(x, args...)), col) for col in eachcol(data(M)) ]
    fwd = reduce(hcat, data.(first.(res)))
    function back(Δ)
        cols = [ data((last(res[c]))(Δcol)[1]) for (c, Δcol) in enumerate(eachcol(data(Δ))) ]
        ∇M = reduce(hcat, cols)
        (nothing, ∇M, map(_->nothing, args)...)
    end
    fwd, back
end

#========== Forward, Static ==========#

using TensorCast, StaticArrays, WeightedArrays

struct MapCols{d} end

"""
    MapCols{d}(f, M::Matrix, args...)

Expects `f(::SVector{d}, args...)` and maps this over the columns, with `d = size(M,1)`.
Doesn't expect `f` to return a static array, just an ordinary array.

When `M::TrackedMatrix`, it uses `ForwardDiff` to calculate the gradient of each slice.
A second reason for keeping the one type parameter is that the dual numbers needed depend on it.

    MapCols{d}(f, M::Weighted, args...)

Takes `M.weights` along for the ride.
"""
MapCols(f::Function, M::WeightedArrays.MaybeWeightedMatrix, args...) =
    MapCols{size(M,1)}(f, M, args...)

MapCols{d}(f::Function, M::WeightedMatrix, args...) where {d} =
    Weighted(MapCols{d}(f, M.array, args...), M.weights, M.opt)

function MapCols{d}(f::Function, M::Matrix, args...) where {d}
    @cast A[c]{r:d} := M[r,c] assert   # reinterpret the columns as SVector{d}s
    reduce(hcat, [ rvec(f(acol, args...)) for acol in A ])

    # TODO: call some function which static-glues if possible...
    # TensorCast.auto_glue(map(col -> rvec(f(col, args...)), A), (:,*))

    # TODO: can I thread this? Is it even safe to do so?
    # https://github.com/mohamed82008/KissThreading.jl
end

rvec(x::Number) = [x]                # to allow for f vector -> scalar, as mapslices does
rvec(x::StaticArray) = vec(Array(x)) # to avoid creating a giant static array, as reduce(hcat, ...) would otherwise do
rvec(A) = vec(A)                     # LinearAlgebra.

using ForwardDiff

MapCols{d}(f::Function, M::TrackedMatrix, args...) where {d} = track(MapCols, f, M, Val(d), args...)

@grad function MapCols(f::Function, M::TrackedMatrix, dval::Val{d}, args...) where {d}

    @cast A[c]{r:d} := M.data[r,c]
    # one Dual per row, each seeded with a unit partial, added to every column:
    dualcol = SVector(ntuple(j->ForwardDiff.Dual(0, ntuple(i->i==j ? 1 : 0, dval)...), dval))

    C = [ rvec(f(acol .+ dualcol, args...)) for acol in A ]

    Z = reduce(hcat, [ ForwardDiff.value.(full) for full in C ]) # full is not an SVector here

    function back(ΔZ)
        ∇M = similar(data(M)) .+ zero(first(data(ΔZ)))
        @inbounds for c=1:size(M,2)
            part = ForwardDiff.partials.(C[c])
            for r=1:d
                ∇M[r,c] = 0
                for i=1:size(ΔZ,1)
                    ∇M[r,c] += data(ΔZ)[i,c] * part[i].values[r]
                end
            end
        end
        (nothing, ∇M, nothing, map(_->nothing, args)...)
    end

    Z, back
end

Zygote.@adjoint function MapCols{d}(f::Function, M::Matrix, args...) where {d} # no dval!

    @cast A[c]{r:d} := M[r,c]
    dualcol = SVector(ntuple(j->ForwardDiff.Dual(0, ntuple(i->i==j ? 1 : 0, Val(d))...), Val(d)))

    C = [ rvec(f(acol .+ dualcol, args...)) for acol in A ]

    Z = reduce(hcat, [ ForwardDiff.value.(full) for full in C ])

    function back(ΔZ)
        ∇M = similar(data(M)) .+ zero(first(data(ΔZ)))
        @inbounds for c=1:size(M,2)
            part = ForwardDiff.partials.(C[c])
            for r=1:d
                ∇M[r,c] = 0
                for i=1:size(ΔZ,1)
                    ∇M[r,c] += data(ΔZ)[i,c] * part[i].values[r]
                end
            end
        end
        (nothing, ∇M, map(_->nothing, args)...) # changed!
    end

    Z, back
end

end # module
```
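A small usage check of these gradient rules, built only from calls shown in the README above: the Tracker rule for `MapCols` and the Zygote rule for `mapcols` should both agree with a full ForwardDiff gradient taken through `mapslices`:

```julia
using SliceMap, Tracker, Zygote, ForwardDiff

mat = rand(3, 5)
fun(x) = 2 .+ x.^2

g0 = ForwardDiff.gradient(m -> sum(sin, mapslices(fun, m, dims=1)), mat)
g1 = Tracker.data(Tracker.gradient(m -> sum(sin, MapCols{3}(fun, m)), mat)[1])
g2 = Zygote.gradient(m -> sum(sin, mapcols(fun, m)), mat)[1]

g0 ≈ g1 ≈ g2   # expected to hold, up to floating-point error
```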

test/runtests.jl

9 additions, 0 deletions
```julia
using SliceMap
using Test

@testset "nothing" begin

    @test true

end
```
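The testset here is only a placeholder. A sketch of a more substantive test, my suggestion rather than anything in this commit, would simply compare the two exported functions against `mapslices`:

```julia
using SliceMap, Test

@testset "matches mapslices" begin
    mat = rand(1:99, 3, 10)
    fun(x) = 2 .+ x.^2
    @test mapcols(fun, mat) == mapslices(fun, mat, dims=1)
    @test MapCols{3}(fun, mat) == mapslices(fun, mat, dims=1)
end
```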
