Add some tests using StaticArrays (#39)

mcabbott · ToucheSir · web-flow · commit 17daa1dad869 · 2022-01-29T20:04:15.000-05:00
* move many tests to a new file

* add one more simple test of all rules

* add a test with StaticArrays

* feature test for OptimiserChain which discovered a bug

* add overall testsets

* add some type promotion tests

* don't call every loss loss

* fixup

* also test non-array gradient types

* fix indenting, only whitespace

* Update test/rules.jl

Co-authored-by: Brian Chen <ToucheSir@users.noreply.github.com>

Co-authored-by: Brian Chen <ToucheSir@users.noreply.github.com>
diff --git a/Project.toml b/Project.toml
@@ -15,7 +15,9 @@ julia = "1.6"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "Zygote"]
+test = ["Test", "ChainRulesCore", "StaticArrays", "Zygote"]
diff --git a/src/rules.jl b/src/rules.jl
@@ -16,7 +16,7 @@ Descent() = Descent(1f-1)
 init(o::Descent, x::AbstractArray) = nothing
 
 function apply!(o::Descent, state, x, dx)
-  η = convert(float(eltype(dx)), o.eta)
+  η = convert(float(eltype(x)), o.eta)
   
   return state, @.. dx * η
 end
@@ -477,7 +477,7 @@ ClipGrad() = ClipGrad(10f0)
 init(o::ClipGrad, x::AbstractArray) = nothing
 
 function apply!(o::ClipGrad, state, x, dx)
-  δ = convert(float(eltype(dx)), o.delta)
+  δ = convert(float(eltype(x)), o.delta)
   dx′ = @.. clamp(dx, -δ, δ)
 
   return state, dx′
diff --git a/test/rules.jl b/test/rules.jl
@@ -0,0 +1,156 @@
+using Optimisers
+using ChainRulesCore, Functors, StaticArrays, Zygote
+using LinearAlgebra, Statistics, Test, Random
+
+Random.seed!(1)
+
+# Optimisation rules that every testset in this file loops over, in this order.
+RULES = [
+  # Each rule constructed with its default settings:
+  Descent(),
+  ADAM(),
+  Momentum(),
+  Nesterov(),
+  RMSProp(),
+  ADAGrad(),
+  AdaMax(),
+  ADADelta(),
+  AMSGrad(),
+  NADAM(),
+  ADAMW(),
+  RADAM(),
+  OADAM(),
+  AdaBelief(),
+  # Some two-rule chains:
+  OptimiserChain(WeightDecay(), ADAM(0.001)),
+  OptimiserChain(ClipNorm(), ADAM(0.001)),
+  OptimiserChain(ClipGrad(0.5), Momentum()),
+]
+
+# Short display name of a rule, used to label the per-rule testsets.
+# `nameof` is the public accessor for a type's name; it returns the same `Symbol`
+# as reaching into the internal fields via `typeof(o).name.name`.
+name(o) = nameof(typeof(o))
+# A chain is labelled by the names of its component rules, joined with arrows.
+name(o::OptimiserChain) = join(name.(o.opts), " → ")
+
+# Starting from two independent random matrices, every rule must bring `w` close to `w′`.
+@testset "independence" begin
+  @testset "$(name(o))" for o in RULES
+    w = randn(10, 10)
+    w′ = randn(10, 10)
+    # Mean squared difference between what the two matrices produce on input `x`.
+    iloss(x, a, b) = mean((a*x .- b*x) .^ 2)
+    # The initial loss must exceed 1 for the final check to be meaningful.
+    @test iloss(rand(10, 10), w, w′) > 1
+    st = Optimisers.setup(o, w)
+    for _ in 1:10^5
+      x = rand(10)  # a fresh random input for every step
+      gs = gradient(v -> iloss(x, v, w′), w)
+      st, w = Optimisers.update!(st, w, first(gs))
+    end
+    @test iloss(rand(10, 10), w, w′) < 0.01
+  end
+end
+
+# Minimise `sum(abs2, x + x')` with every rule, starting from a shuffled 8×8 matrix
+# holding the values 1:64.
+@testset verbose=true "simple sum" begin
+  @testset "$(name(o))" for o in RULES
+    m = shuffle!(float.(reshape(1:64, 8, 8)))
+    s = Optimisers.setup(o, m)
+    for _ in 1:10^5
+      g = first(gradient(x -> sum(abs2, x + x'), m))
+      s, m = Optimisers.update!(s, m, g)
+    end
+    # Rules that do not bring the sum below 1 are recorded as broken instead of failing;
+    # a weaker alternative check would be `sum(m) < sum(1:64)`.
+    if !(sum(m) < 1)
+      @show name(o) sum(m)/sum(1:64)
+      @test_broken sum(m) < 1
+    else
+      @test sum(m) < 1
+    end
+  end
+end
+
+# Fit the elementwise product `α .* β` of a named tuple to that of a fixed target `w′`.
+@testset "original" begin
+  @testset "$(name(o))" for o in RULES
+    w′ = (α = rand(3, 3), β = rand(3, 3))
+    w = (α = 5 * rand(3, 3), β = rand(3, 3))
+    # Mean squared difference between the products `α .* β` of the two arguments.
+    loss(x, y) = mean((x.α .* x.β .- y.α .* y.β) .^ 2)
+    st = Optimisers.setup(o, w)
+    @test loss(w, w′) > 1
+    for _ in 1:10^4
+      gs = gradient(p -> loss(p, w′), w)
+      # Non-mutating `update`: the new state and parameters are rebound each step.
+      st, w = Optimisers.update(st, w, first(gs))
+    end
+    lw = loss(w, w′)
+    if !(o isa ADADelta)
+      @test lw < 0.001
+    else
+      # ADADelta is recorded as broken at this threshold; print its final loss.
+      @show name(o) loss(w, w′)
+      @test_broken lw < 0.001
+    end
+  end
+end
+
+# Train a small model whose parameters are all static arrays.
+@testset verbose=true "StaticArrays" begin
+  @testset "$(name(o))" for o in RULES
+    W1 = @SMatrix randn(10, 10)
+    b1 = @SVector randn(10)
+    W2 = @SMatrix randn(10, 10)
+    # The activation function is stored in the model alongside the arrays.
+    model = (W1 = W1, b1 = b1, W2 = W2, tanh = tanh)
+    s_loss(m, x, y) = sum(abs2, m.W2 * (m.tanh).(m.W1*x .+ m.b1) .- y)
+    # Vector inputs are used here: with `@SMatrix randn(10, 10)` for `x` and `y`
+    # the original author hit an error from sum(; dims=()).
+    x = @SVector randn(10)
+    y = @SVector randn(10)
+    @test s_loss(model, x, y) > 10
+    state = Optimisers.setup(o, model)
+    for _ in 1:10^3
+      g = first(gradient(m -> s_loss(m, x, y), model))
+      state, model = Optimisers.update!(state, model, g)
+    end
+    if !(o isa Union{Descent, RMSProp, ADAGrad, ADADelta, NADAM})
+      @test s_loss(model, x, y) < 1
+    else
+      # These rules are recorded as broken at this threshold; print the final loss.
+      @show name(o) s_loss(model, x, y)
+      @test_broken s_loss(model, x, y) < 1
+    end
+  end
+end
+
+# Updated parameters should keep the element type (and array type) of the originals.
+@testset verbose=true "element types" begin
+  @testset "$(name(o))" for o in RULES
+    marray = (Float32[1,2], Float64[3,4], Float16[5,6])
+    types = map(eltype, marray)
+
+    # Weak check, since `update` copies and then does `update!`.
+    array_state = Optimisers.setup(o, marray)
+    _, uparray = Optimisers.update(array_state, marray, marray)
+    @test map(eltype, uparray) == types
+
+    # The static version is truly out-of-place. Float16 is left out because
+    # with `SA{Float16}[5,6]` included, all rules fail.
+    mstatic = (SA{Float32}[1,2], SA{Float64}[3,4])
+    static_state = Optimisers.setup(o, mstatic)
+    _, upstatic = Optimisers.update(static_state, mstatic, mstatic)
+    if !(o isa OptimiserChain && o.opts[2] isa ADAM)
+      @test map(eltype, upstatic) == types[1:2]
+    else
+      # Chains whose second rule is ADAM promote to Float64.
+      @test_broken map(eltype, upstatic) == types[1:2]
+    end
+    @test upstatic[1] isa SVector
+
+    # Static parameters with ordinary Array gradients: record whether the result
+    # stays static. (Comparing eltypes here would give the same information as above.)
+    mixed_state = Optimisers.setup(o, mstatic)
+    _, upstatic2 = Optimisers.update(mixed_state, mstatic, marray[1:2])
+    if !(upstatic2[1] isa SVector)
+      @test_broken upstatic2[1] isa SVector
+    else
+      @test upstatic2[1] isa SVector
+    end
+  end
+end
+
+# Gradients need not be plain arrays: check `nothing`, `Fill`, lazy broadcasts and thunks.
+@testset "gradient types" begin
+  @testset "$(name(o))" for o in RULES
+    x = (a = ones(2,2), b = transpose(ones(2,2)))
+    s = Optimisers.setup(o, x)
+
+    # An integer matrix gradient changes `a`; a `nothing` gradient leaves `b` untouched.
+    _, x1 = Optimisers.update(s, x, (a = [1 2; 3 4], b = nothing))
+    @test x1.a != ones(2,2)
+    @test x1.b == ones(2,2)
+
+    # `Fill` gradients, including one with Bool elements.
+    _, xfill = Optimisers.update(s, x, (a = Zygote.Fill(2.0,2,2), b = Zygote.Fill(true,2,2)))
+    @test xfill.a != ones(2,2)
+    @test xfill.b != ones(2,2)
+
+    # Gradient built with Optimisers' own `@..` macro.
+    bc = Optimisers.@.. 1 + log([2 3; 4 5]) / 6
+    _, xbc = Optimisers.update(s, x, (a = bc, b = bc))
+    @test xbc.a != ones(2,2)
+    @test xbc.b != ones(2,2)
+
+    # Thunked gradient. This must pass `th`, not `bc`: otherwise the thunk is never
+    # used and this section only repeats the broadcast case above.
+    th = ChainRulesCore.@thunk @. 1 + log([2 3; 4 5]) / 6
+    _, xth = Optimisers.update(s, x, (a = th, b = th))
+    @test xth.a != ones(2,2)
+    @test xth.b != ones(2,2)
+  end
+end
+
diff --git a/test/runtests.jl b/test/runtests.jl