Skip to content

Commit c73dea7

Browse files
authored
Trivial cases of OptimiserChain (#43)
* trivial cases of OptimiserChain
* and the missing show method
* test a nested case too
* always return an OptimiserChain
* rm all optimisations?
* change to show made on the website and forgotten
* doc comment
1 parent 7c778bd commit c73dea7

File tree

2 files changed

+37
-4
lines changed

2 files changed

+37
-4
lines changed

src/rules.jl

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -516,10 +516,27 @@ end
516516
"""
517517
OptimiserChain(opts...)
518518
519-
Compose a chain (sequence) of optimisers so that each `opt` in `opts`
520-
updates the gradient in the order specified.
519+
Compose a sequence of optimisers so that each `opt` in `opts`
520+
updates the gradient, in the order specified.
521+
522+
With an empty sequence, `OptimiserChain()` is the identity,
523+
so `update!` will subtract the full gradient from the parameters.
524+
This is equivalent to `Descent(1)`.
525+
526+
# Example
527+
```jldoctest
528+
julia> o = OptimiserChain(ClipGrad(1), Descent(0.1));
529+
530+
julia> m = ([0,0,0],);
531+
532+
julia> s = Optimisers.setup(o, m)
533+
(Leaf(OptimiserChain(ClipGrad{Int64}(1), Descent{Float64}(0.1)), [nothing, nothing]),)
534+
535+
julia> Optimisers.update(s, m, ([0.3, 1, 7],))[2] # clips before discounting
536+
([-0.03, -0.1, -0.1],)
537+
```
521538
"""
522-
struct OptimiserChain{O}
539+
struct OptimiserChain{O<:Tuple}
523540
opts::O
524541
end
525542
OptimiserChain(opts...) = OptimiserChain(opts)
@@ -534,3 +551,9 @@ function apply!(o::OptimiserChain, states, x, dx, dxs...)
534551

535552
return new_states, dx
536553
end
554+
555+
function Base.show(io::IO, c::OptimiserChain)
556+
print(io, "OptimiserChain(")
557+
join(io, c.opts, ", ")
558+
print(io, ")")
559+
end

test/runtests.jl

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,18 @@ Optimisers.trainable(x::TwoThirds) = (a = x.a,)
6565
o2 = OptimiserChain(ClipGrad(2), WeightDecay(0.1))
6666
@test Optimisers.update(Optimisers.setup(o2, x), x, dx)[2] ≈ [1-0.1-1, 10-1-2, 100-10-2]
6767

68-
o2r = OptimiserChain(WeightDecay(0.1), ClipGrad(2))
68+
o2n = OptimiserChain(OptimiserChain(ClipGrad(2), WeightDecay(0.1))) # nested
69+
@test Optimisers.update(Optimisers.setup(o2n, x), x, dx)[2] ≈ [1-0.1-1, 10-1-2, 100-10-2]
70+
71+
o2r = OptimiserChain(WeightDecay(0.1), ClipGrad(2)) # reversed
6972
@test Optimisers.update(Optimisers.setup(o2r, x), x, dx)[2] != [1-0.1-1, 10-2, 100-2]
73+
74+
# Trivial cases
75+
o1 = OptimiserChain(Descent(0.1))
76+
@test Optimisers.update(Optimisers.setup(o1, x), x, dx)[2] ≈ [0.9, 9.8, 99.7]
77+
78+
o0 = OptimiserChain()
79+
@test Optimisers.update(Optimisers.setup(o0, x), x, dx)[2] ≈ [1-1, 10-2, 100-3]
7080
end
7181

7282
@testset "trainable subset" begin

0 commit comments

Comments (0)