diff --git a/Project.toml b/Project.toml
index 249f032ec..b6b218fc4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -27,7 +27,7 @@ Compat = "3.7, 4"
 CompositionsBase = "0.1"
 Distances = "0.10.9"
 FillArrays = "0.10, 0.11, 0.12, 0.13, 1"
-Functors = "0.1, 0.2, 0.3, 0.4"
+Functors = "0.1, 0.2, 0.3, 0.4, 0.5"
 IrrationalConstants = "0.1, 0.2"
 LogExpFunctions = "0.2.1, 0.3"
 Requires = "1.0.1"
diff --git a/examples/train-kernel-parameters/Project.toml b/examples/train-kernel-parameters/Project.toml
index 7aee5a340..1d3750398 100644
--- a/examples/train-kernel-parameters/Project.toml
+++ b/examples/train-kernel-parameters/Project.toml
@@ -13,7 +13,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 [compat]
 BenchmarkTools = "1.2"
 Distributions = "0.25"
-Flux = "0.12, 0.13, 0.14"
+Flux = "0.13, 0.14"
 ForwardDiff = "0.10"
 KernelFunctions = "0.10"
 Literate = "2"
diff --git a/examples/train-kernel-parameters/script.jl b/examples/train-kernel-parameters/script.jl
index e60f9549e..a1087c978 100644
--- a/examples/train-kernel-parameters/script.jl
+++ b/examples/train-kernel-parameters/script.jl
@@ -1,9 +1,9 @@
 # # Train Kernel Parameters

-# Here we show a few ways to train (optimize) the kernel (hyper)parameters at the example of kernel-based regression using KernelFunctions.jl.
-# All options are functionally identical, but differ a little in readability, dependencies, and computational cost.
+# Here we show a few ways to train (optimize) the kernel (hyper)parameters at the example of kernel-based regression using KernelFunctions.jl.
+# All options are functionally identical, but differ a little in readability, dependencies, and computational cost.

-# We load KernelFunctions and some other packages. Note that while we use `Zygote` for automatic differentiation and `Flux.optimise` for optimization, you should be able to replace them with your favourite autodiff framework or optimizer.
+# We load KernelFunctions and some other packages. Note that while we use `Zygote` for automatic differentiation and `Flux.optimise` for optimization, you should be able to replace them with your favourite autodiff framework or optimizer.

 using KernelFunctions
 using LinearAlgebra
@@ -33,14 +33,14 @@ scatter(x_train, y_train; label="data")
 plot!(x_test, sinc; label="true function")

 # ## Manual Approach
-# The first option is to rebuild the parametrized kernel from a vector of parameters
-# in each evaluation of the cost function. This is similar to the approach taken in
+# The first option is to rebuild the parametrized kernel from a vector of parameters
+# in each evaluation of the cost function. This is similar to the approach taken in
 # [Stheno.jl](https://github.com/JuliaGaussianProcesses/Stheno.jl).

 # To train the kernel parameters via [Zygote.jl](https://github.com/FluxML/Zygote.jl),
 # we need to create a function creating a kernel from an array.
 # A simple way to ensure that the kernel parameters are positive
-# is to optimize over the logarithm of the parameters.
+# is to optimize over the logarithm of the parameters.

 function kernel_creator(θ)
     return (exp(θ[1]) * SqExponentialKernel() + exp(θ[2]) * Matern32Kernel()) ∘
@@ -59,7 +59,7 @@ end
 nothing #hide

 # Let's look at our prediction.
-# With starting parameters `p0` (picked so we get the right local
+# With starting parameters `p0` (picked so we get the right local
 # minimum for demonstration) we get:

 p0 = [1.1, 0.1, 0.01, 0.001]
@@ -85,16 +85,16 @@ loss(θ)

 @benchmark let
     θ = log.(p0)
-    opt = Optimise.ADAGrad(0.5)
+    opt = Optimise.AdaGrad(0.5)
     grads = only((Zygote.gradient(loss, θ)))
     Optimise.update!(opt, θ, grads)
 end

 # ### Training the model

-# Setting an initial value and initializing the optimizer:
+# Setting an initial value and initializing the optimizer:
 θ = log.(p0) # Initial vector
-opt = Optimise.ADAGrad(0.5)
+opt = Optimise.AdaGrad(0.5)
 nothing #hide

 # Optimize
@@ -119,10 +119,10 @@ nothing; #hide
 loss(θ)

 # ## Using ParameterHandling.jl
-# Alternatively, we can use the [ParameterHandling.jl](https://github.com/invenia/ParameterHandling.jl) package
-# to handle the requirement that all kernel parameters should be positive.
-# The package also allows arbitrarily nesting named tuples that make the parameters
-# more human readable, without having to remember their position in a flat vector.
+# Alternatively, we can use the [ParameterHandling.jl](https://github.com/invenia/ParameterHandling.jl) package
+# to handle the requirement that all kernel parameters should be positive.
+# The package also allows arbitrarily nesting named tuples that make the parameters
+# more human readable, without having to remember their position in a flat vector.

 using ParameterHandling

@@ -133,7 +133,7 @@ raw_initial_θ = (
 flat_θ, unflatten = ParameterHandling.value_flatten(raw_initial_θ)
 flat_θ #hide

-# We define a few relevant functions and note that compared to the previous `kernel_creator` function, we do not need explicit `exp`s.
+# We define a few relevant functions and note that compared to the previous `kernel_creator` function, we do not need explicit `exp`s.

 function kernel_creator(θ)
     return (θ.k1 * SqExponentialKernel() + θ.k2 * Matern32Kernel()) ∘ ScaleTransform(θ.k3)
@@ -164,7 +164,7 @@ nothing #hide

 @benchmark let
     θ = flat_θ[:]
-    opt = Optimise.ADAGrad(0.5)
+    opt = Optimise.AdaGrad(0.5)
     grads = (Zygote.gradient(loss ∘ unflatten, θ))[1]
     Optimise.update!(opt, θ, grads)
 end
@@ -173,7 +173,7 @@ end

 # Optimize

-opt = Optimise.ADAGrad(0.5)
+opt = Optimise.AdaGrad(0.5)
 for i in 1:15
     grads = (Zygote.gradient(loss ∘ unflatten, flat_θ))[1]
     Optimise.update!(opt, flat_θ, grads)
@@ -185,11 +185,11 @@ nothing #hide
 (loss ∘ unflatten)(flat_θ)

 # ## Flux.destructure
-# If we don't want to write an explicit function to construct the kernel, we can alternatively use the `Flux.destructure` function.
-# Again, we need to ensure that the parameters are positive. Note that the `exp` function is now part of the loss function, instead of part of the kernel construction.
+# If we don't want to write an explicit function to construct the kernel, we can alternatively use the `Flux.destructure` function.
+# Again, we need to ensure that the parameters are positive. Note that the `exp` function is now part of the loss function, instead of part of the kernel construction.

-# We could also use ParameterHandling.jl here.
-# To do so, one would remove the `exp`s from the loss function below and call `loss ∘ unflatten` as above.
+# We could also use ParameterHandling.jl here.
+# To do so, one would remove the `exp`s from the loss function below and call `loss ∘ unflatten` as above.
 θ = [1.1, 0.1, 0.01, 0.001]

@@ -217,7 +217,7 @@ nothing #hide

 # Cost for one step

-@benchmark let θt = θ[:], optt = Optimise.ADAGrad(0.5)
+@benchmark let θt = θ[:], optt = Optimise.AdaGrad(0.5)
     grads = only((Zygote.gradient(loss, θt)))
     Optimise.update!(optt, θt, grads)
 end
@@ -228,9 +228,9 @@ end
 θ = log.([1.1, 0.1, 0.01, 0.001]) # Initial vector
 loss(θ)

-# Initialize optimizer
+# Initialize optimizer

-opt = Optimise.ADAGrad(0.5)
+opt = Optimise.AdaGrad(0.5)
 nothing #hide

 # Optimize
diff --git a/test/Project.toml b/test/Project.toml
index 7ef690d1d..a12372998 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -28,7 +28,7 @@ Distances = "0.10"
 Documenter = "0.25, 0.26, 0.27"
 FiniteDifferences = "0.10.8, 0.11, 0.12"
 ForwardDiff = "0.10"
-Functors = "0.2, 0.3, 0.4"
+Functors = "0.2, 0.3, 0.4, 0.5"
 Kronecker = "0.4, 0.5"
 LogExpFunctions = "0.2, 0.3"
 PDMats = "0.9, 0.10, 0.11"
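
# Note (illustrative sketch, not part of the patch): besides the Functors compat bumps,
# the substantive change above tracks Flux's optimiser rename (`Optimise.ADAGrad` ->
# `Optimise.AdaGrad`), which is presumably why the example's Flux compat drops 0.12.
# A minimal sketch of the updated training loop, assuming the `loss` function defined
# earlier in script.jl (not shown in this diff):
using Flux: Optimise
using Zygote

θ = log.([1.1, 0.1, 0.01, 0.001])  # optimise over log-parameters so they stay positive
opt = Optimise.AdaGrad(0.5)        # CamelCase name required by Flux 0.13/0.14
for _ in 1:15
    grads = only(Zygote.gradient(loss, θ))  # gradient w.r.t. the log-parameters
    Optimise.update!(opt, θ, grads)         # in-place AdaGrad step
end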