# # Kernel Ridge Regression

# In this example we show the two main methods for performing kernel ridge regression
# with a parametrized kernel from KernelFunctions.jl.

# ## We load KernelFunctions and some other packages

using KernelFunctions
using LinearAlgebra
using Plots
using Zygote
using Flux: Optimise
using ForwardDiff
using Random: seed!
seed!(42)
using ParameterHandling

# ## Data Generation
# We generate data in one dimension.
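
# Below is a minimal sketch of the data generation, assuming noisy observations of
# the `sinc` function on a fixed interval; the bounds, sample size, and noise level
# are illustrative choices.

xmin, xmax = -3.0, 3.0                      # bounds of the data (assumed)
N = 50                                      # number of training samples (assumed)
x_train = (xmax - xmin) .* rand(N) .+ xmin  # inputs sampled uniformly in [xmin, xmax]
σ = 0.1                                     # observation noise level (assumed)
y_train = sinc.(x_train) .+ σ .* randn(N)   # noisy sinc targets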
x_test = range(xmin - 0.1, xmax + 0.1; length=300)

scatter(x_train, y_train; lab="data")
plot!(x_test, sinc; lab="true function")

# ## Kernel training, Method 1
# The first method is to rebuild the parametrized kernel from a vector of parameters
# in each evaluation of the cost function. This is similar to the approach taken in
# [Stheno.jl](https://github.com/JuliaGaussianProcesses/Stheno.jl).

# ### Simplest approach
# A simple way to ensure that the kernel parameters stay positive
# is to optimize over the logarithm of the parameters.

# To train the kernel parameters via Zygote.jl
# we need a function that builds the kernel from an array of parameters.

function kernelcall(θ)
    return (exp(θ[1]) * SqExponentialKernel() + exp(θ[2]) * Matern32Kernel()) ∘
           ScaleTransform(exp(θ[3]))
end
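
# From the kernel we can compute the kernel ridge regression prediction
# k(x, X)(K + σ²I)⁻¹y. The function below is a sketch that mirrors the
# ParameterHandling version further down, assuming exp(θ[4]) plays the role
# of the noise variance σ².

function f(x, x_train, y_train, θ)
    k = kernelcall(θ)
    return kernelmatrix(k, x, x_train) *
           ((kernelmatrix(k, x_train) + exp(θ[4]) * I) \ y_train)
end

# The prediction with the starting parameters log.(ones(4)):

ŷ = f(x_test, x_train, y_train, log.(ones(4)))
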
scatter(x_train, y_train; lab="data")
plot!(x_test, sinc; lab="true function")
plot!(x_test, ŷ; lab="prediction")

# We define the loss based on the L2 norm, used both
# for the data-fit term and for the regularization.

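# The loss below is a sketch matching the ParameterHandling version further down:
# the squared residuals plus a noise-scaled penalty on the prediction, again
# assuming exp(θ[4]) is the noise variance.

function loss(θ)
    ŷ = f(x_train, x_train, y_train, θ)
    return sum(abs2, y_train - ŷ) + exp(θ[4]) * norm(ŷ)
end

# The loss with our starting point:
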
loss(log.(ones(4)))

# ## Training the model

θ = log.([1.1, 0.1, 0.01, 0.001]) # Initial vector of log-parameters
anim = Animation()
opt = Optimise.ADAGrad(0.5)
for i in 1:30
    println(i)
    grads = only(Zygote.gradient(loss, θ)) # We compute the gradients given the kernel parameters and regularization
    Optimise.update!(opt, θ, grads)
    scatter(
        x_train, y_train; lab="data", title="i = $(i), Loss = $(round(loss(θ); digits=4))"
    )
    plot!(x_test, sinc; lab="true function")
    plot!(x_test, f(x_test, x_train, y_train, θ); lab="prediction", lw=3.0)
    frame(anim)
end
gif(anim)

# ### ParameterHandling.jl
# Alternatively, we can use the [ParameterHandling.jl](https://github.com/invenia/ParameterHandling.jl)
# package to handle the constraint that all parameters must be positive.

raw_initial_θ = (
    k1=positive(1.1),
    k2=positive(0.1),
    k3=positive(0.01),
    noise_var=positive(0.001),
)

flat_θ, unflatten = ParameterHandling.value_flatten(raw_initial_θ);

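# Here `positive` marks each parameter as constrained to be positive, and
# `value_flatten` returns a flat vector of unconstrained parameters together with an
# `unflatten` function mapping such a vector back to a named tuple of positive values,
# so the optimizer can work on `flat_θ` without worrying about the constraints.
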
function kernelcall(θ)
    return (θ.k1 * SqExponentialKernel() + θ.k2 * Matern32Kernel()) ∘
           ScaleTransform(θ.k3)
end

function f(x, x_train, y_train, θ)
    k = kernelcall(θ)
    return kernelmatrix(k, x, x_train) *
           ((kernelmatrix(k, x_train) + θ.noise_var * I) \ y_train)
end
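
# `f` implements the kernel ridge regression predictor
# k(x, X)(K + σ²I)⁻¹y, with `θ.noise_var` playing the role of σ².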

function loss(θ)
    ŷ = f(x_train, x_train, y_train, θ)
    return sum(abs2, y_train - ŷ) + θ.noise_var * norm(ŷ)
end

initial_θ = ParameterHandling.value(raw_initial_θ)

# The loss with our starting point:

loss(initial_θ)

# ## Training the model

anim = Animation()
opt = Optimise.ADAGrad(0.5)
for i in 1:30
    println(i)
    grads = only(Zygote.gradient(loss ∘ unflatten, flat_θ)) # We compute the gradients given the kernel parameters and regularization
    Optimise.update!(opt, flat_θ, grads)
    scatter(
        x_train, y_train; lab="data", title="i = $(i), Loss = $(round((loss ∘ unflatten)(flat_θ); digits=4))"
    )
    plot!(x_test, sinc; lab="true function")
    plot!(x_test, f(x_test, x_train, y_train, unflatten(flat_θ)); lab="prediction", lw=3.0)
    frame(anim)
end
gif(anim)