@@ -25,7 +25,8 @@ N = 50 # Number of samples
x_train = rand(Uniform(xmin, xmax), N) # We sample 100 random samples
σ = 0.1
y_train = sinc.(x_train) + randn(N) * σ # We create a function and add some noise
- x_test = range(xmin - 0.1, xmax + 0.1; length=300);
+ x_test = range(xmin - 0.1, xmax + 0.1; length=300)
+ nothing # hide

# Plot the data

## scatter(x_train, y_train; lab="data")
@@ -47,7 +48,8 @@ x_test = range(xmin - 0.1, xmax + 0.1; length=300);
function kernelcall(θ)
    return (exp(θ[1]) * SqExponentialKernel() + exp(θ[2]) * Matern32Kernel()) ∘
           ScaleTransform(exp(θ[3]))
- end;
+ end
+ nothing # hide

# From theory we know the prediction for a test set x given
# the kernel parameters and normalization constant
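For reference, the prediction that `f` computes in the next hunk is the standard kernel ridge regression (GP posterior mean) formula; roughly, in LaTeX, with the Gram matrices built by `kernelmatrix` and exp(θ[4]) acting as the noise/regularization term:

    \hat{y}_* = K(x_*, X)\,\bigl(K(X, X) + \exp(\theta_4)\,I\bigr)^{-1}\,y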
@@ -56,28 +58,32 @@ function f(x, x_train, y_train, θ)
    k = kernelcall(θ[1:3])
    return kernelmatrix(k, x, x_train) *
           ((kernelmatrix(k, x_train) + exp(θ[4]) * I) \ y_train)
- end;
+ end
+ nothing # hide

# We look how the prediction looks like
# with starting parameters [1.0, 1.0, 1.0, 1.0] we get:

- ŷ = f(x_test, x_train, y_train, log.(ones(4)));
+ ŷ = f(x_test, x_train, y_train, log.(ones(4)))
## scatter(x_train, y_train; lab="data")
## plot!(x_test, sinc; lab="true function")
## plot!(x_test, ŷ; lab="prediction")
+ nothing # hide

# We define the loss based on the L2 norm both
# for the loss and the regularization

function loss(θ)
    ŷ = f(x_train, x_train, y_train, θ)
    return sum(abs2, y_train - ŷ) + exp(θ[4]) * norm(ŷ)
- end;
+ end
+ nothing # hide

# ### Training
# Setting an initial value and initializing the optimizer:
θ = log.([1.1, 0.1, 0.01, 0.001]) # Initial vector
- opt = Optimise.ADAGrad(0.5);
+ opt = Optimise.ADAGrad(0.5)
+ nothing # hide

# The loss with our starting point:
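A minimal sketch of how the pieces in this hunk fit together, assuming the packages already used in the example (KernelFunctions, Zygote, Flux's Optimise, LinearAlgebra); the calls mirror the training loop shown further down and are not part of the diff:

    θ = log.([1.1, 0.1, 0.01, 0.001])       # log-parameters, so exp.(θ) stays positive
    opt = Optimise.ADAGrad(0.5)
    loss(θ)                                  # loss at the starting point
    grads = only(Zygote.gradient(loss, θ))   # gradient with respect to the log-parameters
    Optimise.update!(opt, θ, grads)          # one ADAGrad step, mutating θ in place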
@@ -123,25 +129,30 @@ raw_initial_θ = (
    noise_var=positive(0.001),
)

- flat_θ, unflatten = ParameterHandling.value_flatten(raw_initial_θ);
+ flat_θ, unflatten = ParameterHandling.value_flatten(raw_initial_θ)
+ nothing # hide

function kernelcall(θ)
    return (θ.k1 * SqExponentialKernel() + θ.k2 * Matern32Kernel()) ∘
           ScaleTransform(θ.k3)
- end;
+ end
+ nothing # hide

function f(x, x_train, y_train, θ)
    k = kernelcall(θ)
    return kernelmatrix(k, x, x_train) *
           ((kernelmatrix(k, x_train) + θ.noise_var * I) \ y_train)
- end;
+ end
+ nothing # hide

function loss(θ)
    ŷ = f(x_train, x_train, y_train, θ)
    return sum(abs2, y_train - ŷ) + θ.noise_var * norm(ŷ)
- end;
+ end
+ nothing # hide

- initial_θ = ParameterHandling.value(raw_initial_θ);
+ initial_θ = ParameterHandling.value(raw_initial_θ)
+ nothing # hide

# The loss with our starting point:
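A short sketch of what `value_flatten` provides here (using only ParameterHandling, as above; not part of the diff): `flat_θ` is a plain unconstrained vector the optimizer can update, while `unflatten` maps it back to the constrained named tuple consumed by `kernelcall`, `f`, and `loss`:

    θ_named = unflatten(flat_θ)     # NamedTuple with fields k1, k2, k3, noise_var
    θ_named.noise_var               # ≈ 0.001, kept positive by `positive`
    loss(unflatten(flat_θ))         # hence the training loop optimizes `loss ∘ unflatten`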
@@ -162,7 +173,8 @@ opt = Optimise.ADAGrad(0.5)
for i in 1:25
    grads = (Zygote.gradient(loss ∘ unflatten, flat_θ))[1]
    Optimise.update!(opt, flat_θ, grads)
- end;
+ end
+ nothing # hide

# Final loss
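Once the loop above finishes, the trained values can be read back through the same `unflatten` used inside it; a small sketch (not part of the diff):

    (loss ∘ unflatten)(flat_θ)      # final loss, same composition as in the gradient call
    trained_θ = unflatten(flat_θ)   # constrained NamedTuple (k1, k2, k3, noise_var)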
@@ -188,7 +200,8 @@ function f(x, x_train, y_train, θ)
    k = kernelc(θ[1:3])
    return kernelmatrix(k, x, x_train) *
           ((kernelmatrix(k, x_train) + (θ[4]) * I) \ y_train)
- end;
+ end
+ nothing # hide

# We define the loss based on the L2 norm both
@@ -197,7 +210,8 @@
function loss(θ)
    ŷ = f(x_train, x_train, y_train, exp.(θ))
    return sum(abs2, y_train - ŷ) + exp(θ[4]) * norm(ŷ)
- end;
+ end
+ nothing # hide

# ## Training the model
@@ -221,7 +235,8 @@
for i in 1:25
    grads = only((Zygote.gradient(loss, θ))) # We compute the gradients given the kernel parameters and regularization
    Optimise.update!(opt, θ, grads)
- end;
+ end
+ nothing # hide

# Final loss
loss(θ)
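For reference, the objective minimized in this last (Flux.destructure) variant keeps the parameters positive by passing `exp.(θ)` into `f`; written out, with `f` as defined in this hunk:

    \mathcal{L}(\theta) = \lVert y_{\mathrm{train}} - \hat{y} \rVert_2^{2} + \exp(\theta_4)\,\lVert \hat{y} \rVert_2,
    \quad \text{where } \hat{y} = f(x_{\mathrm{train}}, x_{\mathrm{train}}, y_{\mathrm{train}}, \exp.(\theta)).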