
Commit 863866f

Replace first by only when needed and divide by 2 instead of multiplying by 0.5 (#112)
* Replace first by only when needed and divide by 2 instead of multiplying by 0.5
* More transitions from first to only
* Fixed the quadrature
* Use only for prior tests
* Fixing logistic-softmax
* Fix introduced issue with sampling
* Fix GP predict
* Fix max order heteroscedastic
* Fix sampling
* Fix issue with map on inference
* Remove non-ubuntu workflows
* Fix prediction MO*
* Increase tolerance for heteroscedasticity
1 parent eb5eb7f commit 863866f
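
The two idioms in the title are applied mechanically across the codebase. As a small illustrative sketch (the variable below is made up, not from the package): `first` returns the first element of any collection, whereas `only` additionally asserts that the collection holds exactly one element, so misuse fails loudly instead of silently; dividing by 2 instead of multiplying by 0.5 is purely stylistic, since both are exact for IEEE floats.

    σ² = [0.01]            # a container expected to hold exactly one variance
    first(σ²)              # 0.01 — would also silently accept a longer vector
    only(σ²)               # 0.01 — throws an ArgumentError if length(σ²) != 1
    only([0.01, 0.02])     # ArgumentError: collection has multiple elements

    0.5 * 3.7 == 3.7 / 2   # true: the rewrite does not change any result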


41 files changed: +188 -190 lines

.github/workflows/ci.yml

Lines changed: 0 additions & 2 deletions
@@ -25,8 +25,6 @@ jobs:
           - 'nightly'
         os:
           - ubuntu-latest
-          - macOS-latest
-          - windows-latest
         arch:
           - x64
     steps:

docs/examples/heteroscedastic.jl

Lines changed: 13 additions & 7 deletions
@@ -5,7 +5,7 @@ using AugmentedGaussianProcesses
 using Distributions
 using LinearAlgebra
 using Plots
-default(lw=3.0, msw=0.0)
+default(; lw=3.0, msw=0.0)
 # using CairoMakie

 # ## Model generated data
@@ -30,8 +30,8 @@ g = rand(MvNormal(μ₀ * ones(N), K))
 y = f + σ .* randn(N); # We finally sample the ouput
 # We can visualize the data:
 n_sig = 2 # Number of std. dev. around the mean
-plot(x, f, ribbon = n_sig * σ, lab= "p(y|f,g)") # Mean and std. dev. of y
-scatter!(x, y, alpha=0.5, msw=0.0, lab="y") # Observation samples
+plot(x, f; ribbon=n_sig * σ, lab="p(y|f,g)") # Mean and std. dev. of y
+scatter!(x, y; alpha=0.5, msw=0.0, lab="y") # Observation samples

 # ## Model creation and training
 # We will now use the augmented model to infer both `f` and `g`
@@ -55,12 +55,18 @@ train!(model, 100);
 y_m, y_σ = proba_y(model, x_test);
 # Let's first look at the differece between the latent `f` and `g`
 plot(x, [f, g]; label=["f" "g"])
-plot!(x_test, [f_m, g_m]; ribbon=[n_sig * f_σ n_sig * g_σ], lab=["f_pred" "g_pred"], legend=true)
+plot!(
+    x_test,
+    [f_m, g_m];
+    ribbon=[n_sig * f_σ n_sig * g_σ],
+    lab=["f_pred" "g_pred"],
+    legend=true,
+)
 # But it's more interesting to compare the predictive probability of `y` directly:
-plot(x, f; ribbon = n_sig * σ, lab="p(y|f,g)")
-plot!(x_test, y_m, ribbon = n_sig * sqrt.(y_σ), lab="p(y|f,g) pred")
+plot(x, f; ribbon=n_sig * σ, lab="p(y|f,g)")
+plot!(x_test, y_m; ribbon=n_sig * sqrt.(y_σ), lab="p(y|f,g) pred")
 scatter!(x, y; lab="y", alpha=0.2)
 # Or to explore the heteroscedasticity itself, we can look at the residuals
-scatter(x, (f - y).^2; yaxis=:log, lab="residuals",alpha=0.2)
+scatter(x, (f - y) .^ 2; yaxis=:log, lab="residuals", alpha=0.2)
 plot!(x, σ .^ 2; lab="true σ²(x)")
 plot!(x_test, y_σ; lab="predicted σ²(x)")
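
Most edits in this example only make the keyword-argument boundary explicit. A minimal sketch of why both spellings are equivalent in Julia (the data below is invented for illustration):

    using Plots
    x = 0:0.1:1
    f = sin.(2π .* x)
    plot(x, f, ribbon=0.1, lab="f")   # keyword arguments introduced after a comma
    plot(x, f; ribbon=0.1, lab="f")   # same call; the `;` marks where keywords start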

src/ComplementaryDistributions/lap_transf_dist.jl

Lines changed: 5 additions & 5 deletions
@@ -25,12 +25,12 @@ end
 function _check_f(f)
     return true # TODO Add tests for complete monotonicity / PDR
 end
-_gradf(d::LaplaceTransformDistribution, x::Real) = first(ForwardDiff.gradient(d.f, [x]))
+_gradf(d::LaplaceTransformDistribution, x::Real) = only(ForwardDiff.gradient(d.f, [x]))
 function _gradlogf(d::LaplaceTransformDistribution, x::Real)
-    return first(ForwardDiff.gradient(log ∘ d.f, [x]))
+    return only(ForwardDiff.gradient(log ∘ d.f, [x]))
 end
 function _hessianlogf(d::LaplaceTransformDistribution, x::Real)
-    return first(ForwardDiff.hessian(log ∘ d.f, [x]))
+    return only(ForwardDiff.hessian(log ∘ d.f, [x]))
 end

 Distributions.pdf(dist::LaplaceTransformDistribution, x::Real) = apply_f(dist, x)
@@ -42,7 +42,7 @@ function Distributions.var(dist::LaplaceTransformDistribution)
 end

 function Random.rand(dist::LaplaceTransformDistribution)
-    return first(rand(dist, 1))
+    return only(rand(dist, 1))
 end

 # Sampling from Ridout (09)
@@ -173,7 +173,7 @@ function laptrans(
         k += 1
         t = t - (apply_F(dist, t) - u[i]) / apply_f(dist, t)
         if t < x_L || t > x_U
-            t = 0.5 * (x_L + x_U)
+            t = (x_L + x_U) / 2
         end
         if apply_F(dist, t) <= u[i]
             x_L = t
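
The midpoint assignment in `laptrans` is the classic bisection safeguard for Newton's method: when a Newton step on F(t) = u leaves the current bracket [x_L, x_U], fall back to the bracket midpoint and keep shrinking the bracket. A self-contained sketch of the pattern, with stand-in functions rather than the package API:

    # Solve F(t) = u on [x_L, x_U] for an increasing CDF F with density f = F′.
    function safeguarded_newton(F, f, u, x_L, x_U; maxiter=100, tol=1e-10)
        t = (x_L + x_U) / 2
        for _ in 1:maxiter
            t -= (F(t) - u) / f(t)             # Newton step
            if t < x_L || t > x_U
                t = (x_L + x_U) / 2            # bisection fallback, as in the diff
            end
            F(t) <= u ? (x_L = t) : (x_U = t)  # keep the root bracketed
            x_U - x_L < tol && break
        end
        return t
    end

    safeguarded_newton(x -> 1 - exp(-x), x -> exp(-x), 0.5, 0.0, 10.0)  # ≈ log(2)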

src/ComplementaryDistributions/polyagamma.jl

Lines changed: 8 additions & 8 deletions
@@ -34,7 +34,7 @@ struct PolyaGamma{Tc,A} <: Distributions.ContinuousUnivariateDistribution
     end
 end

-Statistics.mean(d::PolyaGamma) = d.b / (2 * d.c) * tanh(0.5 * d.c)
+Statistics.mean(d::PolyaGamma) = d.b / (2 * d.c) * tanh(d.c / 2)

 function PolyaGamma(b::Int, c::T; nmax::Int=10, trunc::Int=200) where {T<:Real}
     return PolyaGamma{T}(b, c, trunc, nmax)
@@ -59,17 +59,17 @@ end
 function a(n::Int, x::Real)
     k = (n + 0.5) * π
     if x > __TRUNC
-        return k * exp(-0.5 * k^2 * x)
+        return k * exp(-k^2 * x / 2)
     elseif x > 0
-        expnt = -1.5 * (log(0.5 * π) + log(x)) + log(k) - 2.0 * (n + 0.5)^2 / x
+        expnt = -1.5 * (log(π / 2) + log(x)) + log(k) - 2 * (n + 0.5)^2 / x
         return exp(expnt)
     end
 end

 function mass_texpon(z::Real)
     t = __TRUNC

-    fz = 0.125 * π^2 + 0.5 * z^2
+    fz = 0.125 * π^2 + z^2 / 2
     b = sqrt(1.0 / t) * (t * z - 1)
     a = sqrt(1.0 / t) * (t * z + 1) * -1.0

@@ -99,13 +99,13 @@ function rtigauss(rng::AbstractRNG, z::Real)
             end
             x = 1 + e1 * t
             x = t / x^2
-            alpha = exp(-0.5 * z^2 * x)
+            alpha = exp(-z^2 * x / 2)
         end
     else
         mu = 1.0 / z
         while (x > t)
             y = randn(rng)^2
-            half_mu = 0.5 * mu
+            half_mu = mu / 2
             mu_Y = mu * y
             x = mu + half_mu * mu_Y - half_mu * sqrt(4 * mu_Y + mu_Y^2)
             if rand(rng) > mu / (mu + x)
@@ -122,10 +122,10 @@ end

 function draw_like_devroye(rng::AbstractRNG, c::Real)
     # Change the parameter.
-    c = 0.5 * abs(c)
+    c = abs(c) / 2

     # Now sample 0.25 * J^*(1, Z := Z/2).
-    fz = 0.125 * π^2 + 0.5 * c^2
+    fz = 0.125 * π^2 + c^2 / 2
     # ... Problems with large Z? Try using q_over_p.
     # double p = 0.5 * __PI * exp(-1.0 * fz * __TRUNC) / fz;
     # double q = 2 * exp(-1.0 * Z) * pigauss(__TRUNC, Z);
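
The rewritten mean is the standard Polya-Gamma identity (Polson, Scott & Windle, 2013); since tanh(0.5c) = tanh(c/2), only the notation changes:

    \mathbb{E}[\omega] = \frac{b}{2c}\,\tanh\!\left(\frac{c}{2}\right),
    \qquad \omega \sim \mathrm{PG}(b, c).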

src/functions/KLdivergences.jl

Lines changed: 9 additions & 9 deletions
@@ -14,7 +14,7 @@ function GaussianKL(
     Σ::Symmetric{T,Matrix{T}},
     K::Cholesky{T,Matrix{T}},
 ) where {T<:Real}
-    return 0.5 * (logdet(K) - logdet(Σ) + tr(K \ Σ) + invquad(K, μ - μ₀) - length(μ))
+    return (logdet(K) - logdet(Σ) + tr(K \ Σ) + invquad(K, μ - μ₀) - length(μ)) / 2
 end

 function GaussianKL(
@@ -24,7 +24,7 @@ function GaussianKL(
     K::AbstractMatrix{T},
 ) where {T<:Real}
     K
-    return 0.5 * (logdet(K) - logdet(Σ) + tr(K \ Σ) + dot(μ - μ₀, K \ (μ - μ₀)) - length(μ))
+    return (logdet(K) - logdet(Σ) + tr(K \ Σ) + dot(μ - μ₀, K \ (μ - μ₀)) - length(μ)) / 2
 end

 extraKL(::AbstractGPModel{T}, ::Any) where {T} = zero(T)
@@ -42,13 +42,13 @@ function extraKL(model::OnlineSVGP{T}, state) where {T}
         κₐμ = kernel_mat.κₐ * mean(gp)
         KLₐ = prev_gp.prev𝓛ₐ
         KLₐ +=
-            -0.5 * sum(
+            -sum(
                 trace_ABt.(
                     Ref(prev_gp.invDₐ),
                     [kernel_mat.K̃ₐ, kernel_mat.κₐ * cov(gp) * transpose(kernel_mat.κₐ)],
                 ),
-            )
-        KLₐ += dot(prev_gp.prevη₁, κₐμ) - 0.5 * dot(κₐμ, prev_gp.invDₐ * κₐμ)
+            ) / 2
+        KLₐ += dot(prev_gp.prevη₁, κₐμ) - dot(κₐμ, prev_gp.invDₐ * κₐμ) / 2
         return KLₐ
     end
 end
@@ -94,7 +94,7 @@ end
 KL(q(ω)||p(ω)), where q(ω) = PG(b,c) and p(ω) = PG(b,0). θ = 𝑬[ω]
 """
 function PolyaGammaKL(b, c, θ)
-    return dot(b, logcosh.(0.5 * c)) - 0.5 * dot(abs2.(c), θ)
+    return dot(b, logcosh.(c / 2)) - dot(abs2.(c), θ) / 2
 end

 """
@@ -104,10 +104,10 @@ Entropy of GIG variables with parameters a,b and p and omitting the derivative d
 """
 function GIGEntropy(a, b, p)
     sqrt_ab = sqrt.(a .* b)
-    return 0.5 * (sum(log, a) - sum(log, b)) +
+    return (sum(log, a) - sum(log, b)) / 2 +
            mapreduce((p, s) -> log(2 * besselk(p, s)), +, p, sqrt_ab) +
            sum(
-               0.5 * sqrt_ab ./ besselk.(p, sqrt_ab) .*
+               sqrt_ab ./ besselk.(p, sqrt_ab) .*
                (besselk.(p + 1, sqrt_ab) + besselk.(p - 1, sqrt_ab)),
-           )
+           ) / 2
 end
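
For reference, `GaussianKL` implements the closed-form KL divergence between the variational posterior N(μ, Σ) and the GP prior N(μ₀, K), with d = length(μ); the commit only moves the global 1/2 factor into a trailing division:

    \mathrm{KL}\bigl(\mathcal{N}(\mu,\Sigma)\,\|\,\mathcal{N}(\mu_0,K)\bigr)
    = \tfrac{1}{2}\Bigl[\log\det K - \log\det\Sigma + \operatorname{tr}(K^{-1}\Sigma)
    + (\mu-\mu_0)^\top K^{-1}(\mu-\mu_0) - d\Bigr]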

src/hyperparameter/autotuning.jl

Lines changed: 3 additions & 3 deletions
@@ -226,7 +226,7 @@ end

 ## Return the derivative of the KL divergence between the posterior and the GP prior ##
 function hyperparameter_KL_gradient(J::AbstractMatrix, A::AbstractMatrix)
-    return 0.5 * trace_ABt(J, A)
+    return trace_ABt(J, A) / 2
 end

 function hyperparameter_gradient_function(gp::LatentGP{T}, ::AbstractVector) where {T}
@@ -376,7 +376,7 @@ function hyperparameter_online_gradient(
 )
     ιₐ = (Jab - gp.κₐ * Jmm) / pr_cov(gp)
     trace_term =
-        -0.5 * sum(
+        -sum(
             trace_ABt.(
                 [gp.invDₐ],
                 [
@@ -386,7 +386,7 @@ function hyperparameter_online_gradient(
                     -gp.κₐ * transpose(Jab),
                 ],
             ),
-        )
+        ) / 2
     term_1 = dot(gp.prevη₁, ιₐ * mean(gp))
     term_2 = -dot(ιₐ * mean(gp), gp.invDₐ * gp.κₐ * mean(gp))
     return trace_term + term_1 + term_2
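
The touched helper is a one-liner computing half a trace; presumably J is the Jacobian of a kernel matrix with respect to a hyperparameter and A a precomputed matrix, so the rewrite is again purely notational:

    \texttt{hyperparameter\_KL\_gradient}(J, A) = \tfrac{1}{2}\operatorname{tr}\bigl(J A^{\top}\bigr)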

src/inference/analytic.jl

Lines changed: 2 additions & 2 deletions
@@ -37,10 +37,10 @@ function analytic_updates(m::GP{T}, state, y) where {T}
     f = getf(m)
     l = likelihood(m)
     K = state.kernel_matrices.K
-    f.post.Σ = K + first(l.σ²) * I
+    f.post.Σ = K + only(l.σ²) * I
     f.post.α .= cov(f) \ (y - pr_mean(f, input(m.data)))
     if !isnothing(l.opt_noise)
-        g = 0.5 * (norm(mean(f), 2) - tr(inv(cov(f))))
+        g = (norm(mean(f), 2) - tr(inv(cov(f)))) / 2
         state_σ², Δlogσ² = Optimisers.apply(
             l.opt_noise, state.local_vars.state_σ², l.σ², g .* l.σ²
         )
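
These lines are the exact GP regression update with homoscedastic Gaussian noise, which is why the variance container must hold exactly one element (hence `only`):

    \Sigma = K + \sigma^{2} I, \qquad \alpha = \Sigma^{-1}\bigl(y - m(X)\bigr),

matching `f.post.Σ = K + only(l.σ²) * I` and the `cov(f) \ (y - pr_mean(f, input(m.data)))` solve.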

src/inference/analyticVI.jl

Lines changed: 6 additions & 6 deletions
@@ -119,7 +119,7 @@ function local_updates!(
     μ::Tuple{<:AbstractVector{T}},
     diagΣ::Tuple{<:AbstractVector{T}},
 ) where {T}
-    return local_updates!(local_vars, l, y, first(μ), first(diagΣ))
+    return local_updates!(local_vars, l, y, only(μ), only(diagΣ))
 end

 # Coordinate ascent updates on the natural parameters ##
@@ -135,7 +135,7 @@ function natural_gradient!(
 ) where {T}
     K = kernel_matrices.K
     gp.post.η₁ .= ∇E_μ .+ K \ pr_mean(gp, X)
-    gp.post.η₂ .= -Symmetric(Diagonal(∇E_Σ) + 0.5 * inv(K))
+    gp.post.η₂ .= -Symmetric(Diagonal(∇E_Σ) + inv(K) / 2)
     return gp
 end

@@ -168,15 +168,15 @@ function ∇η₁(
     return transpose(κ) * (ρ * ∇μ) + (K \ μ₀) - η₁
 end

-# Gradient of on the second natural parameter η₂ = -0.5Σ⁻¹
+# Gradient of on the second natural parameter η₂ = -1/2Σ⁻¹
 function ∇η₂(
     θ::AbstractVector{T},
     ρ::Real,
     κ::AbstractMatrix{<:Real},
     K::Cholesky{T,Matrix{T}},
     η₂::Symmetric{T,Matrix{T}},
 ) where {T<:Real}
-    return -(ρκdiagθκ(ρ, κ, θ) + 0.5 * inv(K)) - η₂
+    return -(ρκdiagθκ(ρ, κ, θ) + inv(K) / 2) - η₂
 end

 # Natural gradient for the ONLINE model (OSVGP) #
@@ -198,7 +198,7 @@ function natural_gradient!(
     invDₐ = previous_gp.invDₐ
     gp.post.η₁ = K \ pr_mean(gp, Z) + transpose(κ) * ∇E_μ + transpose(κₐ) * prevη₁
     gp.post.η₂ =
-        -Symmetric(ρκdiagθκ(1.0, κ, ∇E_Σ) + 0.5 * transpose(κₐ) * invDₐ * κₐ + 0.5 * inv(K))
+        -Symmetric(ρκdiagθκ(1.0, κ, ∇E_Σ) + transpose(κₐ) * invDₐ * κₐ / 2 + inv(K) / 2)
     return gp
 end

@@ -304,5 +304,5 @@ function expec_loglikelihood(
     diagΣ::Tuple{<:AbstractVector{T}},
     state,
 ) where {T}
-    return expec_loglikelihood(l, i, y, first(μ), first(diagΣ), state)
+    return expec_loglikelihood(l, i, y, only(μ), only(diagΣ), state)
 end
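
Read off from the code, the coordinate-ascent update sets the natural parameters of the variational Gaussian to

    \eta_1 = \nabla_{\mu}\mathbb{E} + K^{-1}\mu_0, \qquad
    \eta_2 = -\Bigl(\operatorname{Diag}(\nabla_{\Sigma}\mathbb{E}) + \tfrac{1}{2}K^{-1}\Bigr),

so rewriting `0.5 * inv(K)` as `inv(K) / 2` leaves the update mathematically unchanged.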

src/inference/gibbssampling.jl

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ end
 function sample_local!(
     local_vars, l::AbstractLikelihood, y, f::Tuple{<:AbstractVector{T}}
 ) where {T}
-    return sample_local!(local_vars, l, y, first(f))
+    return sample_local!(local_vars, l, y, only(f))
 end

 function sample_global!(

src/inference/inference.jl

Lines changed: 3 additions & 2 deletions
@@ -11,6 +11,7 @@ post_process!(::AbstractGPModel) = nothing

 # Utils to iterate over inference objects
 Base.length(::AbstractInference) = 1
+Base.size(::AbstractInference) = (1,)

 Base.iterate(i::AbstractInference) = (i, nothing)
 Base.iterate(::AbstractInference, ::Any) = nothing
@@ -22,13 +23,13 @@ conv_crit(i::AbstractInference) = i.ϵ

 ## Conversion from natural to standard distribution parameters ##
 function global_update!(gp::AbstractLatent)
-    gp.post.Σ .= -0.5 * inv(nat2(gp))
+    gp.post.Σ .= -inv(nat2(gp)) / 2
     return gp.post.μ .= cov(gp) * nat1(gp)
 end

 ## For the online case, the size may vary and inplace updates are note valid
 function global_update!(gp::OnlineVarLatent)
-    gp.post.Σ = -0.5 * inv(nat2(gp))
+    gp.post.Σ = -inv(nat2(gp)) / 2
     return gp.post.μ = cov(gp) * nat1(gp)
 end
3435
