Skip to content

Commit 29d9ef2

Browse files
committed
Added custom priors (constant and zero)
1 parent e716c85 commit 29d9ef2

File tree

9 files changed

+104
-39
lines changed

9 files changed

+104
-39
lines changed

src/AugmentedGaussianProcesses.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ include("kernels/KernelModule.jl")
2424
include("kmeans/KMeansModule.jl")
2525
include("functions/PGSampler.jl")
2626
#include("functions/PerturbativeCorrection.jl")
27-
include("functions/GPAnalysisTools.jl")
27+
# include("functions/GPAnalysisTools.jl")
2828
# include("functions/IO_model.jl")
2929
#Custom modules
3030
using .KernelModule
@@ -66,6 +66,7 @@ abstract type Inference{T<:Real} end
6666
abstract type Likelihood{T<:Real} end
6767

6868
const LatentArray = Vector #For future optimization : How collection of latent GPs are stored
69+
include("prior/meanprior.jl")
6970

7071
include("models/AbstractGP.jl")
7172
include("models/GP.jl")

src/autotuning.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ function update_hyperparameters!(model::Union{VGP,GP})
88

99
apply_gradients_lengthscale!.(model.kernel,grads_l) #Send the derivative of the matrix to the specific gradient of the model
1010
apply_gradients_variance!.(model.kernel,grads_v) #Send the derivative of the matrix to the specific gradient of the model
11-
apply_gradients_mean_prior!.(model.μ₀,grads_μ₀,model.opt_μ₀)
11+
update!.(model.μ₀,grads_μ₀)
1212

1313
model.inference.HyperParametersUpdated = true
1414
end
@@ -33,7 +33,7 @@ function update_hyperparameters!(model::SVGP{<:Likelihood,<:Inference,T}) where
3333
grads_μ₀ = map(f_μ₀,1:model.nPrior)
3434
apply_gradients_lengthscale!.(model.kernel,grads_l)
3535
apply_gradients_variance!.(model.kernel,grads_v)
36-
apply_gradients_mean_prior!.(model.μ₀,grads_μ₀,model.opt_μ₀)
36+
update!.(model.μ₀,grads_μ₀)
3737
model.inference.HyperParametersUpdated = true
3838
end
3939

src/models/GP.jl

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ mutable struct GP{L<:Likelihood,I<:Inference,T<:Real,V<:AbstractVector{T}} <: Ab
3434
nLatent::Int64 # Number of latent GPs
3535
IndependentPriors::Bool # Use of separate priors for each latent GP
3636
nPrior::Int64 # Equal to 1 or nLatent given IndependentPriors
37-
μ₀::LatentArray{V}
38-
opt_μ₀::LatentArray{Optimizer}
37+
μ₀::LatentArray{MeanPrior{T}}
3938
Knn::LatentArray{Symmetric{T,Matrix{T}}}
4039
invKnn::LatentArray{Symmetric{T,Matrix{T}}}
4140
kernel::LatentArray{Kernel{T}}
@@ -49,7 +48,7 @@ end
4948

5049

5150
function GP(X::AbstractArray{T1,N1},y::AbstractArray{T2,N2},kernel::Union{Kernel,AbstractVector{<:Kernel}}; noise::Real=1e-5,
52-
verbose::Integer=0,Autotuning::Bool=true,atfrequency::Integer=1,μ₀::AbstractVector{T1}=Vector{T1}(),
51+
verbose::Integer=0,Autotuning::Bool=true,atfrequency::Integer=1,mean::Union{<:Real,AbstractVector{<:Real},MeanPrior}=ZeroMean(),
5352
IndependentPriors::Bool=true,ArrayType::UnionAll=Vector) where {T1<:Real,T2,N1,N2}
5453
likelihood = GaussianLikelihood(noise)
5554
inference = Analytic()
@@ -61,19 +60,21 @@ function GP(X::AbstractArray{T1,N1},y::AbstractArray{T2,N2},kernel::Union{Kernel
6160

6261
Knn = LatentArray([Symmetric(Matrix{T1}(I,nFeature,nFeature)) for _ in 1:nPrior]);
6362
invKnn = copy(Knn)
64-
if !isempty(μ₀) && length(μ₀) == nFeature
65-
μ₀ = [μ₀ for _ in 1:nPrior]
63+
μ₀ = []
64+
if typeof(mean) <: Real
65+
μ₀ = [ConstantMean(mean) for _ in 1:nPrior]
66+
elseif typeof(mean) <: AbstractVector{<:Real}
67+
μ₀ = [EmpiricalMean(mean) for _ in 1:nPrior]
6668
else
67-
μ₀ = [zeros(T1,nFeature) for _ in 1:nPrior]
69+
μ₀ = [mean for _ in 1:nPrior]
6870
end
69-
opt_μ₀ = [Adam(α=0.1) for _ in 1:nPrior]
7071
likelihood = init_likelihood(likelihood,inference,nLatent,nSample)
7172
inference = init_inference(inference,nLatent,nSample,nSample,nSample)
7273

7374
model = GP{GaussianLikelihood{T1},Analytic{T1},T1,ArrayType{T1}}(X,y,
7475
nFeature, nDim, nFeature, nLatent,
7576
IndependentPriors,nPrior,
76-
μ₀,opt_μ₀,Knn,invKnn,kernel,likelihood,inference,
77+
μ₀,Knn,invKnn,kernel,likelihood,inference,
7778
verbose,Autotuning,atfrequency,false)
7879
computeMatrices!(model)
7980
model.Trained = true

src/models/SVGP.jl

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ mutable struct SVGP{L<:Likelihood,I<:Inference,T<:Real,V<:AbstractVector{T}} <:
4040
Σ::LatentArray{Symmetric{T,Matrix{T}}}
4141
η₁::LatentArray{V}
4242
η₂::LatentArray{Symmetric{T,Matrix{T}}}
43-
μ₀::LatentArray{V}
44-
opt_μ₀::LatentArray{Optimizer}
43+
μ₀::LatentArray{MeanPrior{T}}
4544
Kmm::LatentArray{Symmetric{T,Matrix{T}}}
4645
invKmm::LatentArray{Symmetric{T,Matrix{T}}}
4746
Knm::LatentArray{Matrix{T}}
@@ -60,7 +59,8 @@ end
6059
function SVGP(X::AbstractArray{T1},y::AbstractArray{T2},kernel::Union{Kernel,AbstractVector{<:Kernel}},
6160
likelihood::LikelihoodType,inference::InferenceType,
6261
nInducingPoints::Integer
63-
;verbose::Integer=0,Autotuning::Bool=true,atfrequency::Integer=1,μ₀::AbstractVector{T1}=Vector{T1}(),
62+
;verbose::Integer=0,Autotuning::Bool=true,atfrequency::Integer=1,
63+
mean::Union{<:Real,AbstractVector{<:Real},MeanPrior}=ZeroMean(),
6464
IndependentPriors::Bool=true, OptimizeInducingPoints::Bool=false,ArrayType::UnionAll=Vector) where {T1<:Real,T2,LikelihoodType<:Likelihood,InferenceType<:Inference}
6565

6666
X,y,nLatent,likelihood = check_data!(X,y,likelihood)
@@ -83,12 +83,14 @@ function SVGP(X::AbstractArray{T1},y::AbstractArray{T2},kernel::Union{Kernel,Abs
8383
Knm = deepcopy(κ)
8484
K̃ = LatentArray([zeros(T1,inference.Stochastic ? inference.nSamplesUsed : nSample) for _ in 1:nPrior])
8585
Kmm = LatentArray([similar(Σ[1]) for _ in 1:nPrior]); invKmm = similar.(Kmm)
86-
if !isempty(μ₀) && length(μ₀) == nFeature
87-
μ₀ = [μ₀ for _ in 1:nPrior]
86+
μ₀ = []
87+
if typeof(mean) <: Real
88+
μ₀ = [ConstantMean(mean) for _ in 1:nPrior]
89+
elseif typeof(mean) <: AbstractVector{<:Real}
90+
μ₀ = [EmpiricalMean(mean) for _ in 1:nPrior]
8891
else
89-
μ₀ = [zeros(T1,nFeature) for _ in 1:nPrior]
92+
μ₀ = [mean for _ in 1:nPrior]
9093
end
91-
opt_μ₀ = [Adam(α=1.0) for _ in 1:nPrior]
9294

9395
nSamplesUsed = nSample
9496
if inference.Stochastic
@@ -97,7 +99,6 @@ function SVGP(X::AbstractArray{T1},y::AbstractArray{T2},kernel::Union{Kernel,Abs
9799
opt = kernel[1].fields.variance.opt
98100
opt.α = opt.α*0.1
99101
setoptimizer!.(kernel,[copy(opt) for _ in 1:nLatent])
100-
broadcast(opt->opt.α=opt.α*0.1,opt_μ₀)
101102
end
102103

103104
likelihood = init_likelihood(likelihood,inference,nLatent,nSamplesUsed)
@@ -106,7 +107,7 @@ function SVGP(X::AbstractArray{T1},y::AbstractArray{T2},kernel::Union{Kernel,Abs
106107
nSample, nDim, nFeature, nLatent,
107108
IndependentPriors,nPrior,
108109
Z,μ,Σ,η₁,η₂,
109-
μ₀,opt_μ₀,Kmm,invKmm,Knm,κ,K̃,
110+
μ₀,Kmm,invKmm,Knm,κ,K̃,
110111
kernel,likelihood,inference,
111112
verbose,Autotuning,atfrequency,OptimizeInducingPoints,false)
112113
if isa(inference.optimizer_η₁[1],ALRSVI)

src/models/VGP.jl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ end
5656

5757
function VGP(X::AbstractArray{T1,N1},y::AbstractArray{T2,N2},kernel::Union{Kernel,AbstractVector{<:Kernel}},
5858
likelihood::LikelihoodType,inference::InferenceType;
59-
verbose::Integer=0,Autotuning::Bool=true,atfrequency::Integer=1,mean::Union{T,MeanPrior}=ConstantMean(0.0),
59+
verbose::Integer=0,Autotuning::Bool=true,atfrequency::Integer=1,mean::Union{<:Real,AbstractVector{<:Real},MeanPrior}=ZeroMean(),
6060
IndependentPriors::Bool=true,ArrayType::UnionAll=Vector) where {T1<:Real,T2,N1,N2,LikelihoodType<:Likelihood,InferenceType<:Inference}
6161

6262
X,y,nLatent,likelihood = check_data!(X,y,likelihood)
@@ -69,12 +69,14 @@ function VGP(X::AbstractArray{T1,N1},y::AbstractArray{T2,N2},kernel::Union{Kerne
6969
μ = LatentArray([zeros(T1,nFeature) for _ in 1:nLatent]); η₁ = deepcopy(μ)
7070
Σ = LatentArray([Symmetric(Matrix(Diagonal(one(T1)*I,nFeature))) for _ in 1:nLatent]);
7171
η₂ = -0.5*inv.(Σ);
72+
μ₀ = []
7273
if typeof(mean) <: Real
73-
mean = [ConstantMean(mean) for _ in 1:nPrior]
74+
μ₀ = [ConstantMean(mean) for _ in 1:nPrior]
75+
elseif typeof(mean) <: AbstractVector{<:Real}
76+
μ₀ = [EmpiricalMean(mean) for _ in 1:nPrior]
7477
else
75-
mean = [mean for _ in 1:nPrior]
78+
μ₀ = [mean for _ in 1:nPrior]
7679
end
77-
7880
Knn = LatentArray([deepcopy(Σ[1]) for _ in 1:nPrior]);
7981
invKnn = copy(Knn)
8082

@@ -84,7 +86,7 @@ function VGP(X::AbstractArray{T1,N1},y::AbstractArray{T2,N2},kernel::Union{Kerne
8486
VGP{LikelihoodType,InferenceType,T1,ArrayType{T1}}(X,y,
8587
nFeature, nDim, nFeature, nLatent,
8688
IndependentPriors,nPrior,μ,Σ,η₁,η₂,
87-
mean,Knn,invKnn,kernel,likelihood,inference,
89+
μ₀,Knn,invKnn,kernel,likelihood,inference,
8890
verbose,Autotuning,atfrequency,false)
8991
end
9092

src/prior/constantmean.jl

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,19 @@ function ConstantMean(c::T=1.0;opt::Optimizer=Adam(α=0.01)) where {T<:Real}
1313
end
1414

1515
function update!(μ::ConstantMean{T},grad::AbstractVector{T}) where {T<:Real}
16-
μ.C .+= update!(μ.opt,sum(grad))
16+
μ.C += update!(μ.opt,sum(grad))
1717
end
1818

19-
Base.+(x::Real,y::ConstantMean{<:Real}) = x+y.C
20-
Base.+(x::AbstractVector{<:Real},y::ConstantMean{<:Real}) = x.+y.C
21-
Base.+(x::ConstantMean{<:Real},y::AbstractVector{<:Real}) = y.+x.C
22-
Base.+(x::ConstantMean{<:Real},y::Real) = y+x.C
23-
Base.+(x::ConstantMean{<:Real},y::AbstractVector{<:Real}) = ConstantMean(x.C+y.C)
24-
Base.-(x::Real,y::ConstantMean) = x - y.C
25-
Base.-(x::AbstractVector{<:Real},y::ConstantMean) = x .- y.C
26-
Base.-(x::ConstantMean{<:Real},y::Real) = x.C - y
27-
Base.-(x::ConstantMean{<:Real},y::AbstractVector{<:Real}) = x.C .- y
28-
Base.-(x::ConstantMean{<:Real},y::AbstractVector{<:Real}) = ConstantMean(x.C-y.C)
29-
Base.*(A::AbstractMatrix{<:Real},y::ConstantMean{T}) where {T<:Real} = y.C*A*ones(T,size(A,2),1)
30-
Base.*(y::ConstantMean{T},A::AbstractMatrix{<:Real}) where {T<:Real} = y.C*ones(T,1,size(A,1))*A
31-
Base.convert(::T1,x::ConstantMean{T2}) where {T1<:Real,T2<:Real} = T1(x.C)
19+
Base.:+(x::Real,y::ConstantMean{<:Real}) = x+y.C
20+
Base.:+(x::AbstractVector{<:Real},y::ConstantMean{<:Real}) = x.+y.C
21+
Base.:+(x::ConstantMean{<:Real},y::Real) = y+x.C
22+
Base.:+(x::ConstantMean{<:Real},y::AbstractVector{<:Real}) = y.+x.C
23+
Base.:+(x::ConstantMean{<:Real},y::ConstantMean{<:Real}) = ConstantMean(x.C+y.C)
24+
Base.:-(x::Real,y::ConstantMean) = x - y.C
25+
Base.:-(x::AbstractVector{<:Real},y::ConstantMean) = x .- y.C
26+
Base.:-(x::ConstantMean{<:Real},y::Real) = x.C - y
27+
Base.:-(x::ConstantMean{<:Real},y::AbstractVector{<:Real}) = x.C .- y
28+
Base.:-(x::ConstantMean{<:Real},y::AbstractVector{<:Real}) = ConstantMean(x.C-y.C)
29+
Base.:*(A::AbstractMatrix{<:Real},y::ConstantMean{T}) where {T<:Real} = y.C*A*ones(T,size(A,2))
30+
Base.:*(y::ConstantMean{T},A::AbstractMatrix{<:Real}) where {T<:Real} = y.C*ones(T,1,size(A,1))*A
31+
Base.:convert(::T1,x::ConstantMean{T2}) where {T1<:Real,T2<:Real} = T1(x.C)

src/prior/empiricalmean.jl

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
mutable struct EmpiricalMean{T<:Real,V<:AbstractVector{<:Real}} <: MeanPrior{T}
2+
C::V
3+
opt::Optimizer
4+
end
5+
6+
"""
7+
EmpiricalMean(c)
8+
Construct an empirical mean with values `c`
9+
Optionally give an optimizer `opt` (`Adam(α=0.01)` by default)
10+
"""
11+
function EmpiricalMean(c::V=1.0;opt::Optimizer=Adam(α=0.01)) where {V<:AbstractVector{<:Real}}
12+
EmpiricalMean{eltype(c),V}(c,opt)
13+
end
14+
15+
function update!(μ::EmpiricalMean{T},grad::AbstractVector{T}) where {T<:Real}
16+
μ.C .+= update!(μ.opt,grad)
17+
end
18+
19+
Base.:+(x::Real,y::EmpiricalMean{<:Real}) = x.+y.C
20+
Base.:+(x::AbstractVector{<:Real},y::EmpiricalMean{<:Real}) = x+y.C
21+
Base.:+(x::EmpiricalMean{<:Real},y::Real) = y.+x.C
22+
Base.:+(x::EmpiricalMean{<:Real},y::AbstractVector{<:Real}) = y+x.C
23+
Base.:+(x::EmpiricalMean{<:Real},y::EmpiricalMean{<:Real}) = EmpiricalMean(x.C+y.C)
24+
Base.:-(x::Real,y::EmpiricalMean) = x .- y.C
25+
Base.:-(x::AbstractVector{<:Real},y::EmpiricalMean) = x - y.C
26+
Base.:-(x::EmpiricalMean{<:Real},y::Real) = x.C .- y
27+
Base.:-(x::EmpiricalMean{<:Real},y::AbstractVector{<:Real}) = x.C - y
28+
Base.:-(x::EmpiricalMean{<:Real},y::EmpiricalMean{<:Real}) = EmpiricalMean(x.C-y.C)
29+
Base.:*(A::AbstractMatrix{<:Real},y::EmpiricalMean{T}) where {T<:Real} = A*y.C
30+
Base.:*(y::EmpiricalMean{T},A::AbstractMatrix{<:Real}) where {T<:Real} = transpose(y)*A
31+
Base.:convert(::T1,x::EmpiricalMean{T2}) where {T1<:Real,T2<:Real} = T1(x.C)

src/prior/meanprior.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ abstract type MeanPrior{T} end
33
import Base: +, -, *, convert
44

55
include("constantmean.jl")
6+
include("zeromean.jl")
7+
include("empiricalmean.jl")

src/prior/zeromean.jl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
mutable struct ZeroMean{T<:Real} <: MeanPrior{T}
2+
end
3+
4+
"""
5+
ZeroMean(c)
6+
Construct a constant mean with constant 0
7+
"""
8+
function ZeroMean()
9+
ZeroMean{Float64}()
10+
end
11+
12+
function update!(μ::ZeroMean{T},grad::AbstractVector{T}) where {T<:Real}
13+
end
14+
15+
Base.:+(x::Real,y::ZeroMean{<:Real}) = x
16+
Base.:+(x::AbstractVector{<:Real},y::ZeroMean{<:Real}) = x
17+
Base.:+(x::ZeroMean{<:Real},y::Real) = y
18+
Base.:+(x::ZeroMean{<:Real},y::AbstractVector{<:Real}) = y
19+
Base.:+(x::ZeroMean{<:Real},y::ConstantMean{<:Real}) = ConstantMean(y.C)
20+
Base.:-(x::Real,y::ZeroMean{<:Real}) = x
21+
Base.:-(x::AbstractVector{<:Real},y::ZeroMean) = x
22+
Base.:-(x::ZeroMean{<:Real},y::Real) = -y
23+
Base.:-(x::ZeroMean{<:Real},y::AbstractVector{<:Real}) = -y
24+
Base.:-(x::ZeroMean{<:Real},y::ConstantMean{<:Real}) = ConstantMean(-y.C)
25+
Base.:*(A::AbstractMatrix{<:Real},y::ZeroMean{T}) where {T<:Real} = zeros(T,size(A,2))
26+
Base.:*(y::ZeroMean{T},A::AbstractMatrix{<:Real}) where {T<:Real} = zeros(T,1,size(A,1))
27+
Base.:convert(::T1,x::ZeroMean{T2}) where {T1<:Real,T2<:Real} = T1(x.C)

0 commit comments

Comments
 (0)