From 888d752de3a679ca21f5dca85b00c28d33602f52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Fri, 16 Apr 2021 14:48:23 +0200 Subject: [PATCH 01/18] Correction of the docstring I was checking the docstrings and the one for the `spectral_mixture_kernel` is particularly wrong I think. I double checked with the first reference paper as well as the implementation and changed the formula/description of the arguments --- src/basekernels/sm.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 833945295..c2a42961f 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -6,17 +6,17 @@ ωs::AbstractMatrix{<:Real}, ) -where αs are the weights of dimension (A, ), γs is the covariance matrix of -dimension (D, A) and ωs are the mean vectors and is of dimension (D, A). -Here, D is input dimension and A is the number of spectral components. +where `αs` are the weight vector of dimension `A`, `γs` is the sqrt of the covariance matrix of +dimension `(D, A)` and `ωs` are the concatenated mean vectors of dimension (D, A). +Here, `D` is input dimension and `A` is the number of spectral components. -`h` is the kernel, which defaults to [`SqExponentialKernel`](@ref) if not specified. +`h` is the stationary kernel, which defaults to [`SqExponentialKernel`](@ref) if not specified. -Generalised Spectral Mixture kernel function. This family of functions is dense +Generalised Spectral Mixture kernel function. This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] ```math - κ(x, y) = αs' (h(-(γs' * t)^2) .* cos(π * ωs' * t), t = x - y + κ(x, y) = \sum_k \alpha_k^\top (h(γ_k x, γ_k y) \cos(π \cdot ω_k \odot (x-y)), ``` # References: From 31adb2b25c18973ca8d2f086fcbd110304fea741 Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Sun, 25 Jul 2021 20:00:05 +0200 Subject: [PATCH 02/18] Fixes documentations --- src/basekernels/sm.jl | 61 ++++++++++++++++++++++++-------- src/transform/lineartransform.jl | 4 +++ 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index c2a42961f..92333d102 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -1,12 +1,25 @@ """ spectral_mixture_kernel( h::Kernel=SqExponentialKernel(), - αs::AbstractVector{<:Real}, - γs::AbstractMatrix{<:Real}, - ωs::AbstractMatrix{<:Real}, + α::AbstractVector{<:Real}, + γ::AbstractMatrix{<:Real}, + ω::AbstractMatrix{<:Real}, + ) + spectral_mixture_kernel( + h::Kernel=SqExponentialKernel(), + α::AbstractVector{<:Real}, + γ::AbstractVector{<:AbstractVecorMat{<:Real}}, + ω::AbstractVector{<:AbstractVecorMat{<:Real}}, ) -where `αs` are the weight vector of dimension `A`, `γs` is the sqrt of the covariance matrix of +Given `A` the number of mixture components and `D` the dimension of the inputs: + +## Arguments +- `h`: Stationary kernel (translation invariant), `SqExponentialKernel` by default +- `α`: Weight vector of each mixture component (`length(α)==A`) +- `γ`: Linear transformation of the input for `h`. `γ` can be an `AbstractMatrix` or +- `ω`: Linear transformation +where `α` are the weight vector of dimension `A`, `γs` is the sqrt of the covariance matrix of dimension `(D, A)` and `ωs` are the concatenated mean vectors of dimension (D, A). Here, `D` is input dimension and `A` is the number of spectral components. @@ -16,7 +29,7 @@ Generalised Spectral Mixture kernel function. This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] ```math - κ(x, y) = \sum_k \alpha_k^\top (h(γ_k x, γ_k y) \cos(π \cdot ω_k \odot (x-y)), + κ(x, y) = \sum_k \alpha_k^\top (h(γ_k \odot x, γ_k \odot y) \cos(π \cdot ω_k^\top (x-y)), ``` # References: @@ -31,24 +44,42 @@ in the family of stationary real-valued kernels with respect to the pointwise co """ function spectral_mixture_kernel( h::Kernel, - αs::AbstractVector{<:Real}, - γs::AbstractMatrix{<:Real}, - ωs::AbstractMatrix{<:Real}, + α::AbstractVector{<:Real}, + γ::AbstractMatrix{<:Real}, + ω::AbstractMatrix{<:Real}, ) - if !(size(αs, 1) == size(γs, 2) == size(ωs, 2)) - throw(DimensionMismatch("The dimensions of αs, γs, ans ωs do not match")) + if !(size(α, 1) == size(γ, 2) == size(ω, 2)) + throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) end - if size(γs) != size(ωs) - throw(DimensionMismatch("The dimensions of γs ans ωs do not match")) + if size(γ, 1) != size(ω, 1) + throw(DimensionMismatch("The dimensions of γ ans ω do not match")) end - return sum(zip(αs, eachcol(γs), eachcol(ωs))) do (α, γ, ω) - a = TransformedKernel(h, LinearTransform(γ')) - b = TransformedKernel(CosineKernel(), LinearTransform(ω')) + return sum(zip(α, eachcol(γ), eachcol(ω))) do (α, γ, ω) + a = TransformedKernel(h, ARDTransform(γ)) + b = TransformedKernel(CosineKernel(), ARDTransform(ω)) return α * a * b end end +function spectral_mixture_kernel( + h::Kernel, + α::AbstractVector{<:Real}, + γ::AbstractVector{<:AbstractVector}, + ω::AbstractVector{<:AbstractVector}, +) + if !(length(α) == length(γ) == length(ω)) + throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) + end + + return sum(zip(α, γ, ω)) do (αk, γk, ωk) + a = TransformedKernel(h, ARDTransform(γk)) + b = TransformedKernel(CosineKernel(), ARDTransform(ωk)) + return αk * a * b + end +end + + function spectral_mixture_kernel( αs::AbstractVector{<:Real}, γs::AbstractMatrix{<:Real}, ωs::AbstractMatrix{<:Real} ) diff --git a/src/transform/lineartransform.jl b/src/transform/lineartransform.jl index b61ba6a94..9f49e2c23 100644 --- a/src/transform/lineartransform.jl +++ b/src/transform/lineartransform.jl @@ -1,7 +1,9 @@ """ LinearTransform(A::AbstractMatrix) + LinearTransform(a::AbstractVector) Linear transformation of the input realised by the matrix `A`. +If `a` is an `AbstractVector`, `Diagonal(a)` is passed The second dimension of `A` must match the number of features of the target. @@ -18,6 +20,8 @@ struct LinearTransform{T<:AbstractMatrix{<:Real}} <: Transform A::T end +LinearTransform(a::AbstractVector{<:Real}) = LinearTransform(Diagonal(a)) + @functor LinearTransform function set!(t::LinearTransform{<:AbstractMatrix{T}}, A::AbstractMatrix{T}) where {T<:Real} From de1c84aadd5000a7ab1f680da2751055f1c4c07e Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 14:35:59 +0200 Subject: [PATCH 03/18] Fix multiple docs --- src/basekernels/sm.jl | 91 ++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 49 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 92333d102..c29b135e0 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -8,30 +8,29 @@ spectral_mixture_kernel( h::Kernel=SqExponentialKernel(), α::AbstractVector{<:Real}, - γ::AbstractVector{<:AbstractVecorMat{<:Real}}, - ω::AbstractVector{<:AbstractVecorMat{<:Real}}, + γ::AbstractVector{<:AbstractVecOrMat{<:Real}}, + ω::AbstractVector{<:AbstractVecOrMat{<:Real}}, ) -Given `A` the number of mixture components and `D` the dimension of the inputs: - -## Arguments -- `h`: Stationary kernel (translation invariant), `SqExponentialKernel` by default -- `α`: Weight vector of each mixture component (`length(α)==A`) -- `γ`: Linear transformation of the input for `h`. `γ` can be an `AbstractMatrix` or -- `ω`: Linear transformation -where `α` are the weight vector of dimension `A`, `γs` is the sqrt of the covariance matrix of -dimension `(D, A)` and `ωs` are the concatenated mean vectors of dimension (D, A). -Here, `D` is input dimension and `A` is the number of spectral components. - -`h` is the stationary kernel, which defaults to [`SqExponentialKernel`](@ref) if not specified. - -Generalised Spectral Mixture kernel function. This family of functions is dense +Generalised Spectral Mixture kernel function as described in [1] (Eq. 6). This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] ```math - κ(x, y) = \sum_k \alpha_k^\top (h(γ_k \odot x, γ_k \odot y) \cos(π \cdot ω_k^\top (x-y)), + κ(x, y) = \sum_{k=1}^K \alpha_k (h(γ_k \odot x, γ_k \odot y) \cos(2π \cdot ω_k^\top (x-y)), ``` +## Arguments +- `h`: Stationary kernel (translation invariant), [`SqExponentialKernel`](@ref) by default +- `α`: Weight vector of each mixture component +- `γ`: Linear transformation of the input for `h`. +- `ω`: Linear transformation of the input for the [`CosineKernel`](@ref). + +`γ` and `ω` can either be +- `AbstractMatrix` of dimension `D x K` where `D` is the dimension of the inputs +and `K` is the number of components +- `AbstractVector`s (of length `A`) of `AbstractVector` of length `D` + + # References: [1] Generalized Spectral Kernels, by Yves-Laurent Kom Samo and Stephen J. Roberts [2] SM: Gaussian Process Kernels for Pattern Discovery and Extrapolation, @@ -48,18 +47,7 @@ function spectral_mixture_kernel( γ::AbstractMatrix{<:Real}, ω::AbstractMatrix{<:Real}, ) - if !(size(α, 1) == size(γ, 2) == size(ω, 2)) - throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) - end - if size(γ, 1) != size(ω, 1) - throw(DimensionMismatch("The dimensions of γ ans ω do not match")) - end - - return sum(zip(α, eachcol(γ), eachcol(ω))) do (α, γ, ω) - a = TransformedKernel(h, ARDTransform(γ)) - b = TransformedKernel(CosineKernel(), ARDTransform(ω)) - return α * a * b - end + return spectral_mixture_kernel(h, α, ColVecs(γ), ColVecs(ω)) end function spectral_mixture_kernel( @@ -72,16 +60,15 @@ function spectral_mixture_kernel( throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) end - return sum(zip(α, γ, ω)) do (αk, γk, ωk) - a = TransformedKernel(h, ARDTransform(γk)) - b = TransformedKernel(CosineKernel(), ARDTransform(ωk)) - return αk * a * b + return mapreduce(+, α, γ, ω) do (αk, γk, ωk) + sqkernel = TransformedKernel(h, ARDTransform(γk)) + coskernel = TransformedKernel(CosineKernel(), ARDTransform(2 * ωk)) + return αk * sqkernel * coskernel end end - function spectral_mixture_kernel( - αs::AbstractVector{<:Real}, γs::AbstractMatrix{<:Real}, ωs::AbstractMatrix{<:Real} + αs::AbstractVector{<:Real}, γs::AbstractVecOrMat, ωs::AbstractVecOrMat ) return spectral_mixture_kernel(SqExponentialKernel(), αs, γs, ωs) end @@ -89,26 +76,32 @@ end """ spectral_mixture_product_kernel( h::Kernel=SqExponentialKernel(), - αs::AbstractMatrix{<:Real}, - γs::AbstractMatrix{<:Real}, - ωs::AbstractMatrix{<:Real}, + α::AbstractMatrix{<:Real}, + γ::AbstractMatrix{<:Real}, + ω::AbstractMatrix{<:Real}, ) -where αs are the weights of dimension (D, A), γs is the covariance matrix of -dimension (D, A) and ωs are the mean vectors and is of dimension (D, A). -Here, D is input dimension and A is the number of spectral components. - -Spectral Mixture Product Kernel. With enough components A, the SMP kernel +The spectral mixture product is tensor product of spectral mixture kernel applied +on each dimension as described in [1] (Eq. 13, 14). +With enough components, the SMP kernel can model any product kernel to arbitrary precision, and is flexible even -with a small number of components [1] - - -`h` is the kernel, which defaults to [`SqExponentialKernel`](@ref) if not specified. +with a small number of components ```math - κ(x, y) = Πᵢ₌₁ᴷ Σ(αsᵢᵀ .* (h(-(γsᵢᵀ * tᵢ)²) .* cos(ωsᵢᵀ * tᵢ))), tᵢ = xᵢ - yᵢ + κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{k,i} (h(\gamma_{k,i} x_i, \gamma_{k,i} y_i)) cos(2\pi \omega_{i, k} (x_i - y_i)))) ``` +## Arguments +- `h`: Stationary kernel (translation invariant), [`SqExponentialKernel`](@ref) by default +- `α`: Weight of each mixture component for each dimension +- `γ`: Linear transformation of the input for `h`. +- `ω`: Linear transformation of the input for the [`CosineKernel`](@ref). + +`α`, `γ` and `ω` can all be +where αs are the weights of dimension (D, A), γs is the covariance matrix of +dimension (D, A) and ωs are the mean vectors and is of dimension (D, A). +Here, D is input dimension and A is the number of spectral components. + # References: [1] GPatt: Fast Multidimensional Pattern Extrapolation with GPs, arXiv 1310.5288, 2013, by Andrew Gordon Wilson, Elad Gilboa, @@ -130,7 +123,7 @@ function spectral_mixture_product_kernel( end function spectral_mixture_product_kernel( - αs::AbstractMatrix{<:Real}, γs::AbstractMatrix{<:Real}, ωs::AbstractMatrix{<:Real} + αs::AbstractVecOrMat, γs::AbstractVecOrMat, ωs::AbstractVecOrMat ) return spectral_mixture_product_kernel(SqExponentialKernel(), αs, γs, ωs) end From 50d5471d7e00def219f660d8db8b4993a7b601df Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 14:51:12 +0200 Subject: [PATCH 04/18] Version to test --- src/basekernels/sm.jl | 58 ++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index c29b135e0..99ed5c0a6 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -1,4 +1,4 @@ -""" +@doc raw""" spectral_mixture_kernel( h::Kernel=SqExponentialKernel(), α::AbstractVector{<:Real}, @@ -12,8 +12,8 @@ ω::AbstractVector{<:AbstractVecOrMat{<:Real}}, ) -Generalised Spectral Mixture kernel function as described in [1] (Eq. 6). This family of functions is dense -in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] +Generalised Spectral Mixture kernel function as described in [1] (Eq. 6). +This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] ```math κ(x, y) = \sum_{k=1}^K \alpha_k (h(γ_k \odot x, γ_k \odot y) \cos(2π \cdot ω_k^\top (x-y)), @@ -25,10 +25,10 @@ in the family of stationary real-valued kernels with respect to the pointwise co - `γ`: Linear transformation of the input for `h`. - `ω`: Linear transformation of the input for the [`CosineKernel`](@ref). -`γ` and `ω` can either be +`γ` and `ω` can be an - `AbstractMatrix` of dimension `D x K` where `D` is the dimension of the inputs and `K` is the number of components -- `AbstractVector`s (of length `A`) of `AbstractVector` of length `D` +- `AbstractVector` of `K` `D`-dimensional `AbstractVector` # References: @@ -60,10 +60,10 @@ function spectral_mixture_kernel( throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) end - return mapreduce(+, α, γ, ω) do (αk, γk, ωk) - sqkernel = TransformedKernel(h, ARDTransform(γk)) - coskernel = TransformedKernel(CosineKernel(), ARDTransform(2 * ωk)) - return αk * sqkernel * coskernel + return mapreduce(+, α, γ, ω) do (αₖ, γₖ, ωₖ) + sqkernel = TransformedKernel(h, ARDTransform(γₖ)) + coskernel = TransformedKernel(CosineKernel(), ARDTransform(2 * ωₖ)) + return αₖ * sqkernel * coskernel end end @@ -73,7 +73,7 @@ function spectral_mixture_kernel( return spectral_mixture_kernel(SqExponentialKernel(), αs, γs, ωs) end -""" +@doc raw""" spectral_mixture_product_kernel( h::Kernel=SqExponentialKernel(), α::AbstractMatrix{<:Real}, @@ -97,10 +97,11 @@ with a small number of components - `γ`: Linear transformation of the input for `h`. - `ω`: Linear transformation of the input for the [`CosineKernel`](@ref). -`α`, `γ` and `ω` can all be -where αs are the weights of dimension (D, A), γs is the covariance matrix of -dimension (D, A) and ωs are the mean vectors and is of dimension (D, A). -Here, D is input dimension and A is the number of spectral components. +`α`, `γ` and `ω` can be an +- `AbstractMatrix` of dimension `D x K` where `D` is the dimension of the inputs +and `K` is the number of components +- `AbstractVector` of `D` `K`-dimensional `AbstractVector` + # References: [1] GPatt: Fast Multidimensional Pattern Extrapolation with GPs, @@ -109,21 +110,28 @@ Here, D is input dimension and A is the number of spectral components. """ function spectral_mixture_product_kernel( h::Kernel, - αs::AbstractMatrix{<:Real}, - γs::AbstractMatrix{<:Real}, - ωs::AbstractMatrix{<:Real}, + α::AbstractMatrix{<:Real}, + γ::AbstractMatrix{<:Real}, + ω::AbstractMatrix{<:Real}, +) + return spectral_mixture_product_kernel(h, RowVecs(α), RowVecs(γ), RowVecs(ω)) +end + +function spectral_mixture_product_kernel( + h::Kernel, + α::AbstractVector{<:AbstractVector{<:Real}}, + γ::AbstractVector{<:AbstractVector{<:Real}}, + ω::AbstractVector{<:AbstractVector{<:Real}}, ) - if !(size(αs) == size(γs) == size(ωs)) - throw(DimensionMismatch("The dimensions of αs, γs, ans ωs do not match")) + (length(α) == length(γ) && length(γ) == length(ω)) || + throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) + return mapreduce(⊗, α, γ, ω) do (αᵢ, γᵢ, ωᵢ) + return spectral_mixture_kernel(h, αᵢ, γᵢ, ωᵢ) end - return KernelTensorProduct( - spectral_mixture_kernel(h, α, reshape(γ, 1, :), reshape(ω, 1, :)) for - (α, γ, ω) in zip(eachrow(αs), eachrow(γs), eachrow(ωs)) - ) end function spectral_mixture_product_kernel( - αs::AbstractVecOrMat, γs::AbstractVecOrMat, ωs::AbstractVecOrMat + α::AbstractVecOrMat, γ::AbstractVecOrMat, ω::AbstractVecOrMat ) - return spectral_mixture_product_kernel(SqExponentialKernel(), αs, γs, ωs) + return spectral_mixture_product_kernel(SqExponentialKernel(), α, γ, ω) end From e86f5729f54f243f7ac3eb59cdf38e3b2ccef2bc Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 16:07:46 +0200 Subject: [PATCH 05/18] Wrote more detailed tests --- src/basekernels/sm.jl | 8 ++++---- test/basekernels/sm.jl | 26 ++++++++++++++------------ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 99ed5c0a6..67a449de5 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -60,7 +60,7 @@ function spectral_mixture_kernel( throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) end - return mapreduce(+, α, γ, ω) do (αₖ, γₖ, ωₖ) + return mapreduce(+, α, γ, ω) do αₖ, γₖ, ωₖ sqkernel = TransformedKernel(h, ARDTransform(γₖ)) coskernel = TransformedKernel(CosineKernel(), ARDTransform(2 * ωₖ)) return αₖ * sqkernel * coskernel @@ -88,7 +88,7 @@ can model any product kernel to arbitrary precision, and is flexible even with a small number of components ```math - κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{k,i} (h(\gamma_{k,i} x_i, \gamma_{k,i} y_i)) cos(2\pi \omega_{i, k} (x_i - y_i)))) + κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{k,i} (h(\gamma_{k,i} x_i, \gamma_{k,i} y_i)) \cos(2\pi \omega_{i, k} (x_i - y_i)))) ``` ## Arguments @@ -125,8 +125,8 @@ function spectral_mixture_product_kernel( ) (length(α) == length(γ) && length(γ) == length(ω)) || throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) - return mapreduce(⊗, α, γ, ω) do (αᵢ, γᵢ, ωᵢ) - return spectral_mixture_kernel(h, αᵢ, γᵢ, ωᵢ) + return mapreduce(⊗, α, γ, ω) do αᵢ, γᵢ, ωᵢ + return spectral_mixture_kernel(h, αᵢ, permutedims(γᵢ), permutedims(ωᵢ)) end end diff --git a/test/basekernels/sm.jl b/test/basekernels/sm.jl index b91024144..1a2726def 100644 --- a/test/basekernels/sm.jl +++ b/test/basekernels/sm.jl @@ -3,28 +3,30 @@ v1 = rand(D_in) v2 = rand(D_in) - αs₁ = rand(3) - αs₂ = rand(D_in, 3) - γs = rand(D_in, 3) - ωs = rand(D_in, 3) + K = 3 + αs₁ = rand(K) + αs₂ = rand(D_in, K) + γs = rand(D_in, K) + ωs = rand(D_in, K) k1 = spectral_mixture_kernel(αs₁, γs, ωs) k2 = spectral_mixture_product_kernel(αs₂, γs, ωs) t = v1 - v2 - @test k1(v1, v2) ≈ sum(αs₁ .* exp.(-(t' * γs)' .^ 2 ./ 2) .* cospi.((t' * ωs)')) atol = - 1e-5 + @test k1(v1, v2) ≈ sum( + αs₁[k] * exp(-norm(t .* γs[:, k])^2 / 2) * cospi(2 * dot(ωs[:, k], t)) for k in 1:K + ) atol = 1e-5 @test isapprox( k2(v1, v2), - prod([ + prod( sum( - αs₂[i, :]' .* exp.(-(γs[i, :]' * t[i]) .^ 2 ./ 2) .* - cospi.(ωs[i, :]' * t[i]), - ) for i in 1:length(t) - ],); - atol=1e-5, + αs₂[i, k] * exp(-(γs[i, k] * t[i])^2 / 2) * cospi(2 * ωs[i, k] * t[i]) for + k in 1:K + ) for i in 1:D_in + ); + atol=1e-8, ) @test_throws DimensionMismatch spectral_mixture_kernel(rand(5), rand(4, 3), rand(4, 3)) From 01cfaaf8ebb4f14b7fb58da4397a84d66f9d6831 Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 16:29:07 +0200 Subject: [PATCH 06/18] Add a bunch of cdot --- src/basekernels/sm.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 67a449de5..4beada00a 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -16,7 +16,7 @@ Generalised Spectral Mixture kernel function as described in [1] (Eq. 6). This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] ```math - κ(x, y) = \sum_{k=1}^K \alpha_k (h(γ_k \odot x, γ_k \odot y) \cos(2π \cdot ω_k^\top (x-y)), + κ(x, y) = \sum_{k=1}^K \alpha_k \cdot h(γ_k \odot x, γ_k \odot y) \cdot \cos(2π \cdot ω_k^\top (x-y)), ``` ## Arguments @@ -88,7 +88,7 @@ can model any product kernel to arbitrary precision, and is flexible even with a small number of components ```math - κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{k,i} (h(\gamma_{k,i} x_i, \gamma_{k,i} y_i)) \cos(2\pi \omega_{i, k} (x_i - y_i)))) + κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{k,i} \cdot h(\gamma_{k,i} \cdot x_i, \gamma_{k,i} \cdot y_i)) \cdot \cos(2\pi \cdot \omega_{i, k} \cdot (x_i - y_i)))) ``` ## Arguments From ffba53995757f77dc19efb98e126852b8472cdbc Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 17:08:55 +0200 Subject: [PATCH 07/18] changed spectral_mixture_kernel to a struct --- src/KernelFunctions.jl | 5 +++- src/basekernels/sm.jl | 58 +++++++++++++++++++++++++----------------- src/deprecated.jl | 1 + test/basekernels/sm.jl | 19 +++++++------- 4 files changed, 48 insertions(+), 35 deletions(-) create mode 100644 src/deprecated.jl diff --git a/src/KernelFunctions.jl b/src/KernelFunctions.jl index 4fded7dc5..f38ed9f76 100644 --- a/src/KernelFunctions.jl +++ b/src/KernelFunctions.jl @@ -15,6 +15,7 @@ export LinearKernel, PolynomialKernel export RationalKernel, RationalQuadraticKernel, GammaRationalKernel export PiecewisePolynomialKernel export PeriodicKernel, NeuralNetworkKernel +export SpectralMixtureKernel export KernelSum, KernelProduct, KernelTensorProduct export TransformedKernel, ScaledKernel, NormalizedKernel @@ -32,7 +33,7 @@ export with_lengthscale export NystromFact, nystrom export gaborkernel -export spectral_mixture_kernel, spectral_mixture_product_kernel +export spectral_mixture_product_kernel export ColVecs, RowVecs @@ -115,6 +116,8 @@ include(joinpath("mokernels", "lmm.jl")) include("chainrules.jl") include("zygoterules.jl") +include("deprecated.jl") + include("test_utils.jl") function __init__() diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 4beada00a..21b65ac6c 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -1,11 +1,11 @@ @doc raw""" - spectral_mixture_kernel( + SpectralMixtureKernel( h::Kernel=SqExponentialKernel(), α::AbstractVector{<:Real}, γ::AbstractMatrix{<:Real}, ω::AbstractMatrix{<:Real}, ) - spectral_mixture_kernel( + SpectralMixtureKernel( h::Kernel=SqExponentialKernel(), α::AbstractVector{<:Real}, γ::AbstractVector{<:AbstractVecOrMat{<:Real}}, @@ -41,37 +41,48 @@ and `K` is the number of components [4] http://www.cs.cmu.edu/~andrewgw/pattern/. """ -function spectral_mixture_kernel( +struct SpectralMixtureKernel{K<:Kernel,Tα<:AbstractVector,Tγ<:AbstractVector,Tω<:AbstractVector} <: Kernel + kernel::K + α::Tα + γ::Tγ + ω::Tω + function SpectralMixtureKernel( + h::Kernel, + α::AbstractVector{<:Real}, + γ::AbstractVector{<:AbstractVector}, + ω::AbstractVector{<:AbstractVector}, + ) + if !(length(α) == length(γ) == length(ω)) + throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) + end + return new{typeof(h),typeof(α),typeof(γ),typeof(ω)}(h, α, γ, ω) + end +end + +function SpectralMixtureKernel( h::Kernel, α::AbstractVector{<:Real}, γ::AbstractMatrix{<:Real}, ω::AbstractMatrix{<:Real}, ) - return spectral_mixture_kernel(h, α, ColVecs(γ), ColVecs(ω)) + size(γ) == size(ω) || throw(DimensionMismatch("γ and ω have different dimensions")) + return SpectralMixtureKernel(h, α, ColVecs(γ), ColVecs(ω)) end -function spectral_mixture_kernel( - h::Kernel, - α::AbstractVector{<:Real}, - γ::AbstractVector{<:AbstractVector}, - ω::AbstractVector{<:AbstractVector}, +function SpectralMixtureKernel( + αs::AbstractVector{<:Real}, γs::AbstractVecOrMat, ωs::AbstractVecOrMat ) - if !(length(α) == length(γ) == length(ω)) - throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) - end + return SpectralMixtureKernel(SqExponentialKernel(), αs, γs, ωs) +end - return mapreduce(+, α, γ, ω) do αₖ, γₖ, ωₖ - sqkernel = TransformedKernel(h, ARDTransform(γₖ)) - coskernel = TransformedKernel(CosineKernel(), ARDTransform(2 * ωₖ)) - return αₖ * sqkernel * coskernel +function (κ::SpectralMixtureKernel)(x, y) + return mapreduce(+, κ.α, κ.γ, κ.ω) do α, γ, ω + k = TransformedKernel(κ.kernel, ARDTransform(γ)) + α * k(x, y) * cospi(2 * dot(ω, x - y)) end end -function spectral_mixture_kernel( - αs::AbstractVector{<:Real}, γs::AbstractVecOrMat, ωs::AbstractVecOrMat -) - return spectral_mixture_kernel(SqExponentialKernel(), αs, γs, ωs) -end +Base.show(io::IO, κ::SpectralMixtureKernel) = print(io, "SpectralMixtureKernel Kernel (kernel = ", κ.kernel, ", # components = ", length(κ.α), ")") @doc raw""" spectral_mixture_product_kernel( @@ -114,6 +125,7 @@ function spectral_mixture_product_kernel( γ::AbstractMatrix{<:Real}, ω::AbstractMatrix{<:Real}, ) + (size(α) == size(γ) == size(ω)) || throw(DimensionMismatch("α, γ and ω have different dimensions")) return spectral_mixture_product_kernel(h, RowVecs(α), RowVecs(γ), RowVecs(ω)) end @@ -123,10 +135,8 @@ function spectral_mixture_product_kernel( γ::AbstractVector{<:AbstractVector{<:Real}}, ω::AbstractVector{<:AbstractVector{<:Real}}, ) - (length(α) == length(γ) && length(γ) == length(ω)) || - throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) return mapreduce(⊗, α, γ, ω) do αᵢ, γᵢ, ωᵢ - return spectral_mixture_kernel(h, αᵢ, permutedims(γᵢ), permutedims(ωᵢ)) + return SpectralMixtureKernel(h, αᵢ, permutedims(γᵢ), permutedims(ωᵢ)) end end diff --git a/src/deprecated.jl b/src/deprecated.jl new file mode 100644 index 000000000..5259742d7 --- /dev/null +++ b/src/deprecated.jl @@ -0,0 +1 @@ +@deprecate spectral_mixture_kernel SpectralMixtureKernel \ No newline at end of file diff --git a/test/basekernels/sm.jl b/test/basekernels/sm.jl index 1a2726def..e308dc4a0 100644 --- a/test/basekernels/sm.jl +++ b/test/basekernels/sm.jl @@ -9,14 +9,14 @@ γs = rand(D_in, K) ωs = rand(D_in, K) - k1 = spectral_mixture_kernel(αs₁, γs, ωs) + k1 = SpectralMixtureKernel(αs₁, γs, ωs) k2 = spectral_mixture_product_kernel(αs₂, γs, ωs) t = v1 - v2 @test k1(v1, v2) ≈ sum( αs₁[k] * exp(-norm(t .* γs[:, k])^2 / 2) * cospi(2 * dot(ωs[:, k], t)) for k in 1:K - ) atol = 1e-5 + ) @test isapprox( k2(v1, v2), @@ -25,12 +25,11 @@ αs₂[i, k] * exp(-(γs[i, k] * t[i])^2 / 2) * cospi(2 * ωs[i, k] * t[i]) for k in 1:K ) for i in 1:D_in - ); - atol=1e-8, + ) ) - @test_throws DimensionMismatch spectral_mixture_kernel(rand(5), rand(4, 3), rand(4, 3)) - @test_throws DimensionMismatch spectral_mixture_kernel(rand(3), rand(4, 3), rand(5, 3)) + @test_throws DimensionMismatch SpectralMixtureKernel(rand(5), rand(4, 3), rand(4, 3)) + @test_throws DimensionMismatch SpectralMixtureKernel(rand(3), rand(4, 3), rand(5, 3)) @test_throws DimensionMismatch spectral_mixture_product_kernel( rand(5, 3), rand(4, 3), rand(5, 3) ) @@ -40,15 +39,15 @@ x0 = ColVecs(randn(D_in, 3)) x1 = ColVecs(randn(D_in, 3)) x2 = ColVecs(randn(D_in, 2)) - TestUtils.test_interface(k1, x0, x1, x2) - TestUtils.test_interface(k2, x0, x1, x2) + test_interface(k1, x0, x1, x2) + test_interface(k2, x0, x1, x2) end @testset "RowVecs" begin x0 = RowVecs(randn(3, D_in)) x1 = RowVecs(randn(3, D_in)) x2 = RowVecs(randn(2, D_in)) - TestUtils.test_interface(k1, x0, x1, x2) - TestUtils.test_interface(k2, x0, x1, x2) + test_interface(k1, x0, x1, x2) + test_interface(k2, x0, x1, x2) end # test_ADs(x->spectral_mixture_kernel(exp.(x[1:3]), reshape(x[4:18], 5, 3), reshape(x[19:end], 5, 3)), vcat(log.(αs₁), γs[:], ωs[:]), dims = [5,5]) @test_broken "No tests passing (BaseKernel)" From cb2720906f44348e5ce0f597de21ecc8720d257d Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 17:12:50 +0200 Subject: [PATCH 08/18] formatting --- src/basekernels/sm.jl | 18 +++++++++++++++--- src/deprecated.jl | 2 +- test/basekernels/sm.jl | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 21b65ac6c..e6155f4cb 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -41,7 +41,9 @@ and `K` is the number of components [4] http://www.cs.cmu.edu/~andrewgw/pattern/. """ -struct SpectralMixtureKernel{K<:Kernel,Tα<:AbstractVector,Tγ<:AbstractVector,Tω<:AbstractVector} <: Kernel +struct SpectralMixtureKernel{ + K<:Kernel,Tα<:AbstractVector,Tγ<:AbstractVector,Tω<:AbstractVector +} <: Kernel kernel::K α::Tα γ::Tγ @@ -82,7 +84,16 @@ function (κ::SpectralMixtureKernel)(x, y) end end -Base.show(io::IO, κ::SpectralMixtureKernel) = print(io, "SpectralMixtureKernel Kernel (kernel = ", κ.kernel, ", # components = ", length(κ.α), ")") +function Base.show(io::IO, κ::SpectralMixtureKernel) + return print( + io, + "SpectralMixtureKernel Kernel (kernel = ", + κ.kernel, + ", # components = ", + length(κ.α), + ")", + ) +end @doc raw""" spectral_mixture_product_kernel( @@ -125,7 +136,8 @@ function spectral_mixture_product_kernel( γ::AbstractMatrix{<:Real}, ω::AbstractMatrix{<:Real}, ) - (size(α) == size(γ) == size(ω)) || throw(DimensionMismatch("α, γ and ω have different dimensions")) + (size(α) == size(γ) == size(ω)) || + throw(DimensionMismatch("α, γ and ω have different dimensions")) return spectral_mixture_product_kernel(h, RowVecs(α), RowVecs(γ), RowVecs(ω)) end diff --git a/src/deprecated.jl b/src/deprecated.jl index 5259742d7..9b0d728fe 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1 +1 @@ -@deprecate spectral_mixture_kernel SpectralMixtureKernel \ No newline at end of file +@deprecate spectral_mixture_kernel SpectralMixtureKernel diff --git a/test/basekernels/sm.jl b/test/basekernels/sm.jl index e308dc4a0..9d6bef672 100644 --- a/test/basekernels/sm.jl +++ b/test/basekernels/sm.jl @@ -25,7 +25,7 @@ αs₂[i, k] * exp(-(γs[i, k] * t[i])^2 / 2) * cospi(2 * ωs[i, k] * t[i]) for k in 1:K ) for i in 1:D_in - ) + ), ) @test_throws DimensionMismatch SpectralMixtureKernel(rand(5), rand(4, 3), rand(4, 3)) From 4f5c0252aa17efe160f1b02fe1c08b767151fa0d Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 17:22:39 +0200 Subject: [PATCH 09/18] revert change lineartransform --- src/transform/lineartransform.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/transform/lineartransform.jl b/src/transform/lineartransform.jl index 9f49e2c23..d8a27e36a 100644 --- a/src/transform/lineartransform.jl +++ b/src/transform/lineartransform.jl @@ -1,6 +1,5 @@ """ LinearTransform(A::AbstractMatrix) - LinearTransform(a::AbstractVector) Linear transformation of the input realised by the matrix `A`. If `a` is an `AbstractVector`, `Diagonal(a)` is passed @@ -20,8 +19,6 @@ struct LinearTransform{T<:AbstractMatrix{<:Real}} <: Transform A::T end -LinearTransform(a::AbstractVector{<:Real}) = LinearTransform(Diagonal(a)) - @functor LinearTransform function set!(t::LinearTransform{<:AbstractMatrix{T}}, A::AbstractMatrix{T}) where {T<:Real} From 5f8e7f5658aa6a2a1c6e4fdf48650d5854390cfd Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 31 Aug 2021 17:23:03 +0200 Subject: [PATCH 10/18] Add functor --- src/basekernels/sm.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index e6155f4cb..6d98f74ac 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -61,6 +61,8 @@ struct SpectralMixtureKernel{ end end +@functor SpectralMixtureKernel + function SpectralMixtureKernel( h::Kernel, α::AbstractVector{<:Real}, From 911cb73ccb9a23032a0d0ea40a77a9ff8abd120d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Wed, 1 Sep 2021 16:43:25 +0200 Subject: [PATCH 11/18] Update the docs --- docs/src/kernels.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/kernels.md b/docs/src/kernels.md index ae0f0843a..66c5d9223 100644 --- a/docs/src/kernels.md +++ b/docs/src/kernels.md @@ -99,7 +99,7 @@ GammaRationalKernel ### Spectral Mixture Kernels ```@docs -spectral_mixture_kernel +SpectralMixtureKernel spectral_mixture_product_kernel ``` From 64e3641bc88cb16878d7291f021f09b0202347e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Wed, 1 Sep 2021 17:05:24 +0200 Subject: [PATCH 12/18] Update docs --- src/basekernels/sm.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 6d98f74ac..2b01c39a7 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -12,7 +12,7 @@ ω::AbstractVector{<:AbstractVecOrMat{<:Real}}, ) -Generalised Spectral Mixture kernel function as described in [1] (Eq. 6). +Generalised Spectral Mixture kernel function as described in [1] in equation 6. This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] ```math @@ -23,7 +23,7 @@ This family of functions is dense in the family of stationary real-valued kernel - `h`: Stationary kernel (translation invariant), [`SqExponentialKernel`](@ref) by default - `α`: Weight vector of each mixture component - `γ`: Linear transformation of the input for `h`. -- `ω`: Linear transformation of the input for the [`CosineKernel`](@ref). +- `ω`: Frequencies for the cosine function. `γ` and `ω` can be an - `AbstractMatrix` of dimension `D x K` where `D` is the dimension of the inputs @@ -106,20 +106,20 @@ end ) The spectral mixture product is tensor product of spectral mixture kernel applied -on each dimension as described in [1] (Eq. 13, 14). +on each dimension as described in [1] in equations 13 and 14. With enough components, the SMP kernel can model any product kernel to arbitrary precision, and is flexible even with a small number of components ```math - κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{k,i} \cdot h(\gamma_{k,i} \cdot x_i, \gamma_{k,i} \cdot y_i)) \cdot \cos(2\pi \cdot \omega_{i, k} \cdot (x_i - y_i)))) + κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{ik} \cdot h(\gamma_{ik} \cdot x_i, \gamma_{ik} \cdot y_i)) \cdot \cos(2\pi \cdot \omega_{ik} \cdot (x_i - y_i)))) ``` ## Arguments - `h`: Stationary kernel (translation invariant), [`SqExponentialKernel`](@ref) by default - `α`: Weight of each mixture component for each dimension - `γ`: Linear transformation of the input for `h`. -- `ω`: Linear transformation of the input for the [`CosineKernel`](@ref). +- `ω`: Frequencies for the cosine function. `α`, `γ` and `ω` can be an - `AbstractMatrix` of dimension `D x K` where `D` is the dimension of the inputs From 6f9cae8cbf1fe25485fd2ac21cd826e8263bdeba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Fri, 15 Oct 2021 15:47:30 +0200 Subject: [PATCH 13/18] Update src/basekernels/sm.jl Co-authored-by: David Widmann --- src/basekernels/sm.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 2b01c39a7..cb93f1315 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -16,7 +16,12 @@ Generalised Spectral Mixture kernel function as described in [1] in equation 6. This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] ```math - κ(x, y) = \sum_{k=1}^K \alpha_k \cdot h(γ_k \odot x, γ_k \odot y) \cdot \cos(2π \cdot ω_k^\top (x-y)), +# Definition + +For inputs ``x, x′ \in \mathbb{R}^m``, the spectral mixture kernel ``\tilde{k}`` with ``K`` mixture components, mixture weights ``\alpha \in \mathbb{R}^K``, linear transformations ``\gamma_1, \ldots, \gamma_K \in \mathbb{R}^m``, and frequencies ``\omega_1, \ldots, \omega_K \in \mathbb{R}^m`` derived from a translation-invariant kernel ``k`` is defined as +```math +\tilde{k}(x, x'; \alpha, \gamma_1, \ldots, \gamma_K, \omega_1, \ldots, \omega_K, k) = \sum_{i=1}^K \alpha_i k(\gamma_i \odot x, \gamma_i \odot y) \cos(2\pi \omega_i^\top (x-y)). +``` ``` ## Arguments From 6fde6918ee5cf6c2fa30075fe9e531a9401e8fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Fri, 15 Oct 2021 16:01:06 +0200 Subject: [PATCH 14/18] Fixed docstrings and added constructors checks --- src/basekernels/sm.jl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index cb93f1315..440927e23 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -15,20 +15,18 @@ Generalised Spectral Mixture kernel function as described in [1] in equation 6. This family of functions is dense in the family of stationary real-valued kernels with respect to the pointwise convergence.[1] -```math -# Definition +## Definition -For inputs ``x, x′ \in \mathbb{R}^m``, the spectral mixture kernel ``\tilde{k}`` with ``K`` mixture components, mixture weights ``\alpha \in \mathbb{R}^K``, linear transformations ``\gamma_1, \ldots, \gamma_K \in \mathbb{R}^m``, and frequencies ``\omega_1, \ldots, \omega_K \in \mathbb{R}^m`` derived from a translation-invariant kernel ``k`` is defined as +For inputs ``x, x′ \in \mathbb{R}^D``, the spectral mixture kernel ``\tilde{k}`` with ``K`` mixture components, mixture weights ``\alpha \in \mathbb{R}^K``, linear transformations ``\gamma_1, \ldots, \gamma_K \in \mathbb{R}^D``, and frequencies ``\omega_1, \ldots, \omega_K \in \mathbb{R}^D`` derived from a translation-invariant kernel ``k`` is defined as ```math \tilde{k}(x, x'; \alpha, \gamma_1, \ldots, \gamma_K, \omega_1, \ldots, \omega_K, k) = \sum_{i=1}^K \alpha_i k(\gamma_i \odot x, \gamma_i \odot y) \cos(2\pi \omega_i^\top (x-y)). ``` -``` ## Arguments - `h`: Stationary kernel (translation invariant), [`SqExponentialKernel`](@ref) by default -- `α`: Weight vector of each mixture component +- `α`: Weight vector of each mixture component (should be positive) - `γ`: Linear transformation of the input for `h`. -- `ω`: Frequencies for the cosine function. +- `ω`: Frequencies for the cosine function. (should be positive) `γ` and `ω` can be an - `AbstractMatrix` of dimension `D x K` where `D` is the dimension of the inputs @@ -59,9 +57,9 @@ struct SpectralMixtureKernel{ γ::AbstractVector{<:AbstractVector}, ω::AbstractVector{<:AbstractVector}, ) - if !(length(α) == length(γ) == length(ω)) - throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) - end + (length(α) == length(γ) == length(ω)) || throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) + any(<(0), α) || throw(ArgumentError("At least element of α is negative")) + any(any.(<(0), ω)) || throw(ArgumentError("At least one element of ω is negative")) return new{typeof(h),typeof(α),typeof(γ),typeof(ω)}(h, α, γ, ω) end end @@ -116,8 +114,12 @@ With enough components, the SMP kernel can model any product kernel to arbitrary precision, and is flexible even with a small number of components +## Definition + +For inputs ``x, x′ \in \mathbb{R}^D``, the spectral mixture product kernel ``\tilde{k}`` with ``K`` mixture components, mixture weights ``\alpha_1, \alpha_2, \ldots, \alpha_K \in \mathbb{R}^D``, linear transformations ``\gamma_1, \ldots, \gamma_K \in \mathbb{R}^D``, and frequencies ``\omega_1, \ldots, \omega_K \in \mathbb{R}^D`` derived from a translation-invariant kernel ``k`` is defined as + ```math - κ(x, y) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{ik} \cdot h(\gamma_{ik} \cdot x_i, \gamma_{ik} \cdot y_i)) \cdot \cos(2\pi \cdot \omega_{ik} \cdot (x_i - y_i)))) + \tilde{k}(x, x'; \alpha_1, \ldots, \alpha_k, \gamma_1, \ldots, \gamma_K, \omega_1, \ldots, \omega_K, k) = \prod_{i=1}^D \sum_{k=1}^K \alpha_{ik} \cdot h(\gamma_{ik} \cdot x_i, \gamma_{ik} \cdot y_i)) \cdot \cos(2\pi \cdot \omega_{ik} \cdot (x_i - y_i)))) ``` ## Arguments From 38f9eedc92b4e9ed0bfa9724363d35ce8dc6dfc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Fri, 15 Oct 2021 16:03:02 +0200 Subject: [PATCH 15/18] Update src/basekernels/sm.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/basekernels/sm.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 440927e23..43b268fd1 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -57,7 +57,8 @@ struct SpectralMixtureKernel{ γ::AbstractVector{<:AbstractVector}, ω::AbstractVector{<:AbstractVector}, ) - (length(α) == length(γ) == length(ω)) || throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) + (length(α) == length(γ) == length(ω)) || + throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) any(<(0), α) || throw(ArgumentError("At least element of α is negative")) any(any.(<(0), ω)) || throw(ArgumentError("At least one element of ω is negative")) return new{typeof(h),typeof(α),typeof(γ),typeof(ω)}(h, α, γ, ω) From a07704b3e37b5ff3e26be1796ce95e30b0825759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Fri, 15 Oct 2021 16:30:04 +0200 Subject: [PATCH 16/18] Fix checks --- src/basekernels/sm.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 43b268fd1..0426db34e 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -59,8 +59,8 @@ struct SpectralMixtureKernel{ ) (length(α) == length(γ) == length(ω)) || throw(DimensionMismatch("The dimensions of α, γ, ans ω do not match")) - any(<(0), α) || throw(ArgumentError("At least element of α is negative")) - any(any.(<(0), ω)) || throw(ArgumentError("At least one element of ω is negative")) + any(<(0), α) && throw(ArgumentError("At least one element of α is negative")) + any(any.(<(0), ω)) && throw(ArgumentError("At least one element of ω is negative")) return new{typeof(h),typeof(α),typeof(γ),typeof(ω)}(h, α, γ, ω) end end From f37b566118cba8b455d12c9a2b5c0c4c8cb07b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Galy-Fajou?= Date: Tue, 19 Oct 2021 12:24:55 +0200 Subject: [PATCH 17/18] improve computations using broadcaster Co-authored-by: David Widmann --- src/basekernels/sm.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/basekernels/sm.jl b/src/basekernels/sm.jl index 0426db34e..b23e10ae4 100644 --- a/src/basekernels/sm.jl +++ b/src/basekernels/sm.jl @@ -84,10 +84,13 @@ function SpectralMixtureKernel( end function (κ::SpectralMixtureKernel)(x, y) - return mapreduce(+, κ.α, κ.γ, κ.ω) do α, γ, ω + xy = x - y + # use pairwise summation (https://github.com/JuliaLang/julia/pull/31020) + broadcasted = Broadcast.broadcasted(κ.α, κ.γ, κ.ω) do α, γ, ω k = TransformedKernel(κ.kernel, ARDTransform(γ)) - α * k(x, y) * cospi(2 * dot(ω, x - y)) + return α * k(x, y) * cospi(2 * dot(ω, xy)) end + return sum(Broadcast.instantiate(broadcasted)) end function Base.show(io::IO, κ::SpectralMixtureKernel) From 1b1ea990acfc0eea57dd1d82d8959087a7e58576 Mon Sep 17 00:00:00 2001 From: Theo Galy-Fajou Date: Tue, 19 Oct 2021 16:43:01 +0200 Subject: [PATCH 18/18] Introduction of Tullio to perform pairwise operations --- Project.toml | 1 + src/KernelFunctions.jl | 8 +++++ src/distances/binaryop.jl | 0 src/distances/delta.jl | 6 ++-- src/distances/dotproduct.jl | 31 ++++++++++--------- src/distances/pairwise.jl | 61 +++++++------------------------------ src/distances/sinus.jl | 4 +-- 7 files changed, 40 insertions(+), 71 deletions(-) create mode 100644 src/distances/binaryop.jl diff --git a/Project.toml b/Project.toml index 2c0bafd1d..047926f01 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,7 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" TensorCore = "62fd8b95-f654-4bbd-a8a5-9c27f68ccd50" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444" [compat] diff --git a/src/KernelFunctions.jl b/src/KernelFunctions.jl index 3d52fe648..fff9f05ed 100644 --- a/src/KernelFunctions.jl +++ b/src/KernelFunctions.jl @@ -59,6 +59,7 @@ using IrrationalConstants: logtwo, twoπ, invsqrt2 using LogExpFunctions: softplus using StatsBase using TensorCore +using Tullio using ZygoteRules: ZygoteRules, AContext, literal_getproperty, literal_getfield # Hack to work around Zygote type inference problems. @@ -68,6 +69,13 @@ abstract type Kernel end abstract type SimpleKernel <: Kernel end include("utils.jl") + +const VecOfVecs = Union{ColVecs,RowVecs} + +# A general binary op type not respecting Distances metric rules +abstract type AbstractBinaryOp end +const BinaryOp = Union{AbstractBinaryOp,Distances.PreMetric} + include("distances/pairwise.jl") include("distances/dotproduct.jl") include("distances/delta.jl") diff --git a/src/distances/binaryop.jl b/src/distances/binaryop.jl new file mode 100644 index 000000000..e69de29bb diff --git a/src/distances/delta.jl b/src/distances/delta.jl index f41370eae..f25ba633a 100644 --- a/src/distances/delta.jl +++ b/src/distances/delta.jl @@ -1,6 +1,6 @@ -# Delta is not following the PreMetric rules since d(x, x) == 1 -struct Delta <: Distances.UnionPreMetric end +struct Delta <: AbstractBinaryOp end +# Basic definitions (dist::Delta)(a::Number, b::Number) = a == b Base.@propagate_inbounds function (dist::Delta)( a::AbstractArray{<:Number}, b::AbstractArray{<:Number} @@ -14,5 +14,3 @@ Base.@propagate_inbounds function (dist::Delta)( end return a == b end - -Distances.result_type(::Delta, Ta::Type, Tb::Type) = Bool diff --git a/src/distances/dotproduct.jl b/src/distances/dotproduct.jl index 1cef13ab5..f689d3cb1 100644 --- a/src/distances/dotproduct.jl +++ b/src/distances/dotproduct.jl @@ -1,21 +1,22 @@ ## DotProduct is not following the PreMetric rules since d(x, x) != 0 and d(x, y) >= 0 for all x, y -struct DotProduct <: Distances.UnionPreMetric end +struct DotProduct <: AbstractBinaryOp end -@inline function Distances._evaluate(::DotProduct, a::AbstractVector, b::AbstractVector) - @boundscheck if length(a) != length(b) - throw( - DimensionMismatch( - "first array has length $(length(a)) which does not match the length of the second, $(length(b)).", - ), - ) - end - return dot(a, b) +(::DotProduct)(a::AbstractVector, b::AbstractVector) = dot(a, b) + +(::DotProduct)(a::Number, b::Number) = a * b + +function pairwise(::DotProduct, x::ColVecs, y::ColVecs) + return @tullio out[i, j] := x.X[k, i] * y.X[k, j] end -Distances.result_type(::DotProduct, Ta::Type, Tb::Type) = promote_type(Ta, Tb) +function pairwise(::DotProduct, x::RowVecs, y::RowVecs) + return @tullio out[i, j] := x.X[i, k] * y.X[j, k] +end -@inline Distances.eval_op(::DotProduct, a::Real, b::Real) = a * b -@inline function (dist::DotProduct)(a::AbstractArray, b::AbstractArray) - return Distances._evaluate(dist, a, b) +function colwise(::DotProduct, x::RowVecs, y::RowVecs=x) + return @tullio out[i] := x.X[i, k] * y.X[i, k] end -@inline (dist::DotProduct)(a::Number, b::Number) = a * b + +function colwise(::DotProduct, x::ColVecs, y::ColVecs=x) + return @tullio out[i] := x.X[k, i] * y.X[k, i] +end \ No newline at end of file diff --git a/src/distances/pairwise.jl b/src/distances/pairwise.jl index 8b5cb43e7..daa894a81 100644 --- a/src/distances/pairwise.jl +++ b/src/distances/pairwise.jl @@ -1,34 +1,16 @@ # Add our own pairwise function to be able to apply it on vectors -function pairwise(d::PreMetric, X::AbstractVector, Y::AbstractVector) - return broadcast(d, X, permutedims(Y)) +function pairwise(d::BinaryOp, X::AbstractVector, Y::AbstractVector) + return @tullio out[i, j] := d(X[i], Y[j]) end -pairwise(d::PreMetric, X::AbstractVector) = pairwise(d, X, X) +pairwise(d::BinaryOp, X::AbstractVector) = pairwise(d, X, X) -function pairwise!(out::AbstractMatrix, d::PreMetric, X::AbstractVector, Y::AbstractVector) - return broadcast!(d, out, X, permutedims(Y)) +function pairwise!(out::AbstractMatrix, d::BinaryOp, X::AbstractVector, Y::AbstractVector) + return @tullio out[i, j] = d(X[i], Y[j]) end -pairwise!(out::AbstractMatrix, d::PreMetric, X::AbstractVector) = pairwise!(out, d, X, X) - -function pairwise(d::PreMetric, x::AbstractVector{<:Real}) - return Distances_pairwise(d, reshape(x, :, 1); dims=1) -end - -function pairwise(d::PreMetric, x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) - return Distances_pairwise(d, reshape(x, :, 1), reshape(y, :, 1); dims=1) -end - -function pairwise!(out::AbstractMatrix, d::PreMetric, x::AbstractVector{<:Real}) - return Distances.pairwise!(out, d, reshape(x, :, 1); dims=1) -end - -function pairwise!( - out::AbstractMatrix, d::PreMetric, x::AbstractVector{<:Real}, y::AbstractVector{<:Real} -) - return Distances.pairwise!(out, d, reshape(x, :, 1), reshape(y, :, 1); dims=1) -end +pairwise!(out::AbstractMatrix, d::BinaryOp, X::AbstractVector) = pairwise!(out, d, X, X) # Also defines the colwise method for abstractvectors @@ -36,35 +18,14 @@ function colwise(d::PreMetric, x::AbstractVector) return zeros(Distances.result_type(d, x, x), length(x)) # Valid since d(x,x) == 0 by definition end -function colwise(d::PreMetric, x::ColVecs) - return zeros(Distances.result_type(d, x.X, x.X), length(x)) # Valid since d(x,x) == 0 by definition -end - -function colwise(d::PreMetric, x::RowVecs) +function colwise(d::PreMetric, x::VecOfVecs) return zeros(Distances.result_type(d, x.X, x.X), length(x)) # Valid since d(x,x) == 0 by definition end -## The following is a hack for DotProduct and Delta to still work -function colwise(d::Distances.UnionPreMetric, x::ColVecs) - return Distances.colwise(d, x.X, x.X) -end - -function colwise(d::Distances.UnionPreMetric, x::RowVecs) - return Distances.colwise(d, x.X', x.X') -end - -function colwise(d::Distances.UnionPreMetric, x::AbstractVector) - return map(d, x, x) -end - -function colwise(d::PreMetric, x::ColVecs, y::ColVecs) - return Distances.colwise(d, x.X, y.X) -end - -function colwise(d::PreMetric, x::RowVecs, y::RowVecs) - return Distances.colwise(d, x.X', y.X') +function colwise(d::AbstractBinaryOp, x::AbstractVector) + return @tullio out[i] := d(x[i], x[i]) end function colwise(d::PreMetric, x::AbstractVector, y::AbstractVector) - return map(d, x, y) -end + return @tullio out[i] := d(x[i], y[i]) +end \ No newline at end of file diff --git a/src/distances/sinus.jl b/src/distances/sinus.jl index 4bcf4bdf0..1c063f41e 100644 --- a/src/distances/sinus.jl +++ b/src/distances/sinus.jl @@ -1,5 +1,5 @@ -struct Sinus{T} <: Distances.UnionSemiMetric - r::Vector{T} +struct Sinus{T,V<:AbstractVector{T}} <: Distances.SemiMetric + r::V end Distances.parameters(d::Sinus) = d.r