Implement Nystrom approximation in nystrom.jl

IsakFalk · IsakFalk · commit eeaf1c6630b1 · 2020-01-26T15:56:00.000Z
diff --git a/Project.toml b/Project.toml
@@ -8,6 +8,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444"
 
diff --git a/src/KernelFunctions.jl b/src/KernelFunctions.jl
@@ -14,6 +14,8 @@ export KernelSum, KernelProduct
 
 export Transform, SelectTransform, ChainTransform, ScaleTransform, LowRankTransform, IdentityTransform, FunctionTransform
 
+export NystromFact, nystrom
+
 using Compat
 using Distances, LinearAlgebra
 using SpecialFunctions: logabsgamma, besselk
@@ -41,6 +43,7 @@ include("matrix/kernelmatrix.jl")
 include("matrix/kernelpdmat.jl")
 include("kernels/kernelsum.jl")
 include("kernels/kernelproduct.jl")
+include("approximations/nystrom.jl")
 
 include("generic.jl")
 
diff --git a/src/approximations/nystrom.jl b/src/approximations/nystrom.jl
@@ -0,0 +1,103 @@
+using StatsBase
+using LinearAlgebra
+# Following the algorithm by Williams and Seeger, 2001
+# Cs is equivalent to X_mm and C to X_mn
+
+# Draw `ceil(n*r)` distinct observation indices (n = size(X, obsdim)), kept in 1:n order.
+function sampleindex(X::AbstractMatrix, r::AbstractFloat; obsdim::Integer=defaultobs)
+    0 < r <= 1 || throw(ArgumentError("Sample rate `r` must be in range (0,1]"))
+    n = size(X, obsdim)
+    m = ceil(Int, n * r)
+    return StatsBase.sample(1:n, m; replace=false, ordered=true)  # a range needs no `collect`
+end
+
+# Return `(C, Cs)`: `C = k(Xₘ, X)` with `Xₘ` the observations of `X` indexed by `S`, `Cs = C[:, S]`.
+function nystrom_sample(k::Kernel, X::AbstractMatrix, S::Vector{<:Integer}; obsdim::Integer=defaultobs)
+    obsdim in (1, 2) || throw(ArgumentError("`obsdim` should be 1 or 2 (see docs of kernelmatrix)"))
+    Xₘ = obsdim == 1 ? X[S, :] : X[:, S]
+    C = k(Xₘ, X; obsdim=obsdim)
+    return (C, C[:, S])
+end
+
+function nystrom_pinv!(Cs::Matrix{T}, tol::T=eps(T)*size(Cs,1)) where {T<:AbstractFloat}
+    # In-place eigendecomposition of the sampled block, viewed as symmetric.
+    # `Cs` is overwritten by this call, hence the `!` in the function name.
+    fact = eigen!(Symmetric(Cs))
+    λ = fact.values
+    V = fact.vectors
+
+    # Eigenvalues no larger in magnitude than this cutoff are treated as zero.
+    cutoff = maximum(λ) * tol
+
+    # Overwrite each eigenvalue with its inverse square root (zero below the cutoff),
+    # so that `λ` holds the diagonal of Λ^(-1/2) in the pseudo-inverse sense.
+    map!(λ, λ) do v
+        abs(v) <= cutoff ? zero(T) : one(T) / sqrt(v)
+    end
+
+    # Scale column i of `V` by λ[i] in place, giving VΛ^(-1/2).
+    rmul!(V, Diagonal(λ))
+
+    # W = (VΛ^(-1/2))(VΛ^(-1/2))ᵀ is the pseudo-inverse; mirror the upper triangle down.
+    return LinearAlgebra.copytri!(V * V', 'U')
+end
+
+@doc raw"""
+    NystromFact
+
+Type for storing a Nystrom factorization. The factorization contains two fields: `W` and
+`C`, two matrices of the same floating-point element type satisfying:
+```math
+\mathbf{K} \approx \mathbf{C}^{\intercal}\mathbf{W}\mathbf{C}
+```
+"""
+struct NystromFact{T<:AbstractFloat}
+    W::Matrix{T}  # middle factor; `nystrom` fills it with the output of `nystrom_pinv!`
+    C::Matrix{T}  # outer factor; `nystrom` fills it with the `C` from `nystrom_sample`
+end
+
+@doc raw"""
+    nystrom(k::Kernel, X::AbstractMatrix, S::Vector{<:Integer}; obsdim::Int=defaultobs)
+
+Build the Nystrom factorization of the kernel matrix of `X` under kernel `k`, using the
+observations of `X` indexed by `S` as the sampled subset. The returned `NystromFact`
+holds matrices `W` and `C` such that:
+```math
+\mathbf{K} \approx \mathbf{C}^{\intercal}\mathbf{W}\mathbf{C}
+```
+`obsdim` selects the dimension of `X` along which observations are indexed.
+"""
+function nystrom(k::Kernel, X::AbstractMatrix, S::Vector{<:Integer}; obsdim::Int=defaultobs)
+    C, Cs = nystrom_sample(k, X, S; obsdim=obsdim)
+    W = nystrom_pinv!(Cs)  # overwrites `Cs`, which is not needed afterwards
+    return NystromFact{eltype(W)}(W, C)
+end
+
+@doc raw"""
+    nystrom(k::Kernel, X::AbstractMatrix, r::AbstractFloat; obsdim::Int=defaultobs)
+
+Build the Nystrom factorization of the kernel matrix of `X` under kernel `k` from a random
+subset of observations: `ceil(n*r)` of the `n` observations are drawn without replacement.
+The sample ratio `r` must lie in `(0, 1]`; otherwise an `ArgumentError` is thrown.
+
+The returned `NystromFact` holds matrices `W` and `C` such that:
+```math
+\mathbf{K} \approx \mathbf{C}^{\intercal}\mathbf{W}\mathbf{C}
+```
+"""
+function nystrom(k::Kernel, X::AbstractMatrix, r::AbstractFloat; obsdim::Int=defaultobs)
+    # Draw the sampled indices first, then reuse the index-based method.
+    S = sampleindex(X, r; obsdim=obsdim)
+    return nystrom(k, X, S; obsdim=obsdim)
+end
+
+"""
+    kernelmatrix(CᵀWC::NystromFact)
+
+Compute the approximate kernel matrix `C' * W * C` from the Nystrom factorization `CᵀWC`.
+"""
+function kernelmatrix(CᵀWC::NystromFact{<:AbstractFloat})
+    W = CᵀWC.W
+    C = CᵀWC.C
+    return C'*W*C
+end