Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 10 additions & 20 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,34 +1,26 @@
name = "VecchiaMLE"
uuid = "499233a1-c46f-47cc-ab67-5a37788e9870"
authors = ["Caleb Derrickson (@CalebDerrickson), Alexis Montoison (@amontoison)"]
version = "0.1.0"
authors = ["Caleb Derrickson (@CalebDerrickson), Alexis Montoison (@amontoison)"]

[deps]
BesselK = "432ab697-7a72-484f-bc4a-bc531f5c819b"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
HNSW = "540f64fa-c57e-11e8-081c-41422cda4629"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[weakdeps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Vecchia = "8d73829f-f4b0-474a-9580-cecc8e084068"

[extensions]
VecchiaMLECUDAExt = "CUDA"
VecchiaMLEVecchiaExt = ["StaticArrays", "Vecchia"]

[compat]
BesselK = "0.5.7"
CUDA = "5.8.2"
Distances = "0.10.12"
Distributions = "0.25.120"
HNSW = "0.1.5"
HSL = "0.5.1"
Ipopt = "1.10.6"
JuMP = "1.27.0"
Expand All @@ -41,27 +33,25 @@ NLPModels = "0.21.5"
NLPModelsIpopt = "0.10.4, 0.11"
NLPModelsJuMP = "0.13.2"
NLPModelsTest = "0.10.2"
NearestNeighbors = "0.4.22"
Random = "1.10"
SparseArrays = "1.10"
SpecialFunctions = "2.5.1"
Statistics = "1.10"
Test = "1.10"
UnoSolver = "0.1.9"
julia = "1.10"

[extras]
HSL = "34c5aeac-e683-54a6-a0e9-6e0fdc586c50"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
MadNLPGPU = "d72a61cc-809d-412f-99be-fd81f4b8a598"
MadNLPHSL = "7fb6135f-58fe-4112-84ca-653cf5be0c77"
NLPModelsIpopt = "f4238b75-b362-5c4c-b852-0801c9a21d71"
UnoSolver = "1baa60ac-02f7-4b39-a7a8-2f4f58486b05"
HSL = "34c5aeac-e683-54a6-a0e9-6e0fdc586c50"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
NLPModelsJuMP = "792afdf1-32c1-5681-94e0-d7bf7a5df49e"
NLPModelsTest = "7998695d-6960-4d3a-85c4-e1bceb8cd856"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UnoSolver = "1baa60ac-02f7-4b39-a7a8-2f4f58486b05"

[targets]
test = ["CUDA", "Test", "JuMP", "Ipopt", "NLPModelsIpopt", "NLPModelsJuMP", "UnoSolver", "MadNLP", "MadNLPGPU", "MadNLPHSL", "NLPModelsTest", "HSL"]
test = ["Test", "JuMP", "Ipopt", "NLPModelsIpopt", "NLPModelsJuMP", "UnoSolver", "MadNLP", "MadNLPGPU", "MadNLPHSL", "NLPModelsTest", "HSL", "StableRNGs", "CUDA", "Vecchia", "StaticArrays"]
5 changes: 5 additions & 0 deletions examples/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[deps]
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Vecchia = "8d73829f-f4b0-474a-9580-cecc8e084068"
VecchiaMLE = "499233a1-c46f-47cc-ab67-5a37788e9870"
43 changes: 43 additions & 0 deletions examples/with_vecchia.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@

# Example: fit a nonparametric Vecchia precision factor with VecchiaMLE, using
# Vecchia.jl for the ordering / conditioning-set design, and compare its KL
# divergence against a parametric Vecchia approximation with the true kernel.
using VecchiaMLE, Vecchia, StaticArrays
# norm, Symmetric, cholesky, UpperTriangular, tr, and logdet live in the
# LinearAlgebra stdlib — it must be loaded explicitly.
using LinearAlgebra

# Generate some fake data with an exponential covariance function.
# Note that each _column_ of sim is an iid replicate, in keeping with formatting
# standards of the many R packages for GPs. In this demonstration, n is small
# and the number of replicates is large to demonstrate asymptotic correctness.
pts = rand(SVector{2,Float64}, 100)
z = randn(length(pts), 1000)
K = Symmetric([exp(-norm(x-y)) for x in pts, y in pts])
sim = cholesky(K).L*z

# Create a VecchiaModel using the options specified by Vecchia.jl, in
# particular choosing an ordering (in this case RandomOrdering()) and a
# conditioning set design (in this case KNNConditioning(10)). This also returns
# the permutation for you to use with subsequent data operations. See the docs
# and myriad extensions of Vecchia.jl for more information on ordering and
# conditioning set design options.
(perm, nlp) = VecchiaModel(pts, sim, RandomOrdering(), KNNConditioning(10);
                           lvar_diag=fill(inv(sqrt(1.0)), length(pts)),
                           uvar_diag=fill(inv(sqrt(1e-2)), length(pts)),
                           lambda=1e-3)

# Now bring in some optimizers and fit the nonparametric model. This gives a U
# such that Σ^{-1} ≈ U*U', where Σ is the covariance matrix for each column of sim.
using MadNLP
result = madnlp(nlp; tol=1e-10)
U = UpperTriangular(recover_factor(nlp, result.solution))

# KL divergence from the true covariance (note K must be permuted to match the
# ordering the model was fitted under):
K_perm = K[perm, perm]
kl = (tr(U'*K_perm*U) - length(pts)) + (-2*logdet(U) - logdet(K_perm))

# compare that with what you get from a parametric Vecchia model using the
# correct kernel and the same permutation.
para_vecchia = VecchiaApproximation(pts[perm], (x,y,p)->exp(-norm(x-y)), sim[perm,:];
                                    ordering=NoPermutation())
para_U = rchol(para_vecchia, Float64[]).U
para_kl = (tr(para_U'*K_perm*para_U) - length(pts)) + (-2*logdet(para_U) - logdet(K_perm))

println("KL divergence with true parametric kernel: ", para_kl)
println("KL divergence with nonparametric estimator: ", kl)

2 changes: 2 additions & 0 deletions ext/VecchiaMLECUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ function VecchiaMLE.vecchia_generate_hess_tri_structure!(nnzh::Int, n::Int, colp
return nothing
end

#=
function VecchiaMLE.generate_samples(MatCov::CuMatrix{Float64}, number_of_samples::Int, ::Val{:gpu})
S = copy(MatCov)
V = CUDA.randn(Float64, number_of_samples, size(S, 1))
Expand All @@ -189,5 +190,6 @@ function VecchiaMLE.generate_samples(::CuMatrix{Float64}, ::Int, ::Val{:cpu})
end

VecchiaMLE.generate_samples(MatCov::CuMatrix{Float64}, number_of_samples::Int; arch::Symbol=:gpu) = generate_samples(MatCov, number_of_samples, Val(arch))
=#

end
35 changes: 35 additions & 0 deletions ext/VecchiaMLEVecchiaExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
module VecchiaMLEVecchiaExt

using VecchiaMLE, Vecchia, StaticArrays

"""
    VecchiaModel(pts, data, ordering, conditioning; lvar_diag, uvar_diag, lambda)

Build a `Vecchia.VecchiaApproximation` from locations `pts` and replicate matrix
`data` using the supplied `ordering` and `conditioning` strategies, then
construct the corresponding `VecchiaModel`. Returns `(perm, model)`, where
`perm` is the permutation chosen by the ordering (apply it to any subsequent
data operations).
"""
function VecchiaMLE.VecchiaModel(pts::Vector{SVector{D,Float64}},
                                 data::Matrix{Float64},
                                 ordering, conditioning;
                                 lvar_diag=fill(1e-10, length(pts)),
                                 uvar_diag=fill(1e10, length(pts)),
                                 lambda=0.0) where{D}
    # The kernel slot is `nothing`: this is the nonparametric (data-driven) path.
    va = VecchiaApproximation(pts, nothing, data; ordering=ordering,
                              conditioning=conditioning)
    VecchiaModel(va; lvar_diag=lvar_diag, uvar_diag=uvar_diag, lambda=lambda)
end

"""
    VecchiaModel(va::VecchiaApproximation; lvar_diag, uvar_diag, lambda)

Translate the conditioning sets of a prebuilt `VecchiaApproximation` into the
upper-triangular sparsity coordinates expected by VecchiaMLE and construct the
model. Returns `(va.perm, model)`.
"""
function VecchiaMLE.VecchiaModel(va::VecchiaApproximation;
                                 # Bug fix: the defaults previously referenced an
                                 # undefined `pts`; size them from the number of
                                 # conditioning sets (one per location) instead.
                                 lvar_diag=fill(1e-10, length(va.condix)),
                                 uvar_diag=fill(1e10, length(va.condix)),
                                 lambda=0.0)
    (condsets, perm) = (va.condix, va.perm)
    n = length(condsets)
    # One entry per conditioning index, plus one diagonal entry per column.
    nnz_total = sum(length, condsets) + n
    coords = Vector{Tuple{Int64, Int64}}()
    sizehint!(coords, nnz_total)
    for (j, cj) in enumerate(condsets)
        # Off-diagonal entries (k, j) for each conditioning index k, then (j, j).
        foreach(k -> push!(coords, (k, j)), cj)
        push!(coords, (j, j))
    end
    (rows, cols) = (getindex.(coords, 1), getindex.(coords, 2))
    # permutedims: VecchiaMLE takes replicates along the other dimension of
    # va.data — TODO(review): confirm against the VecchiaModel(I, J, samples) docs.
    (perm, VecchiaModel(rows, cols, permutedims(va.data);
                        uplo=:U, lvar_diag=lvar_diag,
                        uvar_diag=uvar_diag, lambda=lambda))
end

end
15 changes: 1 addition & 14 deletions src/VecchiaMLE.jl
Original file line number Diff line number Diff line change
@@ -1,33 +1,20 @@
module VecchiaMLE

using BesselK
using Distances
using Distributions
using HNSW
using KernelAbstractions
using LinearAlgebra
using NearestNeighbors
using NLPModels
using Random
using SparseArrays
using SpecialFunctions
using Statistics

const KA = KernelAbstractions

# Includes
include("defines.jl")
include("internals.jl")
include("utils.jl")
include("sparsity_pattern.jl")
include("permutations.jl")
include("kernels.jl")
include("models/VecchiaModel.jl")
include("models/api.jl")

# Exports
export VecchiaMLEInput, VecchiaModel
export sparsity_pattern, recover_factor
export generate_samples, generate_MatCov, generate_xyGrid, generate_rectGrid
export VecchiaModel, recover_factor

end
74 changes: 0 additions & 74 deletions src/defines.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,3 @@
"""
Specification for the Sparsity Pattern generation algorithm.

## Supported Sparsity Patterns
- `NearestNeighbors` (`:NN`) : Based on https://github.com/KristofferC/NearestNeighbors.jl
- `HNSW` (`:HNSW`) : Based on https://github.com/JuliaNeighbors/HNSW.jl
- `Custom` (`:USERGIVEN`) : Given by user. You don't need to fill this field out, but the given sparsity pattern must be in CSC format.
"""
const SPARSITY_GEN = (:NN, :HNSW, :USERGIVEN)

"""
Internal struct from which to fetch persisting objects in the optimization function.
There is no need for a user to mess with this!
Expand Down Expand Up @@ -53,67 +43,3 @@ mutable struct VecchiaModel{T, S, VI, M} <: AbstractNLPModel{T, S}
counters::Counters
cache::VecchiaCache{T, S, VI, M}
end

"""
Input to the VecchiaMLE analysis.

## Fields

- `k::Int`: Number of neighbors, representing the number of conditioning points in the Vecchia Approximation.
- `samples::M`: Samples to generate the output. Each sample should match the length of the `observed_pts` vector.

## Keyword Arguments

* ptset::AbstractVector # The locations of the analysis. May be passed as a matrix or vector of vectors.
* rowsL::AbstractVector # The sparsity pattern rows of L if the user gives one. MUST BE IN CSC FORMAT!
* colptrL::AbstractVector # The column pointer of L if the user gives one. MUST BE IN CSC FORMAT!
* metric::Distances.metric # The metric by which nearest neighbors are determined. Defaults to Euclidean.
* sparsitygen::Symbol # The method by which to generate a sparsity pattern. See SPARSITY_GEN.
"""
mutable struct VecchiaMLEInput{M, V, VI}
n::Int
k::Int
samples::M
number_of_samples::Int
ptset::V
rowsL::VI
colptrL::VI
metric::Distances.Metric
sparsitygen::Symbol
end

"""
    VecchiaMLEInput(n, k, samples, number_of_samples; kwargs...)

Construct a `VecchiaMLEInput`, normalizing the point set and the (optional)
user-provided CSC sparsity pattern via the package helpers `resolve_ptset`,
`resolve_rowsL`, and `resolve_colptrL`. Note that `n` is re-derived from the
resolved point set, so the stored `n` may differ from the `n` argument.
"""
function VecchiaMLEInput(
n::Int,
k::Int,
samples::M,
number_of_samples::Int;
ptset::V=generate_safe_xyGrid(n),
rowsL::VI=nothing,
colptrL::VI=nothing,
metric::Distances.Metric=Distances.Euclidean(),
sparsitygen::Symbol=:NN,
) where
{
M <: AbstractMatrix,
V <: Union{AbstractVector, AbstractMatrix},
VI <: Union{Nothing, AbstractVector},
}
# Normalize ptset (matrix or vector-of-vectors) and take the true point count.
ptset_ = resolve_ptset(n, ptset)
n_::Int = length(ptset_)

# Fill in (or validate) the CSC sparsity pattern against the resolved size.
rowsL = resolve_rowsL(rowsL, n_, k)
colptrL = resolve_colptrL(colptrL, n_)
return VecchiaMLEInput{M, typeof(ptset_), typeof(rowsL)}(
n_,
k,
samples,
number_of_samples,
ptset_,
rowsL,
colptrL,
metric,
sparsitygen,
)
end

# Convenience form: infer n and number_of_samples from the samples matrix
# (presumably columns index locations and rows index replicates — verify
# against callers).
VecchiaMLEInput(k::Int, samples; kwargs...) = VecchiaMLEInput(size(samples, 2), k, samples, size(samples, 1); kwargs...)
Loading
Loading