Skip to content

Commit 492067e

Browse files
committed
official support for space-only density estimation (fixed smoothing level, KFoldCV, custom optimizer stopping criterion)
1 parent d610d3d commit 492067e

File tree

5 files changed

+151
-28
lines changed

5 files changed

+151
-28
lines changed

fdaPDE/models/density_estimation/depde.h

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,19 @@ template <typename RegularizationType_>
3030
class DEPDE : public DensityEstimationBase<DEPDE<RegularizationType_>, RegularizationType_> {
3131
private:
3232
using Base = DensityEstimationBase<DEPDE<RegularizationType_>, RegularizationType_>;
33-
double tol_ = 1e-5; // tolerance on custom stopping criterion
33+
double tol_ = 1e-5; // tolerance on custom stopping criterion
34+
DVector<double> g_init_; // density initialization basis expansion coefficients
3435
core::Optimizer<DEPDE<RegularizationType_>> opt_;
35-
DVector<double> g_init_;
3636
public:
3737
using RegularizationType = std::decay_t<RegularizationType_>;
3838
using This = DEPDE<RegularizationType>;
39-
using Base::grad_int_exp; // \nabla_g (\int_{\mathcal{D}} \exp(g))
40-
using Base::int_exp; // \int_{\mathcal{D}} \exp(g)
39+
using Base::grad_int_exp; // \nabla_g (\int_{\Omega} \exp(g))
40+
using Base::int_exp; // \int_{\Omega} \exp(g)
4141
using Base::n_locs; // overall number of data locations
42-
using Base::n_obs; // number of observations
43-
using Base::P; // discretized penalty matrix
42+
using Base::n_obs; // number of observations (!= n_locs if masked)
43+
using Base::P; // discretized penalty matrix R_1^\top * (R_0)^{-1} * R_1
4444
using Base::PsiQuad; // reference basis evaluation at quadrature nodes
45-
using Base::Upsilon; // \Upsilon_(i,:) = \Phi(i,:) \kron S(p_i) \Psi
45+
using Base::Upsilon; // \Upsilon_(i,:) = is_space_only<Model> ? \Psi : \Phi(i,:) \kron S(p_i) \Psi
4646

4747
// space-only constructor
4848
template <typename PDE_>
@@ -87,18 +87,35 @@ class DEPDE : public DensityEstimationBase<DEPDE<RegularizationType_>, Regulariz
8787
};
8888
}
8989
// optimization algorithm custom stopping criterion
90-
bool stopping_criterion(const DVector<double>& g) {
91-
double llik = -(Upsilon() * g).sum() + n_locs() * int_exp(g);
92-
double loss = llik + g.dot(P() * g);
93-
return llik > tol_ || loss > tol_;
90+
template <typename OptimizerType> bool opt_stopping_criterion(OptimizerType& opt) {
91+
// opt.x_old: density function before update step, opt.x_new: density function after update step
92+
bool stop = true;
93+
double llik_old = -(Upsilon() * opt.x_old).sum() + n_obs() * int_exp(opt.x_old);
94+
double llik_new = -(Upsilon() * opt.x_new).sum() + n_obs() * int_exp(opt.x_new);
95+
stop &= std::abs((llik_new - llik_old) / llik_old) < tol_;
96+
if(!stop) return false;
97+
double penD_old = Base::xtPDx(opt.x_old), penD_new = Base::xtPDx(opt.x_new); //opt.x_old.dot(Base::PD() * opt.x_old);
98+
stop &= std::abs((penD_new - penD_old) / penD_old) < tol_;
99+
if (!stop) return false;
100+
double loss_old = llik_old + Base::lambda_D() * penD_old, loss_new = llik_new + Base::lambda_D() * penD_new;
101+
// space-time case
102+
if constexpr (is_space_time_separable<This>::value) {
103+
double penT_old = Base::xtPTx(opt.x_old), penT_new = Base::xtPTx(opt.x_new);
104+
loss_old += Base::lambda_T() * penT_old;
105+
loss_new += Base::lambda_T() * penT_new;
106+
stop &= std::abs((penT_new - penT_old) / penT_old) < tol_;
107+
}
108+
stop &= std::abs((loss_new - loss_old) / loss_old) < tol_;
109+
return stop;
94110
}
95111
// call optimization algorithm for log-likelihood minimization
96112
void solve() { Base::g_ = opt_.optimize(*this, g_init_); }
97113
void init_model() { return; }
98114
// setters
99115
void set_tolerance(double tol) { tol_ = tol; }
100116
void set_g_init(const DVector<double>& g_init) { g_init_ = g_init; }
101-
template <typename Optimizer> void set_optimizer(Optimizer&& opt) { opt_ = opt; }
117+
void set_f_init(const DVector<double>& f_init) { g_init_ = f_init.array().log(); }
118+
template <typename OptimizerType> void set_optimizer(OptimizerType&& opt) { opt_ = opt; }
102119
// getters
103120
const DVector<double>& g_init() const { return g_init_; }
104121
};

fdaPDE/models/space_only_base.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ template <typename Model> class SpaceOnlyBase : public ModelBase<Model> {
7373
if (is_empty(P_)) { P_ = R1().transpose() * invR0().solve(R1()); }
7474
return P_;
7575
}
76+
// evaluation of x^\top*(R1^\top*R0^{-1}*R1)*x
77+
double xtPDx(const DVector<double>& x) const { return x.dot(PD() * x); }
7678
// evaluation of penalty term \lambda*(R1^\top*R0^{-1}*R1) at \lambda
7779
auto P(const SVector<n_lambda>& lambda) const { return lambda[0] * PD(); }
7880
auto P() const { return P(lambda()); }

fdaPDE/models/space_time_separable_base.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ class SpaceTimeSeparableBase : public SpaceTimeBase<Model, SpaceTimeSeparable> {
102102
}
103103
return PD_;
104104
}
105+
// evaluation of x^\top*PD*x and x^\top*PT*x
106+
double xtPDx(const DVector<double>& x) const { return x.dot(PD() * x); }
107+
double xtPTx(const DVector<double>& x) const { return x.dot(PT() * x); }
105108
// discretized penalty P = \lambda_D*(P0 \kron (R1^T*R0^{-1}*R1)) + \lambda_T*(P1 \kron R0)
106109
auto P(const SVector<n_lambda>& lambda) const { return lambda[0] * PD() + lambda[1] * PT(); }
107110
auto P() const { return P(Base::lambda()); }

test/src/density_estimation_test.cpp

Lines changed: 116 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@ using fdapde::core::fem_order;
2323
using fdapde::core::laplacian;
2424
using fdapde::core::PDE;
2525
using fdapde::core::Triangulation;
26+
using fdapde::core::GradientDescent;
27+
using fdapde::core::BacktrackingLineSearch;
28+
using fdapde::core::WolfeLineSearch;
29+
using fdapde::core::BFGS;
2630

2731
#include "../../fdaPDE/models/density_estimation/depde.h"
2832
#include "../../fdaPDE/models/sampling_design.h"
2933
using fdapde::models::DEPDE;
3034
using fdapde::models::Sampling;
35+
#include "../../fdaPDE/calibration/kfold_cv.h"
36+
using fdapde::calibration::KCV;
3137

3238
#include "utils/constants.h"
3339
#include "utils/mesh_loader.h"
@@ -38,30 +44,125 @@ using fdapde::testing::read_csv;
3844

3945
// test 1
4046
// domain: unit square [1,1] x [1,1]
41-
// sampling: locations = nodes
42-
// penalization: simple laplacian
43-
// covariates: no
44-
// BC: no
47+
// optimizer: BGFS, fixed step
48+
// lambda: fixed
4549
// order FE: 1
46-
TEST(depde_test, test1) {
47-
// define domain
50+
TEST(depde_test, fixed_smoothing_bfgs_fixed_step) {
51+
// define domain
4852
MeshLoader<Triangulation<2, 2>> domain("square_density");
4953
// define regularizing PDE
5054
auto L = -laplacian<FEM>();
5155
DMatrix<double> u = DMatrix<double>::Zero(domain.mesh.n_cells() * 3, 1);
52-
PDE<decltype(domain.mesh), decltype(L), DMatrix<double>, FEM, fem_order<1>> problem(domain.mesh, L, u);
56+
PDE<Triangulation<2, 2>, decltype(L), DMatrix<double>, FEM, fem_order<1>> problem(domain.mesh, L, u);
5357
// define model
5458
double lambda = 0.1;
5559
DEPDE<SpaceOnly> model(problem);
5660
model.set_lambda_D(lambda);
61+
model.set_tolerance(1e-15); // to let the optimization process stop for maximum number of iterations
62+
// set model's data
63+
BlockFrame<double, int> df;
64+
df.insert(SPACE_LOCS, read_csv<double>("../data/models/depde/2D_test1/data.csv"));
65+
model.set_data(df);
66+
model.set_optimizer(BFGS<fdapde::Dynamic> {500, 1e-5, 1e-2});
67+
model.set_g_init(read_csv<double>("../data/models/depde/2D_test1/f_init.csv").array().log());
68+
// solve density estimation problem
69+
model.init();
70+
model.solve(); // this stops at maximum number of iterations (expected)
71+
// test correctness
72+
EXPECT_TRUE(almost_equal(model.g(), "../data/models/depde/2D_test1/g.mtx"));
73+
}
74+
75+
// test 2
76+
// domain: unit square [1,1] x [1,1]
77+
// optimizer: gradient descent, backtracking adaptive step
78+
// lambda: fixed
79+
// order FE: 1
80+
TEST(depde_test, fixed_smoothing_gradient_descent_backtracking_step) {
81+
// define domain
82+
MeshLoader<Triangulation<2, 2>> domain("square_density");
83+
// define regularizing PDE
84+
auto L = -laplacian<FEM>();
85+
DMatrix<double> u = DMatrix<double>::Zero(domain.mesh.n_cells() * 3, 1);
86+
PDE<Triangulation<2, 2>, decltype(L), DMatrix<double>, FEM, fem_order<1>> problem(domain.mesh, L, u);
87+
// define model
88+
double lambda = 0.1;
89+
DEPDE<SpaceOnly> model(problem);
90+
model.set_lambda_D(lambda);
91+
// set model's data
92+
BlockFrame<double, int> df;
93+
df.insert(SPACE_LOCS, read_csv<double>("../data/models/depde/2D_test2/data.csv"));
94+
model.set_data(df);
95+
model.set_optimizer(GradientDescent<fdapde::Dynamic, BacktrackingLineSearch> {1000, 1e-5, 1e-2});
96+
model.set_g_init(read_csv<double>("../data/models/depde/2D_test2/f_init.csv").array().log());
97+
// solve density estimation problem
98+
model.init();
99+
model.solve();
100+
// test correctness
101+
EXPECT_TRUE(almost_equal(model.g(), "../data/models/depde/2D_test2/g.mtx"));
102+
}
57103

58-
DMatrix<double> locs = read_csv<double>("../data/models/depde/2D_test1/data.csv");
59-
DVector<double> f_init = read_csv<double>("../data/models/depde/2D_test1/f_init.csv");
60-
model.set_spatial_locations(locs);
104+
// test 3
105+
// domain: unit square [1,1] x [1,1]
106+
// optimizer: BFGS, wolfe adaptive step
107+
// lambda: fixed
108+
// order FE: 1
109+
TEST(depde_test, fixed_smoothing_bfgs_wolfe_step) {
110+
// define domain
111+
MeshLoader<Triangulation<2, 2>> domain("square_density");
112+
// define regularizing PDE
113+
auto L = -laplacian<FEM>();
114+
DMatrix<double> u = DMatrix<double>::Zero(domain.mesh.n_cells() * 3, 1);
115+
PDE<Triangulation<2, 2>, decltype(L), DMatrix<double>, FEM, fem_order<1>> problem(domain.mesh, L, u);
116+
// define model
117+
double lambda = 0.1;
118+
DEPDE<SpaceOnly> model(problem);
119+
model.set_lambda_D(lambda);
120+
// set model's data
121+
BlockFrame<double, int> df;
122+
df.insert(SPACE_LOCS, read_csv<double>("../data/models/depde/2D_test3/data.csv"));
123+
model.set_data(df);
124+
model.set_optimizer(BFGS<fdapde::Dynamic, WolfeLineSearch> {1000, 1e-5, 1e-2});
125+
model.set_g_init(read_csv<double>("../data/models/depde/2D_test3/f_init.csv").array().log());
126+
// solve density estimation problem
61127
model.init();
62-
63-
fdapde::core::BFGS<fdapde::Dynamic> opt(500, 1e-5, 1e-2);
64-
opt.optimize(model, f_init.array().log());
65-
66-
std::cout << opt.optimum() << std::endl;
128+
model.solve();
129+
// test correctness
130+
EXPECT_TRUE(almost_equal(model.g(), "../data/models/depde/2D_test3/g.mtx"));
131+
}
132+
133+
// test 4
134+
// domain: unit square [1,1] x [1,1]
135+
// optimizer: BFGS, backtracking adaptive step
136+
// lambda: K-fold cross validation, 5 folds
137+
// order FE: 1
138+
TEST(depde_test, kcv_smoothing_bfgs_backtracking_step) {
139+
// define domain
140+
MeshLoader<Triangulation<2, 2>> domain("square_density");
141+
// define regularizing PDE
142+
auto L = -laplacian<FEM>();
143+
DMatrix<double> u = DMatrix<double>::Zero(domain.mesh.n_cells() * 3, 1);
144+
PDE<Triangulation<2, 2>, decltype(L), DMatrix<double>, FEM, fem_order<1>> problem(domain.mesh, L, u);
145+
// define model
146+
DVector<double> lambda_vect;
147+
lambda_vect.resize(4);
148+
lambda_vect << 0.001, 0.01, 0.1, 1;
149+
DEPDE<SpaceOnly> model(problem);
150+
// set model's data
151+
BlockFrame<double, int> df;
152+
df.insert(SPACE_LOCS, read_csv<double>("../data/models/depde/2D_test4/data.csv"));
153+
model.set_data(df);
154+
model.set_optimizer(BFGS<fdapde::Dynamic, WolfeLineSearch> {1000, 1e-5, 1e-2});
155+
DMatrix<double> f_init = read_csv<double>("../data/models/depde/2D_test4/f_init.csv");
156+
// declare cross validation engine
157+
KCV kcv(5, 10);
158+
DVector<double> cv_error;
159+
cv_error.resize(lambda_vect.size());
160+
161+
for (int i = 0; i < lambda_vect.rows(); ++i) {
162+
model.set_g_init(f_init.col(i).array().log());
163+
kcv.fit(model, SVector<1>(lambda_vect[i]), DEPDE<SpaceOnly>::CVScore(model));
164+
cv_error[i] = kcv.avg_scores()[0];
165+
}
166+
// test correctness
167+
EXPECT_TRUE(almost_equal(cv_error, "../data/models/depde/2D_test4/cv_error.mtx"));
67168
}

0 commit comments

Comments
 (0)