1+ #pragma once
12#include < Eigen/Dense>
23#include < LBFGSB.h>
34#include < tuple>
5+ #include < iostream>
46
57using number_t = double ;
68
@@ -16,6 +18,18 @@ using MatrixN = Eigen::Matrix<T, N, N, Eigen::ColMajor>;
1618template <typename T = number_t >
1719using MatrixX = MatrixN<Eigen::Dynamic, T>;
1820
21+ template <typename T = number_t >
22+ MatrixX<T> IdentityLike (const MatrixX<T> &A)
23+ {
24+ assert (A.rows () == A.cols ());
25+ MatrixX<T> I;
26+ I.resizeLike (A);
27+ I.setZero ();
28+ for (Eigen::Index ri = 0 ; ri < I.rows (); ++ri)
29+ I (ri, ri) = T (1.0 );
30+ return I;
31+ }
32+
1933template <typename T = number_t >
2034void pdist (const std::vector<Vector2<T> > &X1, const std::vector<Vector2<T> > &X2, MatrixX<T> &Dist){
2135 Dist.resize (X1.size (), X2.size ());
@@ -32,9 +46,9 @@ MatrixX<T> rbf_kernel_2d(const std::vector<Vector2<T> > &X1, const std::vector<V
3246 dist.resize (rows, cols);
3347 pdist (X1, X2, dist);
3448 T sigma2 = sigma * sigma;
35- T inv_l2 = 1 /(l*l);
49+ T inv_l2 = T ( 1.0 ) /(l*l);
3650 T inv_l3 = inv_l2 / l;
37- return sigma2 * (-0.5 * inv_l2 * dist).exp ();
51+ return sigma2 * (-0.5 * inv_l2 * dist).array (). exp ();
3852}
3953
4054template <typename T = number_t >
@@ -46,7 +60,7 @@ std::tuple<MatrixX<T>, MatrixX<T>, MatrixX<T>> rbf_kernel_2d_with_grad(const std
4660 T sigma2 = sigma * sigma;
4761 T inv_l2 = 1 /(l*l);
4862 T inv_l3 = inv_l2 / l;
49- MatrixX<T> K = (-0.5 * inv_l2 * dist).exp ();
63+ MatrixX<T> K = (-0.5 * inv_l2 * dist).array (). exp ();
5064 MatrixX<T> Jac_sigma = 2 * sigma * K;
5165 MatrixX<T> Jac_l = sigma2 * K * dist * inv_l3;
5266 return std::make_tuple (sigma2 * K, Jac_sigma, Jac_l);
@@ -59,8 +73,8 @@ std::tuple<MatrixX<T>, MatrixX<T>, MatrixX<T>> rbf_kernel_2d_with_grad(const std
5973 * @return double
6074 */
6175double LogDet (const MatrixX<> &A){
62- VectorX<> log_diag = A.diagonal ().log ();
63- return 2 * log_diag.sum ();
76+ VectorX<> log_diag = A.diagonal ().array (). log ();
77+ return 2.0 * log_diag.sum ();
6478}
6579
6680class GPRParams {
@@ -85,7 +99,7 @@ class GPRCache{
8599 VectorX<T> alpha;
86100 T cache_atol = 1e-8 ;
87101public:
88- GPRCache (){};
102+ GPRCache (){}
89103 void init (int train_num){
90104 Kff.resize (train_num, train_num);
91105 L.resize (train_num, train_num);
@@ -114,6 +128,102 @@ class GPRCache{
114128
115129};
116130
131+ class NLML {
132+ public:
133+ /* *
134+ * @brief Construct a new Negative Log Marginal Likelihood Function Object
135+ *
136+ * @param _Dist pdist matrix
137+ * @param _trY train_y
138+ * @param _sigma_noise _sigma_noise
139+ */
140+ NLML (const MatrixX<> &_Dist, const VectorX<> &_trY, const double &_sigma_noise):
141+ Dist (_Dist), trY(_trY), sigma_noise(_sigma_noise){}
142+ /* *
143+ * @brief negative log marginal likelihood
144+ *
145+ * @param x
146+ * @param g
147+ * @return double
148+ */
149+ double operator ()(const Eigen::VectorXd &x, Eigen::VectorXd &g)
150+ {
151+ double sigma = x[0 ], l = x[1 ];
152+ MatrixX<> Kff, L, Kinv; // (N, N)
153+ VectorX<> alpha; // (N, 1)
154+ std::tie (Kff, L ,alpha, Kinv) = compute_K_L_Alpha_Kinv (sigma, l, Dist, trY);
155+ int n = alpha.rows ();
156+ double fval = 0.5 * (trY.dot (alpha) + LogDet (L) + n * log (2 * M_PI));
157+ MatrixX<> dK_dsigma, dK_dl; // (N, N)
158+ std::tie (dK_dsigma, dK_dl) = grad_kernel (sigma, l, Kff);
159+ MatrixX<> inner_term = alpha * alpha.transpose () - Kinv; // (N, N)
160+ g (0 ) = 0.5 * (inner_term * dK_dsigma).trace (); // Tr(N, N)
161+ g (1 ) = 0.5 * (inner_term * dK_dl).trace (); // Tr(N, N)
162+ stored_alpha = alpha;
163+ return fval;
164+ }
165+
166+ VectorX<> return_alpha ()
167+ {
168+ return stored_alpha;
169+ }
170+
171+ private:
172+ const double sigma_noise;
173+ const MatrixX<> Dist;
174+ const VectorX<> trY;
175+ VectorX<> stored_alpha;
176+
177+ private:
178+ /* *
179+ * @brief Compute Kff, L, alpha, Kff_inv using Cached pdist and train_y
180+ *
181+ * @tparam T typename
182+ * @param sigma hyper-parameter
183+ * @param l hyper-parameter
184+ * @param Dist parwise dist
185+ * @param train_y
186+ * @return std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>, atrixX<> > Kff, L, alpha, Kff_inv
187+ */
188+ std::tuple<MatrixX<>, MatrixX<>, VectorX<>, MatrixX<> > compute_K_L_Alpha_Kinv (const double sigma ,const double l, MatrixX<> Dist, VectorX<> train_y) const
189+ {
190+ assert (Dist.rows () == train_y.rows ());
191+ MatrixX<> Kff = computeCovariance (sigma, l, Dist);
192+ Kff += sigma_noise * IdentityLike (Kff);
193+ Eigen::LLT<MatrixX<>> llt (Kff);
194+ assert (llt.info () == Eigen::Success); // if failed, enlarge the sigma_noise to ensure the PSD of Kff
195+ VectorX<> alpha = llt.solve (train_y);
196+ MatrixX<> L = llt.matrixL ();
197+ MatrixX<> Kinv; // assign Identity first, then solve inplace
198+ Kinv.resizeLike (Kff);
199+ Kinv.setIdentity ();
200+ llt.solveInPlace (Kinv);
201+ return {Kff, L, alpha, Kinv};
202+ }
203+
204+ MatrixX<> computeCovariance (const double sigma, const double l, MatrixX<> Dist) const
205+ {
206+ return sigma * sigma * (-0.5 / (l*l) * Dist).array ().exp ();
207+ }
208+
209+ /* *
210+ * @brief gradient of RBF Kenerl
211+ *
212+ * @param sigma
213+ * @param l
214+ * @param Kff
215+ * @return std::tuple<double, double> dK_dsigma, dK_dl
216+ */
217+ std::tuple<MatrixX<>, MatrixX<>> grad_kernel (const double sigma, const double l, const MatrixX<> &Kff)
218+ {
219+ Eigen::Vector2d grad; // dK_dsigma, dK_dl
220+ double inv_l3 = 1.0 /(l*l*l);
221+ return {2 * sigma * Kff, Kff * Dist * inv_l3};
222+ }
223+
224+ };
225+
226+
117227class GPR {
118228public:
119229 double sigma_noise;
@@ -125,7 +235,6 @@ class GPR{
125235
126236private:
127237 MatrixX<> Dist;
128- GPRCache<> Cache;
129238 std::vector<Vector2<>> trX;
130239 VectorX<> trY;
131240 bool is_fit = false ;
@@ -140,20 +249,23 @@ class GPR{
140249 sigma_noise (params.sigma_noise), sigma(params.sigma), l(params.l),
141250 lb (params.lb), ub(params.ub), optimize(params.optimize), verborse(params.verborse){}
142251
143- void fit (const std::vector<Vector2<>> &train_x, const VectorX &train_y)
252+ void fit (const std::vector<Vector2<>> &train_x, const VectorX<> &train_y)
144253 {
145254 trX = train_x;
146255 trY = train_y;
147256 pdist (train_x, train_x, Dist);
148257 if (optimize)
149258 {
150- LBFGSpp::LBFGSParam <double > param;
259+ LBFGSpp::LBFGSBParam <double > param;
151260 param.epsilon = 1e-6 ;
152261 param.max_iterations = 30 ;
153262 LBFGSpp::LBFGSBSolver<double > solver (param);
154- Eigen::Vector2d theta = {sigma, l};
263+ VectorX<> theta;
264+ theta.resize (2 );
265+ theta (0 ) = sigma; theta (1 ) = l;
155266 double fval;
156- int niter = solver.minimize <double >(&optmize_func, theta, fval, lb, ub);
267+ NLML func (Dist, trY, sigma_noise);
268+ int niter = solver.minimize <NLML>(func, theta, fval, lb, ub);
157269 sigma = theta[0 ];
158270 l = theta[1 ];
159271 is_fit = true ;
@@ -167,73 +279,23 @@ class GPR{
167279
168280 }
169281
170- double predict (const VectorX &test_x)
282+ double predict (const VectorX<> &test_x)
171283 {
172284 assert (is_fit);
173- MatrixX <> Kstar = rbf_kernel_2d<>(trX, {test_x}, sigma, l); // (N, 1)
285+ VectorX <> Kstar = rbf_kernel_2d<>(trX, {test_x}, sigma, l); // (N, 1)
174286 VectorX<> alpha;
175- if (Cache.allclose (sigma, l)){
176- alpha = Cache.alpha ;
177- }else
178- {
179- std::tie (_, _, alpha) = compute_K_L_Alpha (sigma, l, Dist, trY);
180- }
181- return Kstar.transpose () * alpha; // (1, N) * (N, 1) -> (1, 1)
287+ std::tie (std::ignore, std::ignore, alpha) = compute_K_L_Alpha (sigma, l, Dist, trY);
288+ return Kstar.dot (alpha); // (1, N) * (N, 1) -> (1, 1)
182289 }
183290
184291private:
185- /* *
186- * @brief negative log marginal likelihood
187- *
188- * @param x
189- * @param g
190- * @return double
191- */
192- double optmize_func (const Eigen::VectorXd &x, Eigen::VectorXd &g)
193- {
194- double sigma_f = x[0 ], l = x[1 ];
195- MatrixX<> Kff, L, Kinv; // (N, N)
196- VectorX<> alpha; // (N, 1)
197- std::tie (Kff, L ,alpha, Kinv) = compute_K_L_Alpha_Kinv (sigma_f, l, Dist, trY);
198- double fval = 0.5 * (trY.dot (alpha) + LogDet (L) + n * log (2 * M_PI));
199- MatrixX<> dK_dsigma, dK_dl; // (N, N)
200- std::tie (dK_dsigma, dK_dl) = grad_kernel (sigma, l, Kff);
201- MatrixX<> inner_term = alpha * alpha.transpose () - Kinv; // (N, N)
202- g (0 ) = 0.5 * (inner_term * dK_dsigma).trace (); // Tr(N, N)
203- g (1 ) = 0.5 * (inner_term * dK_dl).trace (); // Tr(N, N)
204- return fval;
205- }
292+
206293 MatrixX<> computeCovariance (const double sigma, const double l, MatrixX<> Dist) const
207294 {
208- return sigma * sigma * (-0.5 / (l*l) * Dist).exp ();
295+ return sigma * sigma * (-0.5 / (l*l) * Dist).array (). exp ();
209296 }
210297
211- /* *
212- * @brief Compute Kff, L, alpha, Kff_inv using Cached pdist and train_y
213- *
214- * @tparam T typename
215- * @param sigma hyper-parameter
216- * @param l hyper-parameter
217- * @param Dist parwise dist
218- * @param train_y
219- * @return std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>, atrixX<> > Kff, L, alpha, Kff_inv
220- */
221- std::tuple<MatrixX<>, MatrixX<>, VectorX<>, MatrixX<> > compute_K_L_Alpha_Kinv (const double sigma ,const double l, MatrixX<> Dist, VectorX<> train_y) const
222- {
223- assert (Dist.rows () == train_y.rows ());
224- MatrixX<> Kff = computeCovariance<T>(sigma, l, Dist);
225- Kff += T (sigma_noise) * MatrixX<>::Identity ();
226- Eigen::LLT<MatrixX<>> llt (Kff);
227- assert (llt.info () == Eigen::Success); // if failed, enlarge the sigma_noise to ensure the PSD of Kff
228- VectorX<> alpha = llt.solve (train_y);
229- MatrixX<> L = llt.matrixL ();
230- MatrixX<> Kinv; // assign Identity first, then solve inplace
231- Kinv.resize (Kff.rows (), Kff.cols ());
232- Kinv.setIdentity ();
233- llt.solveInPlace (Kinv);
234- Cache.assign (sigma, l, alpha); // only need cached alpha for predict
235- return {Kff, L, alpha, Kinv};
236- }
298+
237299
238300 /* *
239301 * @brief Compute Kff, L, alpha using Cached pdist and train_y
@@ -247,38 +309,24 @@ class GPR{
247309 std::tuple<MatrixX<>, MatrixX<>, VectorX<>> compute_K_L_Alpha (const double sigma ,const double l, MatrixX<> Dist, VectorX<> train_y) const
248310 {
249311 assert (Dist.rows () == train_y.rows ());
250- MatrixX<> Kff = computeCovariance<T> (sigma, l, Dist);
251- Kff += T ( sigma_noise) * MatrixX<>:: Identity ( );
312+ MatrixX<> Kff = computeCovariance (sigma, l, Dist);
313+ Kff += sigma_noise * IdentityLike (Kff );
252314 Eigen::LLT<MatrixX<>> llt (Kff);
253315 assert (llt.info () == Eigen::Success); // if failed, enlarge the sigma_noise to ensure the PSD of Kff
254316 VectorX<> alpha = llt.solve (train_y);
255317 MatrixX<> L = llt.matrixL ();
256- Cache.assign (sigma, l, alpha); // only need cached alpha for predict
257318 return {Kff, L, alpha};
258319 }
259320
260- /* *
261- * @brief gradient of RBF Kenerl
262- *
263- * @param sigma
264- * @param l
265- * @param Kff
266- * @return std::tuple<double, double> dK_dsigma, dK_dl
267- */
268- std::tuple<MatrixX<>, MatrixX<>> grad_kernel (const double sigma, const double l, const MatrixX<> &Kff)
269- {
270- Eigen::Vector2d grad; // dK_dsigma, dK_dl
271- double inv_l3 = 1.0 /(l*l*l);
272- return {2 * sigma * Kff, Kff * Dist * inv_l3};
273- }
321+
274322};
275323
276- template < class T >
324+
277325class TGPR {
278326public:
279- T sigma_noise;
280- T sigma;
281- T l;
327+ double sigma_noise;
328+ double sigma;
329+ double l;
282330 bool verborse;
283331
284332public:
@@ -287,27 +335,29 @@ class TGPR{
287335 *
288336 * @param params GPRParams
289337 */
290- TGPR (const GPRParams ¶ms, const MatrixX<T> &_Dist):
291- sigma_noise (params.sigma_noise), sigma(params.sigma), l(params.l),
292- verborse (params.verborse){}
338+ TGPR (const double &_sigma_noise, const double &_sigma, const double &_l, const bool _verborse):
339+ sigma_noise (_sigma_noise), sigma(_sigma), l(_l), verborse(_verborse){}
293340
341+ template <typename T>
294342 T fit_predict (const std::vector<Vector2<T>> &train_x, const VectorX<T> &train_y, const Vector2<T> &test_x){
295343 int N = train_x.size ();
296344 MatrixX<T> Dist;
297345 pdist<T>(train_x, train_x, Dist);
298346 MatrixX<T> Kff;
299347 MatrixX<T> L;
300348 VectorX<T> alpha;
301- std::tie (Kff, L, alpha) = compute_K_L_Alpha<T>(sigma, l, Dist, train_y);
302- VectorX<T> Kstar = rbf_kernel_2d<T>(train_x, {test_x}, sigma, l); // (N, 1)
349+ T _sigma (sigma), _l (l);
350+ std::tie (Kff, L, alpha) = compute_K_L_Alpha<T>(_sigma, _l, Dist, train_y);
351+ VectorX<T> Kstar = rbf_kernel_2d<T>(train_x, {test_x}, _sigma, _l); // (N, 1)
303352 T mu = Kstar.transpose () * alpha; // (1, N) * (N, 1) -> (1,)
304353 return mu;
305354 }
306355
307356private:
357+ template <typename T>
308358 MatrixX<T> computeCovariance (const T sigma, const T l, MatrixX<T> Dist) const
309359 {
310- return sigma * sigma * (-0.5 / (l*l) * Dist).exp ();
360+ return sigma * sigma * (-0.5 / (l*l) * Dist).array (). exp ();
311361 }
312362
313363 /* *
@@ -320,16 +370,16 @@ class TGPR{
320370 * @param train_y
321371 * @return std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>> Kff, L, alpha
322372 */
373+ template <typename T>
323374 std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>> compute_K_L_Alpha (const T sigma ,const T l, MatrixX<T> Dist, VectorX<T> train_y) const
324375 {
325376 assert (Dist.rows () == train_y.rows ());
326377 MatrixX<T> Kff = computeCovariance<T>(sigma, l, Dist);
327- int N = Kff.cols ();
328- Kff += T (sigma_noise) * MatrixN<N, T>::Identity ();
329- Eigen::LLT<MatrixN<N, T>> llt (Kff);
378+ Kff += T (sigma_noise) * IdentityLike (Kff);
379+ Eigen::LLT<MatrixX<T>> llt (Kff);
330380 assert (llt.info () == Eigen::Success); // if failed, enlarge the sigma_noise to ensure the PSD of Kff
331- VectorN<N, T> alpha = llt.solve (train_y);
332- MatrixN<N, T> L = llt.matrixL ();
381+ VectorX< T> alpha = llt.solve (train_y);
382+ MatrixX< T> L = llt.matrixL ();
333383 return {Kff, L, alpha};
334384 }
335385
0 commit comments