gitouni
diff --git a/‎include/GPR.hpp‎
Lines changed: 150 additions & 100 deletions b/‎include/GPR.hpp‎
Lines changed: 150 additions & 100 deletions
diff --git a/‎include/HECalib.h‎
Lines changed: 1 addition & 0 deletions b/‎include/HECalib.h‎
Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,8 @@
+#pragma once
 #include <Eigen/Dense>
 #include <LBFGSB.h>
 #include <tuple>
+#include <iostream>
 
 using number_t = double;
 
@@ -16,6 +18,18 @@ using MatrixN = Eigen::Matrix<T, N, N, Eigen::ColMajor>;
 template <typename T = number_t>
 using MatrixX = MatrixN<Eigen::Dynamic, T>;
 
+template <typename T = number_t>
+MatrixX<T> IdentityLike(const MatrixX<T> &A)
+{
+    assert(A.rows() == A.cols());
+    MatrixX<T> I;
+    I.resizeLike(A);
+    I.setZero();
+    for(Eigen::Index ri = 0; ri < I.rows(); ++ri)
+        I(ri, ri) = T(1.0);
+    return I;
+}
+
 template <typename T = number_t>
 void pdist(const std::vector<Vector2<T> > &X1, const std::vector<Vector2<T> > &X2, MatrixX<T> &Dist){
     Dist.resize(X1.size(), X2.size());
@@ -32,9 +46,9 @@ MatrixX<T> rbf_kernel_2d(const std::vector<Vector2<T> > &X1, const std::vector<V
     dist.resize(rows, cols);
     pdist(X1, X2, dist);
     T sigma2 = sigma * sigma;
-    T inv_l2 = 1/(l*l);
+    T inv_l2 = T(1.0)/(l*l);
     T inv_l3 = inv_l2 / l;
-    return sigma2 * (-0.5 * inv_l2 * dist).exp();  
+    return sigma2 * (-0.5 * inv_l2 * dist).array().exp();  
 }
 
 template <typename T = number_t>
@@ -46,7 +60,7 @@ std::tuple<MatrixX<T>, MatrixX<T>, MatrixX<T>> rbf_kernel_2d_with_grad(const std
     T sigma2 = sigma * sigma;
     T inv_l2 = 1/(l*l);
     T inv_l3 = inv_l2 / l;
-    MatrixX<T> K = (-0.5 * inv_l2 * dist).exp();  
+    MatrixX<T> K = (-0.5 * inv_l2 * dist).array().exp();  
     MatrixX<T> Jac_sigma = 2 * sigma * K;
     MatrixX<T> Jac_l = sigma2 * K * dist * inv_l3;
     return std::make_tuple(sigma2 * K, Jac_sigma, Jac_l);
@@ -59,8 +73,8 @@ std::tuple<MatrixX<T>, MatrixX<T>, MatrixX<T>> rbf_kernel_2d_with_grad(const std
  * @return double 
  */
 double LogDet(const MatrixX<> &A){
-    VectorX<> log_diag = A.diagonal().log();
-    return 2*log_diag.sum();
+    VectorX<> log_diag = A.diagonal().array().log();
+    return 2.0 * log_diag.sum();
 }
 
 class GPRParams{
@@ -85,7 +99,7 @@ class GPRCache{
     VectorX<T> alpha;
     T cache_atol = 1e-8;
 public:
-    GPRCache(){};
+    GPRCache(){}
     void init(int train_num){
         Kff.resize(train_num, train_num);
         L.resize(train_num, train_num);
@@ -114,6 +128,102 @@ class GPRCache{
 
 };
 
+class NLML{
+public:
+    /**
+     * @brief Construct a new Negative Log Marginal Likelihood Function Object
+     * 
+     * @param _Dist pdist matrix
+     * @param _trY train_y
+     * @param _sigma_noise _sigma_noise
+     */
+    NLML(const MatrixX<> &_Dist, const VectorX<> &_trY, const double &_sigma_noise):
+        Dist(_Dist), trY(_trY), sigma_noise(_sigma_noise){}
+     /**
+     * @brief negative log marginal likelihood
+     * 
+     * @param x 
+     * @param g 
+     * @return double 
+     */
+    double operator()(const Eigen::VectorXd &x, Eigen::VectorXd &g)
+    {
+        double sigma = x[0], l = x[1];
+        MatrixX<> Kff, L, Kinv;  // (N, N)
+        VectorX<> alpha;  // (N, 1)
+        std::tie(Kff, L ,alpha, Kinv) = compute_K_L_Alpha_Kinv(sigma, l, Dist, trY);
+        int n = alpha.rows();
+        double fval = 0.5 * (trY.dot(alpha) + LogDet(L) + n * log(2 * M_PI));
+        MatrixX<> dK_dsigma, dK_dl;  // (N, N)
+        std::tie(dK_dsigma, dK_dl) = grad_kernel(sigma, l, Kff);
+        MatrixX<> inner_term = alpha * alpha.transpose() - Kinv;  // (N, N)
+        g(0) = 0.5 * (inner_term * dK_dsigma).trace();  // Tr(N, N)
+        g(1) = 0.5 * (inner_term * dK_dl).trace();  // Tr(N, N)
+        stored_alpha = alpha;
+        return fval;
+    }
+
+    VectorX<> return_alpha()
+    {
+        return stored_alpha;
+    }
+
+private:
+    const double sigma_noise;
+    const MatrixX<> Dist;
+    const VectorX<> trY;
+    VectorX<> stored_alpha;
+
+private:
+    /**
+     * @brief Compute Kff, L, alpha, Kff_inv using Cached pdist and train_y
+     * 
+     * @tparam T typename
+     * @param sigma hyper-parameter
+     * @param l hyper-parameter
+     * @param Dist parwise dist
+     * @param train_y 
+     * @return std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>, atrixX<> > Kff, L, alpha, Kff_inv
+     */
+    std::tuple<MatrixX<>, MatrixX<>, VectorX<>, MatrixX<> > compute_K_L_Alpha_Kinv(const double sigma ,const double l, MatrixX<> Dist, VectorX<> train_y) const
+    {
+        assert(Dist.rows() == train_y.rows());
+        MatrixX<> Kff = computeCovariance(sigma, l, Dist);
+        Kff += sigma_noise * IdentityLike(Kff);
+        Eigen::LLT<MatrixX<>> llt(Kff);
+        assert(llt.info() == Eigen::Success);  // if failed, enlarge the sigma_noise to ensure the PSD of Kff
+        VectorX<> alpha = llt.solve(train_y);
+        MatrixX<> L = llt.matrixL();
+        MatrixX<> Kinv; // assign Identity first, then solve inplace
+        Kinv.resizeLike(Kff);
+        Kinv.setIdentity();
+        llt.solveInPlace(Kinv);
+        return {Kff, L, alpha, Kinv};
+    }
+
+    MatrixX<> computeCovariance(const double sigma, const double l, MatrixX<> Dist) const
+    {
+        return sigma * sigma * (-0.5 / (l*l) * Dist).array().exp();
+    }
+
+        /**
+     * @brief gradient of RBF Kenerl
+     * 
+     * @param sigma 
+     * @param l 
+     * @param Kff 
+     * @return std::tuple<double, double> dK_dsigma, dK_dl
+     */
+    std::tuple<MatrixX<>, MatrixX<>> grad_kernel(const double sigma, const double l, const MatrixX<> &Kff)
+    {
+        Eigen::Vector2d grad; // dK_dsigma, dK_dl
+        double inv_l3 = 1.0/(l*l*l);
+        return {2 * sigma * Kff, Kff * Dist  * inv_l3};
+    }
+
+};
+
+
 class GPR{
 public:
     double sigma_noise;
@@ -125,7 +235,6 @@ class GPR{
 
 private:
     MatrixX<> Dist;
-    GPRCache<> Cache;
     std::vector<Vector2<>> trX;
     VectorX<> trY;
     bool is_fit = false;
@@ -140,20 +249,23 @@ class GPR{
         sigma_noise(params.sigma_noise), sigma(params.sigma), l(params.l),
         lb(params.lb), ub(params.ub), optimize(params.optimize), verborse(params.verborse){}
 
-    void fit(const std::vector<Vector2<>> &train_x, const VectorX &train_y)
+    void fit(const std::vector<Vector2<>> &train_x, const VectorX<> &train_y)
     {
         trX = train_x;
         trY = train_y;
         pdist(train_x, train_x, Dist);
         if(optimize)
         {
-            LBFGSpp::LBFGSParam<double> param;
+            LBFGSpp::LBFGSBParam<double> param;
             param.epsilon = 1e-6;
             param.max_iterations = 30;
             LBFGSpp::LBFGSBSolver<double> solver(param);
-            Eigen::Vector2d theta = {sigma, l};
+            VectorX<> theta;
+            theta.resize(2);
+            theta(0) = sigma; theta(1) = l;
             double fval;
-            int niter = solver.minimize<double>(&optmize_func, theta, fval, lb, ub);
+            NLML func(Dist, trY, sigma_noise);
+            int niter = solver.minimize<NLML>(func, theta, fval, lb, ub);
             sigma = theta[0];
             l = theta[1];
             is_fit = true;
@@ -167,73 +279,23 @@ class GPR{
 
     }
 
-    double predict(const VectorX &test_x)
+    double predict(const VectorX<> &test_x)
     {
         assert(is_fit);
-        MatrixX<> Kstar = rbf_kernel_2d<>(trX, {test_x}, sigma, l);  // (N, 1)
+        VectorX<> Kstar = rbf_kernel_2d<>(trX, {test_x}, sigma, l);  // (N, 1)
         VectorX<> alpha;
-        if(Cache.allclose(sigma, l)){
-            alpha = Cache.alpha;
-        }else
-        {
-            std::tie(_, _, alpha) = compute_K_L_Alpha(sigma, l, Dist, trY);
-        }
-        return Kstar.transpose() * alpha;  // (1, N) * (N, 1) -> (1, 1)
+        std::tie(std::ignore, std::ignore, alpha) = compute_K_L_Alpha(sigma, l, Dist, trY);
+        return Kstar.dot(alpha);  // (1, N) * (N, 1) -> (1, 1)
     }
 
 private:
-     /**
-     * @brief negative log marginal likelihood
-     * 
-     * @param x 
-     * @param g 
-     * @return double 
-     */
-    double optmize_func(const Eigen::VectorXd &x, Eigen::VectorXd &g)
-    {
-        double sigma_f = x[0], l = x[1];
-        MatrixX<> Kff, L, Kinv;  // (N, N)
-        VectorX<> alpha;  // (N, 1)
-        std::tie(Kff, L ,alpha, Kinv) = compute_K_L_Alpha_Kinv(sigma_f, l, Dist, trY);
-        double fval = 0.5 * (trY.dot(alpha) + LogDet(L) + n * log(2 * M_PI));
-        MatrixX<> dK_dsigma, dK_dl;  // (N, N)
-        std::tie(dK_dsigma, dK_dl) = grad_kernel(sigma, l, Kff);
-        MatrixX<> inner_term = alpha * alpha.transpose() - Kinv;  // (N, N)
-        g(0) = 0.5 * (inner_term * dK_dsigma).trace();  // Tr(N, N)
-        g(1) = 0.5 * (inner_term * dK_dl).trace();  // Tr(N, N)
-        return fval;
-    }
+    
     MatrixX<> computeCovariance(const double sigma, const double l, MatrixX<> Dist) const
     {
-        return sigma * sigma * (-0.5 / (l*l) * Dist).exp();
+        return sigma * sigma * (-0.5 / (l*l) * Dist).array().exp();
     }
 
-    /**
-     * @brief Compute Kff, L, alpha, Kff_inv using Cached pdist and train_y
-     * 
-     * @tparam T typename
-     * @param sigma hyper-parameter
-     * @param l hyper-parameter
-     * @param Dist parwise dist
-     * @param train_y 
-     * @return std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>, atrixX<> > Kff, L, alpha, Kff_inv
-     */
-    std::tuple<MatrixX<>, MatrixX<>, VectorX<>, MatrixX<> > compute_K_L_Alpha_Kinv(const double sigma ,const double l, MatrixX<> Dist, VectorX<> train_y) const
-    {
-        assert(Dist.rows() == train_y.rows());
-        MatrixX<> Kff = computeCovariance<T>(sigma, l, Dist);
-        Kff += T(sigma_noise) * MatrixX<>::Identity();
-        Eigen::LLT<MatrixX<>> llt(Kff);
-        assert(llt.info() == Eigen::Success);  // if failed, enlarge the sigma_noise to ensure the PSD of Kff
-        VectorX<> alpha = llt.solve(train_y);
-        MatrixX<> L = llt.matrixL();
-        MatrixX<> Kinv; // assign Identity first, then solve inplace
-        Kinv.resize(Kff.rows(), Kff.cols());
-        Kinv.setIdentity();
-        llt.solveInPlace(Kinv);
-        Cache.assign(sigma, l, alpha);  // only need cached alpha for predict
-        return {Kff, L, alpha, Kinv};
-    }
+
 
     /**
      * @brief Compute Kff, L, alpha using Cached pdist and train_y
@@ -247,38 +309,24 @@ class GPR{
     std::tuple<MatrixX<>, MatrixX<>, VectorX<>> compute_K_L_Alpha(const double sigma ,const double l, MatrixX<> Dist, VectorX<> train_y) const
     {
         assert(Dist.rows() == train_y.rows());
-        MatrixX<> Kff = computeCovariance<T>(sigma, l, Dist);
-        Kff += T(sigma_noise) * MatrixX<>::Identity();
+        MatrixX<> Kff = computeCovariance(sigma, l, Dist);
+        Kff += sigma_noise * IdentityLike(Kff);
         Eigen::LLT<MatrixX<>> llt(Kff);
         assert(llt.info() == Eigen::Success);  // if failed, enlarge the sigma_noise to ensure the PSD of Kff
         VectorX<> alpha = llt.solve(train_y);
         MatrixX<> L = llt.matrixL();
-        Cache.assign(sigma, l, alpha);  // only need cached alpha for predict
         return {Kff, L, alpha};
     }
 
-    /**
-     * @brief gradient of RBF Kenerl
-     * 
-     * @param sigma 
-     * @param l 
-     * @param Kff 
-     * @return std::tuple<double, double> dK_dsigma, dK_dl
-     */
-    std::tuple<MatrixX<>, MatrixX<>> grad_kernel(const double sigma, const double l, const MatrixX<> &Kff)
-    {
-        Eigen::Vector2d grad; // dK_dsigma, dK_dl
-        double inv_l3 = 1.0/(l*l*l);
-        return {2 * sigma * Kff, Kff * Dist  * inv_l3};
-    }
+
 };
 
-template <class T>
+
 class TGPR{
 public:
-    T sigma_noise;
-    T sigma;
-    T l;
+    double sigma_noise;
+    double sigma;
+    double l;
     bool verborse;
 
 public:
@@ -287,27 +335,29 @@ class TGPR{
      * 
      * @param params GPRParams
      */
-    TGPR(const GPRParams &params, const MatrixX<T> &_Dist):
-        sigma_noise(params.sigma_noise), sigma(params.sigma), l(params.l),
-        verborse(params.verborse){}
+    TGPR(const double &_sigma_noise, const double &_sigma, const double &_l, const bool _verborse):
+        sigma_noise(_sigma_noise), sigma(_sigma), l(_l), verborse(_verborse){}
 
+    template <typename T>
     T fit_predict(const std::vector<Vector2<T>> &train_x, const VectorX<T> &train_y, const Vector2<T> &test_x){
         int N = train_x.size();
         MatrixX<T> Dist;
         pdist<T>(train_x, train_x, Dist);
         MatrixX<T> Kff;
         MatrixX<T> L;
         VectorX<T> alpha;
-        std::tie(Kff, L, alpha) = compute_K_L_Alpha<T>(sigma, l, Dist, train_y);
-        VectorX<T> Kstar = rbf_kernel_2d<T>(train_x, {test_x}, sigma, l);  // (N, 1)
+        T _sigma(sigma), _l(l);
+        std::tie(Kff, L, alpha) = compute_K_L_Alpha<T>(_sigma, _l, Dist, train_y);
+        VectorX<T> Kstar = rbf_kernel_2d<T>(train_x, {test_x}, _sigma, _l);  // (N, 1)
         T mu = Kstar.transpose() * alpha; // (1, N) * (N, 1) -> (1,)
         return mu;
     }
 
 private:    
+    template <typename T>
     MatrixX<T> computeCovariance(const T sigma, const T l, MatrixX<T> Dist) const
     {
-        return sigma * sigma * (-0.5 / (l*l) * Dist).exp();
+        return sigma * sigma * (-0.5 / (l*l) * Dist).array().exp();
     }
 
     /**
@@ -320,16 +370,16 @@ class TGPR{
      * @param train_y 
      * @return std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>> Kff, L, alpha
      */
+    template <typename T>
     std::tuple<MatrixX<T>, MatrixX<T>, VectorX<T>> compute_K_L_Alpha(const T sigma ,const T l, MatrixX<T> Dist, VectorX<T> train_y) const
     {
         assert(Dist.rows() == train_y.rows());
         MatrixX<T> Kff = computeCovariance<T>(sigma, l, Dist);
-        int N = Kff.cols();
-        Kff += T(sigma_noise) * MatrixN<N, T>::Identity();
-        Eigen::LLT<MatrixN<N, T>> llt(Kff);
+        Kff += T(sigma_noise) * IdentityLike(Kff);
+        Eigen::LLT<MatrixX<T>> llt(Kff);
         assert(llt.info() == Eigen::Success);  // if failed, enlarge the sigma_noise to ensure the PSD of Kff
-        VectorN<N, T> alpha = llt.solve(train_y);
-        MatrixN<N, T> L = llt.matrixL();
+        VectorX<T> alpha = llt.solve(train_y);
+        MatrixX<T> L = llt.matrixL();
         return {Kff, L, alpha};
     }
 
 
@@ -1,3 +1,4 @@
+#pragma once
 #include <Eigen/Dense>
 #include <vector>
 #include <iostream>
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+#pragma once`
`1`	`2`	`#include <Eigen/Dense>`
`2`	`3`	`#include <vector>`
`3`	`4`	`#include <iostream>`