Upgrade ensmallen to 1.14.4 (#10)

coatless · web-flow · commit 203630dbdea8 · 2019-05-13T13:16:45.000-05:00
* Update ensmallen 1.14.4

* Format NEWS entries so that PR numbers link to the ensmallen repository.

* Update version

* Add changelog entry

* Bump R version information
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,12 @@
+2019-05-12  James Balamuta  <balamut2@illinois.edu>
+
+	* DESCRIPTION (Version, Date): Release 1.14.4
+
+	* NEWS.md: Update for Ensmallen release 1.14.4
+
+	* inst/include/ensmallen_bits: Upgraded to Ensmallen 1.14.4
+	* inst/include/ensmallen.hpp: ditto
+
 2019-03-09  James Balamuta  <balamut2@illinois.edu>
 
 	* DESCRIPTION (Version, Date): Release 1.14.1
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: RcppEnsmallen
 Title: Header-Only C++ Mathematical Optimization Library for 'Armadillo'
-Version: 0.1.14.1.1
+Version: 0.1.14.4.1
 Authors@R: c(
     person("James Joseph", "Balamuta", email = "balamut2@illinois.edu", 
            role = c("aut", "cre", "cph"), 
diff --git a/NEWS.md b/NEWS.md
@@ -1,18 +1,27 @@
+# RcppEnsmallen 0.1.14.4.1
+
+- Upgraded to ensmallen release 1.14.4 "Difficult Crimp" (2019-05-12)
+   - Fixes for BigBatchSGD ([#91](https://github.com/mlpack/ensmallen/pull/91)).
+   - Handle eig_sym() failures correctly ([#100](https://github.com/mlpack/ensmallen/pull/100)).
+   - SPSA test tolerance fix ([#97](https://github.com/mlpack/ensmallen/pull/97)).
+   - Minor documentation fixes ([#95](https://github.com/mlpack/ensmallen/pull/95), [#98](https://github.com/mlpack/ensmallen/pull/98)).
+   - Fix newlines at end of file ([#92](https://github.com/mlpack/ensmallen/pull/92)).
+
 # RcppEnsmallen 0.1.14.1.1
 
 - Upgraded to ensmallen release 1.14.1 "Difficult Crimp" (2019-03-09)
-   - Fixes for SPSA (#87).
-   - Optimized CNE and DE (#90). Changed initial population generation 
+   - Fixes for SPSA ([#87](https://github.com/mlpack/ensmallen/pull/87)).
+   - Optimized CNE and DE ([#90](https://github.com/mlpack/ensmallen/pull/90)). Changed initial population generation 
      in CNE to be a normal distribution about the given starting point, 
      which should accelerate convergence.
-   - Add DE optimizer (#77).
-   - Fix for Cholesky decomposition in CMAES (#83).
+   - Add DE optimizer ([#77](https://github.com/mlpack/ensmallen/pull/77)).
+   - Fix for Cholesky decomposition in CMAES ([#83](https://github.com/mlpack/ensmallen/pull/83)).
 
 # RcppEnsmallen 0.1.13.0.1
 
 - Upgraded to ensmallen release 1.13.0 "Coronavirus Invasion" (2019-01-14)
-   - Enhance options for AugLagrangian optimizer (#66).
-   - Add SPSA optimizer (#69).
+   - Enhance options for AugLagrangian optimizer ([#66](https://github.com/mlpack/ensmallen/pull/66)).
+   - Add SPSA optimizer ([#69](https://github.com/mlpack/ensmallen/pull/69)).
    - Fix list of contributors.
    - Make sure all files end with newlines.
 - Reordered SPSA parameters to quiet initialization error surfaced with `-Wreorder`.
@@ -22,12 +31,12 @@
 - Upgraded to ensmallen release 1.12.0 "New Year's Party" (2018-12-30)
    - Add link to ensmallen PDF to README.md.
    - Minor documentation fixes.  Remove too-verbose documentation from source for
-     each optimizer (#61).
-   - Add FTML optimizer (#48).
-   - Add SWATS optimizer (#42).
-   - Add Padam optimizer (#46).
-   - Add Eve optimizer (#45).
-   - Add ResetPolicy() to SGD-like optimizers (#60).
+     each optimizer ([#61](https://github.com/mlpack/ensmallen/pull/61)).
+   - Add FTML optimizer ([#48](https://github.com/mlpack/ensmallen/pull/48)).
+   - Add SWATS optimizer ([#42](https://github.com/mlpack/ensmallen/pull/42)).
+   - Add Padam optimizer ([#46](https://github.com/mlpack/ensmallen/pull/46)).
+   - Add Eve optimizer ([#45](https://github.com/mlpack/ensmallen/pull/45)).
+   - Add ResetPolicy() to SGD-like optimizers ([#60](https://github.com/mlpack/ensmallen/pull/60)).
 - Updated citation information
 
 # RcppEnsmallen 0.1.11.1.1
diff --git a/cran-comments.md b/cran-comments.md
@@ -1,7 +1,7 @@
 ## Test environments
 
-* local OS X install, R 3.5.2
-* ubuntu 14.04 (on travis-ci), R 3.5.2
+* local OS X install, R 3.6.0
+* ubuntu 14.04 (on travis-ci), R 3.6.0
 * win-builder (devel and release)
 
 ## R CMD check results
diff --git a/inst/include/ensmallen_bits/bigbatch_sgd/adaptive_stepsize.hpp b/inst/include/ensmallen_bits/bigbatch_sgd/adaptive_stepsize.hpp
@@ -48,7 +48,7 @@ class AdaptiveStepsize
    * @param searchParameter The backtracking search parameter for each
    *        iteration.
    */
-  AdaptiveStepsize(const double backtrackStepSize = 0.1,
+  AdaptiveStepsize(const double backtrackStepSize = 0.5,
                    const double searchParameter = 0.1) :
       backtrackStepSize(backtrackStepSize),
       searchParameter(searchParameter)
@@ -73,9 +73,9 @@ class AdaptiveStepsize
   void Update(DecomposableFunctionType& function,
               double& stepSize,
               arma::mat& iterate,
-              const arma::mat& gradient,
-              const double gradientNorm,
-              const double sampleVariance,
+              arma::mat& gradient,
+              double& gradientNorm,
+              double& sampleVariance,
               const size_t offset,
               const size_t batchSize,
               const size_t backtrackingBatchSize,
@@ -87,6 +87,55 @@ class AdaptiveStepsize
     // Update the iterate.
     iterate -= stepSize * gradient;
 
+    // Update Gradient & calculate curvature of quadratic approximation.
+    arma::mat functionGradient(iterate.n_rows, iterate.n_cols);
+    arma::mat gradPrevIterate(iterate.n_rows, iterate.n_cols);
+    arma::mat functionGradientPrev(iterate.n_rows, iterate.n_cols);
+
+    double vB = 0;
+    arma::mat delta0, delta1;
+
+    // Initialize previous iterate, if not already initialized.
+    if (iteratePrev.is_empty())
+    {
+      iteratePrev.zeros(iterate.n_rows, iterate.n_cols);
+    }
+
+    // Compute the stochastic gradient estimation.
+    function.Gradient(iterate, offset, gradient, 1);
+    function.Gradient(iteratePrev, offset, gradPrevIterate, 1);
+
+    delta1 = gradient;
+
+    for (size_t j = 1, k = 1; j < backtrackingBatchSize; ++j, ++k)
+    {
+      function.Gradient(iterate, offset + j, functionGradient, 1);
+      delta0 = delta1 + (functionGradient - delta1) / k;
+
+      // Compute sample variance.
+      vB += arma::norm(functionGradient - delta1, 2.0) *
+          arma::norm(functionGradient - delta0, 2.0);
+
+      delta1 = delta0;
+      gradient += functionGradient;
+
+      // Used for curvature calculation.
+      function.Gradient(iteratePrev, offset + j, functionGradientPrev, 1);
+      gradPrevIterate += functionGradientPrev;
+    }
+
+    // Update sample variance & norm of the gradient.
+    sampleVariance = vB;
+    gradientNorm = std::pow(arma::norm(gradient / backtrackingBatchSize, 2), 2.0);
+
+    // Compute curvature.
+    double v = arma::trace(arma::trans(iterate - iteratePrev) *
+        (gradient - gradPrevIterate)) /
+        std::pow(arma::norm(iterate - iteratePrev, 2), 2.0);
+
+    // Update previous iterate.
+    iteratePrev = iterate;
+
     // TODO: Develop an absolute strategy to deal with stepSizeDecay updates in
     // case we arrive at local minima. See #1469 for more details.
     double stepSizeDecay = 0;
@@ -95,11 +144,11 @@ class AdaptiveStepsize
       if (batchSize < function.NumFunctions())
       {
         stepSizeDecay = (1 - (1 / ((double) batchSize - 1) * sampleVariance) /
-            (batchSize * gradientNorm)) / batchSize;
+            (batchSize * gradientNorm)) / v;
       }
       else
       {
-        stepSizeDecay = 1 / function.NumFunctions();
+        stepSizeDecay = 1 / v;
       }
     }
 
@@ -153,7 +202,7 @@ class AdaptiveStepsize
         backtrackingBatchSize);
 
     while (overallObjectiveUpdate >
-        (overallObjective + searchParameter * stepSize * gradientNorm))
+        (overallObjective - searchParameter * stepSize * gradientNorm))
     {
       stepSize *= backtrackStepSize;
 
@@ -163,6 +212,9 @@ class AdaptiveStepsize
     }
   }
 
+  //! Value of the iterate (function parameters) from the previous update.
+  arma::mat iteratePrev;
+
   //! The backtracking step size for each iteration.
   double backtrackStepSize;
 
diff --git a/inst/include/ensmallen_bits/bigbatch_sgd/backtracking_line_search.hpp b/inst/include/ensmallen_bits/bigbatch_sgd/backtracking_line_search.hpp
@@ -89,7 +89,7 @@ class BacktrackingLineSearch
         offset, backtrackingBatchSize);
 
     while (overallObjectiveUpdate >
-        (overallObjective + searchParameter * stepSize * gradientNorm))
+        (overallObjective - searchParameter * stepSize * gradientNorm))
     {
       stepSize /= 2;
 
diff --git a/inst/include/ensmallen_bits/ens_version.hpp b/inst/include/ensmallen_bits/ens_version.hpp
@@ -16,7 +16,7 @@
 // The minor version is two digits so regular numerical comparisons of versions
 // work right.  The first minor version of a release is always 10.
 #define ENS_VERSION_MINOR 14
-#define ENS_VERSION_PATCH 1
+#define ENS_VERSION_PATCH 4
 // If this is a release candidate, it will be reflected in the version name
 // (i.e. the version name will be "RC1", "RC2", etc.).  Otherwise the version
 // name will typically be a seemingly arbitrary set of words that does not
diff --git a/inst/include/ensmallen_bits/fw/atoms.hpp b/inst/include/ensmallen_bits/fw/atoms.hpp
@@ -89,7 +89,7 @@ class Atoms
       // Solve for current gradient.
       arma::mat x;
       RecoverVector(x);
-      arma::mat gradient(size(x));
+      arma::mat gradient(arma::size(x));
       function.Gradient(x, gradient);
 
       // Find possible atom to be deleted.
diff --git a/inst/include/ensmallen_bits/sdp/primal_dual_impl.hpp b/inst/include/ensmallen_bits/sdp/primal_dual_impl.hpp
@@ -132,7 +132,10 @@ Alpha(const arma::mat& A, const arma::mat& dA, double tau, double& alpha)
     return false;
   // TODO(stephentu): We only want the top eigenvalue, we should
   // be able to do better than full eigen-decomposition.
-  const arma::vec evals = arma::eig_sym(-Linv * dA * Linv.t());
+  arma::vec evals;
+  if (!arma::eig_sym(evals, -Linv * dA * Linv.t()))
+    return false;
+  
   const double alphahatinv = evals(evals.n_elem - 1);
   double alphahat = 1. / alphahatinv;
   if (alphahat < 0.)
diff --git a/inst/include/ensmallen_bits/sgdr/cyclical_decay.hpp b/inst/include/ensmallen_bits/sgdr/cyclical_decay.hpp
@@ -27,7 +27,6 @@ namespace ens {
  *
  * @code
  * @article{Loshchilov2016,
- *   title   = {Learning representations by back-propagating errors},
  *   author  = {Ilya Loshchilov and Frank Hutter},
  *   title   = {{SGDR:} Stochastic Gradient Descent with Restarts},
  *   journal = {CoRR},

Original file line number	Diff line number	Diff line change
`@@ -89,7 +89,7 @@ class BacktrackingLineSearch`
`89`	`89`	`offset, backtrackingBatchSize);`
`90`	`90`
`91`	`91`	`while (overallObjectiveUpdate >`
`92`		`- (overallObjective + searchParameter * stepSize * gradientNorm))`
	`92`	`+ (overallObjective - searchParameter * stepSize * gradientNorm))`
`93`	`93`	`{`
`94`	`94`	`stepSize /= 2;`
`95`	`95`
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,6 @@ namespace ens {`
`27`	`27`	`*`
`28`	`28`	`* @code`
`29`	`29`	`* @article{Loshchilov2016,`
`30`		`- * title = {Learning representations by back-propagating errors},`
`31`	`30`	`* author = {Ilya Loshchilov and Frank Hutter},`
`32`	`31`	`* title = {{SGDR:} Stochastic Gradient Descent with Restarts},`
`33`	`32`	`* journal = {CoRR},`