@@ -48,7 +48,7 @@ class AdaptiveStepsize
4848 * @param searchParameter The backtracking search parameter for each
4949 * iteration.
5050 */
51- AdaptiveStepsize (const double backtrackStepSize = 0.1 ,
51+ AdaptiveStepsize (const double backtrackStepSize = 0.5 ,
5252 const double searchParameter = 0.1 ) :
5353 backtrackStepSize (backtrackStepSize),
5454 searchParameter (searchParameter)
@@ -73,9 +73,9 @@ class AdaptiveStepsize
7373 void Update (DecomposableFunctionType& function,
7474 double & stepSize,
7575 arma::mat& iterate,
76- const arma::mat& gradient,
77- const double gradientNorm,
78- const double sampleVariance,
76+ arma::mat& gradient,
77+ double & gradientNorm,
78+ double & sampleVariance,
7979 const size_t offset,
8080 const size_t batchSize,
8181 const size_t backtrackingBatchSize,
@@ -87,6 +87,55 @@ class AdaptiveStepsize
8787 // Update the iterate.
8888 iterate -= stepSize * gradient;
8989
90+ // Update Gradient & calculate curvature of quadratic approximation.
91+ arma::mat functionGradient (iterate.n_rows , iterate.n_cols );
92+ arma::mat gradPrevIterate (iterate.n_rows , iterate.n_cols );
93+ arma::mat functionGradientPrev (iterate.n_rows , iterate.n_cols );
94+
95+ double vB = 0 ;
96+ arma::mat delta0, delta1;
97+
98+ // Initialize previous iterate, if not already initialized.
99+ if (iteratePrev.is_empty ())
100+ {
101+ iteratePrev.zeros (iterate.n_rows , iterate.n_cols );
102+ }
103+
104+ // Compute the stochastic gradient estimation.
105+ function.Gradient (iterate, offset, gradient, 1 );
106+ function.Gradient (iteratePrev, offset, gradPrevIterate, 1 );
107+
108+ delta1 = gradient;
109+
110+ for (size_t j = 1 , k = 1 ; j < backtrackingBatchSize; ++j, ++k)
111+ {
112+ function.Gradient (iterate, offset + j, functionGradient, 1 );
113+ delta0 = delta1 + (functionGradient - delta1) / k;
114+
115+ // Compute sample variance.
116+ vB += arma::norm (functionGradient - delta1, 2.0 ) *
117+ arma::norm (functionGradient - delta0, 2.0 );
118+
119+ delta1 = delta0;
120+ gradient += functionGradient;
121+
122+ // Used for curvature calculation.
123+ function.Gradient (iteratePrev, offset + j, functionGradientPrev, 1 );
124+ gradPrevIterate += functionGradientPrev;
125+ }
126+
127+ // Update sample variance & norm of the gradient.
128+ sampleVariance = vB;
129+ gradientNorm = std::pow (arma::norm (gradient / backtrackingBatchSize, 2 ), 2.0 );
130+
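131+ // Compute curvature: <iterate - iteratePrev, gradient - gradPrevIterate> / ||iterate - iteratePrev||^2 (evaluates to 0/0 if the iterate did not move).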
132+ double v = arma::trace (arma::trans (iterate - iteratePrev) *
133+ (gradient - gradPrevIterate)) /
134+ std::pow (arma::norm (iterate - iteratePrev, 2 ), 2.0 );
135+
136+ // Update previous iterate.
137+ iteratePrev = iterate;
138+
90139 // TODO: Develop an absolute strategy to deal with stepSizeDecay updates in
91140 // case we arrive at local minima. See #1469 for more details.
92141 double stepSizeDecay = 0 ;
@@ -95,11 +144,11 @@ class AdaptiveStepsize
95144 if (batchSize < function.NumFunctions ())
96145 {
97146 stepSizeDecay = (1 - (1 / ((double ) batchSize - 1 ) * sampleVariance) /
98- (batchSize * gradientNorm)) / batchSize ;
147+ (batchSize * gradientNorm)) / v ;
99148 }
100149 else
101150 {
102- stepSizeDecay = 1 / function. NumFunctions () ;
151+ stepSizeDecay = 1 / v ;
103152 }
104153 }
105154
@@ -153,7 +202,7 @@ class AdaptiveStepsize
153202 backtrackingBatchSize);
154203
155204 while (overallObjectiveUpdate >
156- (overallObjective + searchParameter * stepSize * gradientNorm))
205+ (overallObjective - searchParameter * stepSize * gradientNorm))
157206 {
158207 stepSize *= backtrackStepSize;
159208
@@ -163,6 +212,9 @@ class AdaptiveStepsize
163212 }
164213 }
165214
215+ // ! Iterate (function parameters) stored at the end of the previous Update() call.
216+ arma::mat iteratePrev;
217+
166218 // ! The backtracking step size for each iteration.
167219 double backtrackStepSize;
168220
0 commit comments