Skip to content

Commit 288493f

Browse files
committed
Bugfix for 1-sided bounds, deterministic initialization, faster subproblem solves, faster linear algebra in Hessian and interpolation problems. This closes #3 and closes #5
1 parent 2ac32d8 commit 288493f

File tree

13 files changed

+175
-139
lines changed

13 files changed

+175
-139
lines changed

dfols/controller.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -326,22 +326,25 @@ def get_new_direction_for_growing(self, step_length):
326326

327327
def trust_region_step(self):
328328
# Build model for full least squares objectives
329-
gopt, hq = self.model.build_full_model()
330-
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, hq, self.model.sl, self.model.su, self.delta)
331-
return d, gopt, hq, gnew, crvmin
329+
gopt, H = self.model.build_full_model()
330+
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
331+
return d, gopt, H, gnew, crvmin
332332

333333
def geometry_step(self, knew, adelt, number_of_samples, params):
334334
logging.debug("Running geometry-fixing step")
335335
try:
336336
c, g = self.model.lagrange_gradient(knew)
337337
# c = 1.0 if knew == self.model.kopt else 0.0 # based at xopt, just like d
338338
# Solve problem: bounds are sl <= xnew <= su, and ||xnew-xopt|| <= adelt
339-
xnew = trsbox_geometry(self.model.xopt(), c, g, self.model.sl, self.model.su, adelt)
339+
logging.debug("xopt = %s" % str(self.model.xopt()))
340+
logging.debug("sl = %s" % str(self.model.sl))
341+
logging.debug("su = %s" % str(self.model.su))
342+
xnew = trsbox_geometry(self.model.xopt(), c, g, np.minimum(self.model.sl, 0.0), np.maximum(self.model.su, 0.0), adelt)
340343
except LA.LinAlgError:
341344
exit_info = ExitInformation(EXIT_LINALG_ERROR, "Singular matrix encountered in geometry step")
342345
return exit_info # didn't fix geometry - return & quit
343346

344-
gopt, hq = self.model.build_full_model() # save here, to calculate predicted value from geometry step
347+
gopt, H = self.model.build_full_model() # save here, to calculate predicted value from geometry step
345348
fopt = self.model.fopt() # again, evaluate now, before model.change_point()
346349
d = xnew - self.model.xopt()
347350
x = self.model.as_absolute_coordinates(xnew)
@@ -362,8 +365,8 @@ def geometry_step(self, knew, adelt, number_of_samples, params):
362365
# Estimate actual reduction to add to diffs vector
363366
f = sumsq(np.mean(rvec_list[:num_samples_run, :], axis=0)) # estimate actual objective value
364367

365-
# pred_reduction = - calculate_model_value(gopt, hq, d)
366-
pred_reduction = - model_value(gopt, hq, d)
368+
# pred_reduction = - calculate_model_value(gopt, H, d)
369+
pred_reduction = - model_value(gopt, H, d)
367370
actual_reduction = fopt - f
368371
self.diffs = [abs(pred_reduction - actual_reduction), self.diffs[0], self.diffs[1]]
369372
return None # exit_info = None
@@ -424,16 +427,19 @@ def choose_point_to_replace(self, d, skip_kopt=True):
424427
knew = None # may knew never be set here?
425428
exit_info = None
426429

430+
try:
431+
cs, gs = self.model.lagrange_gradient(k=None) # find all Lagrange polynomials for k in range(self.model.npt())
432+
except LA.LinAlgError:
433+
exit_info = ExitInformation(EXIT_LINALG_ERROR, "Singular matrix when choosing point to replace")
434+
return knew, exit_info
435+
427436
for k in range(self.model.npt()):
428437
if skip_kopt and k == self.model.kopt:
429438
continue # skip this k
430439

431-
# Build Lagrange polynomial
432-
try:
433-
c, g = self.model.lagrange_gradient(k)
434-
except LA.LinAlgError:
435-
exit_info = ExitInformation(EXIT_LINALG_ERROR, "Singular matrix when choosing point to replace")
436-
break # end & quit
440+
# Extract Lagrange polynomial (based at xopt)
441+
c = cs[k]
442+
g = gs[:, k]
437443

438444
den = c + np.dot(g, d)
439445

@@ -445,9 +451,9 @@ def choose_point_to_replace(self, d, skip_kopt=True):
445451

446452
return knew, exit_info
447453

448-
def done_with_current_rho(self, xnew, gnew, crvmin, hq, current_iter):
454+
def done_with_current_rho(self, xnew, gnew, crvmin, H, current_iter):
449455
# (xnew, gnew, crvmin) come from trust region step
450-
# hq is Hessian of model for the full objective
456+
# H is Hessian of model for the full objective
451457

452458
# Wait at least 3 iterations between reductions of rho
453459
if current_iter <= self.last_successful_iter + 2:
@@ -466,7 +472,7 @@ def done_with_current_rho(self, xnew, gnew, crvmin, hq, current_iter):
466472
if xnew[j] == self.model.su[j]:
467473
bdtest = -gnew[j]
468474
if bdtest < bdtol:
469-
curv = hq.get_element(j, j) # curv = Hessian(j, j)
475+
curv = H[j,j]
470476
bdtest += 0.5 * curv * self.rho
471477
if bdtest < bdtol:
472478
return False
@@ -489,10 +495,10 @@ def reduce_rho(self, current_iter, params):
489495
self.last_successful_iter = current_iter # reset successful iteration check
490496
return
491497

492-
def calculate_ratio(self, current_iter, rvec_list, d, gopt, hq):
498+
def calculate_ratio(self, current_iter, rvec_list, d, gopt, H):
493499
exit_info = None
494500
f = sumsq(np.mean(rvec_list, axis=0)) # estimate actual objective value
495-
pred_reduction = - model_value(gopt, hq, d)
501+
pred_reduction = - model_value(gopt, H, d)
496502
actual_reduction = self.model.fopt() - f
497503
self.diffs = [abs(actual_reduction - pred_reduction), self.diffs[0], self.diffs[1]]
498504
if min(sqrt(sumsq(d)), self.delta) > self.rho: # if ||d|| >= rho, successful!

dfols/model.py

Lines changed: 46 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
import numpy as np
3636
import scipy.linalg as LA
3737

38-
from .hessian import Hessian
3938
from .trust_region import trsbox_geometry
4039
from .util import sumsq
4140

@@ -119,7 +118,7 @@ def fopt(self):
119118
def xpt(self, k, abs_coordinates=False):
120119
assert 0 <= k < self.npt(), "Invalid index %g" % k
121120
if not abs_coordinates:
122-
return self.points[k, :].copy()
121+
return np.minimum(np.maximum(self.sl, self.points[k, :].copy()), self.su)
123122
else:
124123
# Apply bounds and convert back to absolute coordinates
125124
return self.xbase + np.minimum(np.maximum(self.sl, self.points[k, :]), self.su)
@@ -285,20 +284,23 @@ def factorise_geom_system(self):
285284
return
286285

287286
def solve_geom_system(self, rhs):
287+
# To do preconditioning below, we will need to scale each column of A elementwise by the entries of some vector
288+
col_scale = lambda A, scale: (A.T*scale).T # Uses the trick that A*x scales the 0th column of A by x[0], etc.
289+
288290
if self.factorisation_current:
289291
if self.qr_of_transpose:
290292
# Growing case: solve underdetermined system W*x=rhs with W.T = Q*R
291293
# Golub & Van Loan (3rd edn), Algorithm 5.7.2
292-
Rb = LA.solve_triangular(self.R, rhs * self.left_scaling, trans='T') # R.T \ rhs
293-
return np.dot(self.Q, Rb) * self.right_scaling # minimal norm solution
294+
Rb = LA.solve_triangular(self.R, col_scale(rhs, self.left_scaling), trans='T') # R.T \ rhs
295+
return col_scale(np.dot(self.Q, Rb), self.right_scaling) # minimal norm solution
294296
else:
295297
# Normal case: solve overdetermined system W*x=rhs with W=Q*R
296-
Qb = np.dot(self.Q.T, rhs * self.left_scaling)
297-
return LA.solve_triangular(self.R, Qb) * self.right_scaling
298+
Qb = np.dot(self.Q.T, col_scale(rhs, self.left_scaling))
299+
return col_scale(LA.solve_triangular(self.R, Qb), self.right_scaling)
298300
else:
299301
logging.warning("model.solve_geom_system not using factorisation")
300302
W, left_scaling, right_scaling = self.interpolation_matrix()
301-
return LA.lstsq(W, rhs * left_scaling)[0] * right_scaling
303+
return col_scale(LA.lstsq(W, col_scale(rhs * left_scaling))[0], right_scaling)
302304

303305
def interpolate_mini_models_svd(self, verbose=False, make_full_rank=False, min_sing_val=1e-6, sing_val_frac=1.0, max_jac_cond=1e8,
304306
get_chg_J=False):
@@ -314,31 +316,26 @@ def interpolate_mini_models_svd(self, verbose=False, make_full_rank=False, min_s
314316
norm_J_error = 0.0
315317
linalg_resid = 0.0
316318

317-
318319
if make_full_rank:
319320
# Remove old full-rank components of Jacobian
320321
Y = self.xpt_directions(include_kopt=False).T
321322
Qy, Ry = LA.qr(Y, mode='full') # Qy is (n,n), Ry is (n,npt-1)=(n,p)
322323
Qhat = Qy[:, :Y.shape[1]]
323324
self.model_jac = np.dot(self.model_jac, np.dot(Qhat, Qhat.T))
324-
for m1 in range(self.m()):
325-
g_old = self.model_jac[m1, :].copy()
326-
rhs = self.fval_v[fval_row_idx, m1] # length (npt)
327-
try:
328-
dg = self.solve_geom_system(rhs)
329-
except LA.LinAlgError:
330-
return False, None, None, None, None # flag error
331-
except ValueError:
332-
return False, None, None, None, None # flag error (e.g. inf or NaN encountered)
333-
334-
if not np.all(np.isfinite(dg)): # another check for inf or NaN
335-
return False, None, None, None, None # flag error
336325

337-
self.model_jac[m1, :] = dg[1:]
338-
self.model_const[m1] = dg[0] - np.dot(self.model_jac[m1, :], xopt) # shift base to xbase
339-
if verbose or get_chg_J:
340-
norm_J_error += sumsq(dg[1:] - g_old)
341-
linalg_resid += sumsq(np.dot(W, dg) - rhs)
326+
rhs = self.fval_v[fval_row_idx, :] # size npt * m
327+
try:
328+
dg = self.solve_geom_system(rhs) # size (n+1)*m
329+
except LA.LinAlgError:
330+
return False, None, None, None, None # flag error
331+
except ValueError:
332+
return False, None, None, None, None # flag error (e.g. inf or NaN encountered)
333+
J_old = self.model_jac.copy()
334+
self.model_jac = dg[1:,:].T
335+
self.model_const = dg[0,:] - np.dot(self.model_jac, xopt) # shift base to xbase
336+
if verbose or get_chg_J:
337+
norm_J_error = np.linalg.norm(self.model_jac - J_old, ord='fro')**2
338+
linalg_resid = np.linalg.norm(W.dot(dg) - rhs)**2
342339

343340
if make_full_rank:
344341
try:
@@ -368,20 +365,29 @@ def build_full_model(self):
368365

369366
# Apply scaling based on convention for objective - this code uses sumsq(rvec) not 0.5*sumsq(rvec)
370367
g = 2.0 * np.dot(J.T, r) # n-vector
371-
hess = Hessian(self.n(), vals=2.0 * np.dot(J.T, J))
372-
return g, hess
368+
H = 2.0 * np.dot(J.T, J)
369+
return g, H
373370

374-
def lagrange_gradient(self, k, factorise_first=True):
375-
assert 0 <= k < self.npt(), "Invalid index %g" % k
371+
def lagrange_gradient(self, k=None, factorise_first=True):
376372
if factorise_first:
377373
self.factorise_geom_system()
378374

379-
rhs = np.zeros((self.npt(),))
380-
rhs[k] = 1.0
375+
if k is not None:
376+
assert 0 <= k < self.npt(), "Invalid index %g" % k
377+
rhs = np.zeros((self.npt(),))
378+
rhs[k] = 1.0
379+
else:
380+
rhs = np.eye(self.npt()) # find all Lagrange polynomials
381381
soln = self.solve_geom_system(rhs)
382-
c = soln[0]
383-
g = soln[1:]
384-
return c, g # constant, gradient [all based at xopt]
382+
383+
if k is not None:
384+
c = soln[0]
385+
g = soln[1:]
386+
return c, g # constant, gradient [all based at xopt]
387+
else:
388+
cs = soln[0, :]
389+
gs = soln[1:, :]
390+
return cs, gs # constant terms in each entry and gradient terms in each col [all based at xopt]
385391

386392
def poisedness_constant(self, delta, xbase=None, xbase_in_abs_coords=True):
387393
# Calculate the poisedness constant of the current interpolation set in B(xbase, delta)
@@ -391,8 +397,13 @@ def poisedness_constant(self, delta, xbase=None, xbase_in_abs_coords=True):
391397
xbase = self.xopt()
392398
elif xbase_in_abs_coords:
393399
xbase = xbase - self.xbase # shift to correct position
400+
# Calculate all Lagrange polynomials at once
401+
self.factorise_geom_system()
402+
rhs = np.eye(self.npt()) # values to interpolate
403+
soln = self.solve_geom_system(rhs)
394404
for k in range(self.npt()):
395-
c, g = self.lagrange_gradient(k, factorise_first=True)
405+
# Extract Lagrange poly from soln matrix (based at xopt)
406+
c = soln[0,k]; g = soln[1:, k]
396407
newc = c + np.dot(g, xbase - self.xopt()) # based at xbase
397408
# Solve problem: bounds are sl <= x <= su, and ||x-xopt|| <= delta
398409
xmax = trsbox_geometry(xbase, newc, g, self.sl, self.su, delta)

dfols/params.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __init__(self, n, npt, maxfun, objfun_has_noise=False):
3939
self.params["general.safety_step_thresh"] = 0.5 # safety step called if ||d|| <= thresh * rho
4040
self.params["general.check_objfun_for_overflow"] = True
4141
# Initialisation
42-
self.params["init.random_initial_directions"] = True
42+
self.params["init.random_initial_directions"] = True if npt > (n+1)*(n+2)//2 else False # only if needed
4343
self.params["init.run_in_parallel"] = False # only available for random directions at the moment
4444
self.params["init.random_directions_make_orthogonal"] = True # although random > orthogonal, avoid for init
4545
# Interpolation

dfols/solver.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
256256

257257

258258
# Trust region step
259-
d, gopt, hq, gnew, crvmin = control.trust_region_step()
259+
d, gopt, H, gnew, crvmin = control.trust_region_step()
260260
logging.debug("Trust region step is d = " + str(d))
261261
xnew = control.model.xopt() + d
262262
dnorm = min(LA.norm(d), control.delta)
@@ -353,7 +353,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
353353
diagnostic_info.update_iter_type(ITER_SAFETY)
354354
diagnostic_info.update_slow_iter(-1)
355355

356-
if not control.done_with_current_rho(xnew, gnew, crvmin, hq, current_iter):
356+
if not control.done_with_current_rho(xnew, gnew, crvmin, H, current_iter):
357357
distsq = (10.0 * control.rho) ** 2
358358
number_of_samples = max(nsamples(control.delta, control.rho, current_iter, nruns_so_far), 1)
359359
update_delta = True # we do reduce delta for safety steps
@@ -497,7 +497,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
497497
break # quit
498498

499499
# Estimate f in order to compute 'actual reduction'
500-
ratio, exit_info = control.calculate_ratio(current_iter, rvec_list[:num_samples_run, :], d, gopt, hq)
500+
ratio, exit_info = control.calculate_ratio(current_iter, rvec_list[:num_samples_run, :], d, gopt, H)
501501
if exit_info is not None:
502502
if exit_info.able_to_do_restart() and params("restarts.use_restarts") and params(
503503
"restarts.use_soft_restarts"):
@@ -824,11 +824,14 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
824824
if bounds is None:
825825
xl = None
826826
xu = None
827-
scaling_within_bounds = False
828827
else:
829828
assert len(bounds) == 2, "bounds must be a 2-tuple of (lower, upper), where both are arrays of size(x0)"
830-
xl = bounds[0].astype(np.float)
831-
xu = bounds[1].astype(np.float)
829+
xl = bounds[0].astype(np.float) if bounds[0] is not None else None
830+
xu = bounds[1].astype(np.float) if bounds[1] is not None else None
831+
832+
if (xl is None or xu is None) and scaling_within_bounds:
833+
scaling_within_bounds = False
834+
warnings.warn("Ignoring scaling_within_bounds=True for unconstrained problem/1-sided bounds", RuntimeWarning)
832835

833836
if xl is None:
834837
xl = -1e20 * np.ones((n,)) # unconstrained

dfols/tests/test_model.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,11 @@ def runTest(self):
280280
rosenbrock(x2)-rosenbrock(model.xbase), thresh=1e-10), 'Wrong x2 (no constant)')
281281
self.assertTrue(array_compare(model.model_value(x2 - model.xbase, d_based_at_xopt=False, with_const_term=False),
282282
rosenbrock(x2) - rosenbrock(model.xbase), thresh=1e-10), 'Wrong x2 (no constant v2)')
283-
g, hess = model.build_full_model()
283+
g, H = model.build_full_model()
284284
r = rosenbrock(x1)
285285
J = model.model_jac
286286
self.assertTrue(array_compare(g, 2.0*np.dot(J.T, r), thresh=1e-10), 'Bad gradient')
287-
self.assertTrue(array_compare(hess.as_full(), 2.0*np.dot(J.T, J)), 'Bad Hessian')
287+
self.assertTrue(array_compare(H, 2.0*np.dot(J.T, J)), 'Bad Hessian')
288288

289289

290290
class TestGeomSystem(unittest.TestCase):
@@ -426,7 +426,7 @@ def runTest(self):
426426
self.assertTrue(array_compare(A_for_interp, A_after_scaling), 'Interp matrix 1')
427427
# For reference: model based around model.xbase
428428
interp_ok, interp_error, norm_J_error, linalg_resid, ls_interp_cond_num = model.interpolate_mini_models_svd()
429-
J_true = np.linalg.lstsq(A_for_interp, b)[0]
429+
J_true = np.linalg.lstsq(A_for_interp, b, rcond=None)[0]
430430
self.assertTrue(interp_ok, 'Interpolation failed')
431431
# print(model.model_const, model.model_jac)
432432
# print(J_true[0] - np.dot(J_true[1:], model.xopt()), J_true[1:])
@@ -468,7 +468,7 @@ def runTest(self):
468468
# For reference: model based around model.xbase
469469
interp_ok, interp_error, norm_J_error, linalg_resid, ls_interp_cond_num = model.interpolate_mini_models_svd()
470470
self.assertTrue(interp_ok, 'Interpolation failed')
471-
J_true = np.linalg.lstsq(A_for_interp, b)[0]
471+
J_true = np.linalg.lstsq(A_for_interp, b, rcond=None)[0]
472472
g = J_true[1:]
473473
c = J_true[0] - np.dot(g, model.xopt()) # centred at xbase
474474
self.assertTrue(array_compare(model.model_const, np.array([c])), 'Wrong constant term')

dfols/tests/test_params.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ def runTest(self):
3434
npt = n + 1
3535
maxfun = 50 * (n + 1)
3636
p = ParameterList(n, npt, maxfun)
37-
self.assertTrue(p("init.random_initial_directions"), 'Bad init dirns/access')
38-
p("init.random_initial_directions", False) # set to False
3937
self.assertFalse(p("init.random_initial_directions"), 'Bad init dirns/access')
38+
p("init.random_initial_directions", True) # set to True
39+
self.assertTrue(p("init.random_initial_directions"), 'Bad init dirns/access')
4040

4141

4242
class TestFail(unittest.TestCase):

dfols/tests/test_solver.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def runTest(self):
8787
print(soln.x)
8888
self.assertTrue(array_compare(soln.x, xmin, thresh=1e-2), "Wrong xmin")
8989
self.assertTrue(abs(soln.f - fmin) < 1e-4, "Wrong fmin")
90-
self.assertTrue(array_compare(soln.jacobian, jacmin, thresh=1e-3), "Wrong jacobian")
90+
self.assertTrue(array_compare(soln.jacobian, jacmin, thresh=2e-2), "Wrong jacobian")
9191

9292

9393
class TestLinear(unittest.TestCase):
@@ -98,7 +98,7 @@ def runTest(self):
9898
A = np.random.rand(m, n)
9999
b = np.random.rand(m)
100100
objfun = lambda x: np.dot(A, x) - b
101-
xmin = np.linalg.lstsq(A, b)[0]
101+
xmin = np.linalg.lstsq(A, b, rcond=None)[0]
102102
fmin = np.dot(objfun(xmin), objfun(xmin))
103103
x0 = np.zeros((n,))
104104
soln = dfols.solve(objfun, x0)

0 commit comments

Comments
 (0)