Skip to content

Commit e24d6e1

Browse files
committed
ENH: SMBO: Add weighted random sampling to further improve benchmarks
1 parent 7ac80ee commit e24d6e1

File tree

3 files changed

+98
-21
lines changed

3 files changed

+98
-21
lines changed

sambo/_smbo.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
_initialize_population,
1212
_sample_population,
1313
_check_bounds,
14-
_check_random_state, _sanitize_constraints, lru_cache,
14+
_check_random_state, _sanitize_constraints, lru_cache, recompute_kde, weighted_uniform_sampling,
1515
)
1616

1717

@@ -117,7 +117,7 @@ def __init__(
117117
max_iter: int = INT32_MAX,
118118
n_init: Optional[int] = None,
119119
n_candidates: Optional[int] = None,
120-
n_iter_no_change: int = 10,
120+
n_iter_no_change: int = 5,
121121
n_models: int = 1,
122122
tol: float = FLOAT32_PRECISION,
123123
estimator: Literal['gp', 'et', 'gb'] | _SklearnLikeRegressor = None,
@@ -147,7 +147,9 @@ def __init__(
147147
rng = _check_random_state(rng)
148148

149149
if n_init is None:
150-
n_init = 0 if not callable(fun) else min(max(1, max_iter - 20), 150 * len(bounds))
150+
n_init = (0 if not callable(fun) else
151+
min(max(1, max_iter - 20),
152+
int(40 * len(bounds) * max(1, np.log2(len(bounds))))))
151153
assert max_iter >= n_init, (max_iter, n_init)
152154

153155
if n_candidates is None:
@@ -201,6 +203,9 @@ def __init__(
201203
self._y = y
202204
assert len(X) == len(y), (X, y)
203205

206+
self._kde = None
207+
self._prev_y_min = np.inf
208+
204209
# Cache methods on the _instance_
205210
self._init_once = lru_cache(1)(self._init_once)
206211
self.top_k = lru_cache(1)(self.top_k)
@@ -334,8 +339,18 @@ def ask(
334339
assert isinstance(kappa, (Real, Iterable)), kappa
335340
self._init_once()
336341

337-
n_points = max(10_000, 1000 * int(len(self.bounds)**1.2))
338-
X = _sample_population(self.bounds, n_points, self.constraints, self.rng)
342+
n_points = min(80_000, 20_000 * int(len(self.bounds)**2)) # TODO: Make this a param?
343+
nfev = len(self._X)
344+
if nfev < 10 * len(self.bounds)**2:
345+
X = _sample_population(self.bounds, n_points, self.constraints, self.rng)
346+
else:
347+
y_min = np.min(self._y)
348+
if self._kde is None or (nfev < 200 or nfev % 5 == 0 or y_min < self._prev_y_min):
349+
self._prev_y_min = y_min
350+
self._kde = recompute_kde(np.array(self._X), np.array(self._y))
351+
X = weighted_uniform_sampling(
352+
self._kde, self.bounds, n_points, self.constraints, self.rng)
353+
339354
X, mean, std = self._predict(X)
340355
criterion = acq_func(mean=mean, std=std, kappa=kappa)
341356
n_candidates = min(n_candidates, criterion.shape[1])
@@ -552,7 +567,7 @@ def smbo(
552567
max_iter: int = INT32_MAX,
553568
n_init: Optional[int] = None,
554569
n_candidates: Optional[int] = None,
555-
n_iter_no_change: int = 10,
570+
n_iter_no_change: int = 5,
556571
n_models: int = 1,
557572
tol: float = FLOAT32_PRECISION,
558573
estimator: Optional[str | _SklearnLikeRegressor] = None,

sambo/_test.py

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@
1313
Bounds, NonlinearConstraint, rosen, minimize as scipy_minimize,
1414
shgo as scipy_shgo,
1515
)
16+
from scipy.stats import gaussian_kde
1617

1718
from sambo import minimize, Optimizer, SamboSearchCV
1819
from sambo._space import Space
1920
from sambo._sceua import sceua
2021
from sambo._shgo import shgo
2122
from sambo._smbo import smbo
2223
from sambo.plot import plot_convergence, plot_evaluations, plot_objective, plot_regret
23-
from sambo._util import OptimizeResult
24-
24+
from sambo._util import OptimizeResult, recompute_kde, weighted_uniform_sampling
2525

2626
minimize = partial(minimize, rng=0)
2727
sceua = partial(sceua, rng=0)
@@ -214,7 +214,7 @@ def test_sceua(self):
214214

215215
def test_smbo(self):
216216
res = minimize(**ROSEN_TEST_PARAMS, method='smbo', max_iter=20, estimator='gp')
217-
check_result(res, 0, atol=55)
217+
check_result(res, 0, atol=5)
218218

219219
def test_args(self):
220220
def f(x, a):
@@ -240,7 +240,7 @@ def f(x):
240240

241241
counter = 0
242242
_ = _minimize(f, method='smbo')
243-
self.assertEqual(counter, MAX_ITER)
243+
self.assertLessEqual(counter, MAX_ITER)
244244

245245
def test_constraints(self):
246246
def f(x):
@@ -313,16 +313,11 @@ def test_our_params_match_scipy_optimize_params(self):
313313

314314
class TestSklearnEstimators(unittest.TestCase):
315315
def test_estimator_factory(self):
316-
DEFAULT_KWARGS = {'max_iter': 50, 'n_iter_no_change': 10, 'rng': 0}
317-
ESTIMATOR_KWARGS = {
318-
'gp': {},
319-
'et': {'n_iter_no_change': 40},
320-
'gb': {'n_iter_no_change': 20, 'rng': 2},
321-
}
316+
DEFAULT_KWARGS = {'max_iter': 20, 'n_iter_no_change': 5, 'rng': 0}
322317
for estimator in BUILTIN_ESTIMATORS:
323318
with self.subTest(estimator=estimator):
324319
res = smbo(lambda x: sum((x-2)**2), bounds=[(-100, 100)], estimator=estimator,
325-
**dict(DEFAULT_KWARGS, **ESTIMATOR_KWARGS[estimator]))
320+
**dict(DEFAULT_KWARGS))
326321
self.assertLess(res.fun, 1, msg=res)
327322

328323
def test_SamboSearchCV_large_param_grid(self):
@@ -353,11 +348,17 @@ def test_SamboSearchCV_large_param_grid(self):
353348

354349
class TestDocs(unittest.TestCase):
355350
def test_make_doc_plots(self):
351+
KWARGS = {
352+
'shgo': dict(n_init=30),
353+
'smbo': dict(n_init=30),
354+
'sceua': dict(n_complexes=3),
355+
}
356356
results = [
357357
minimize(
358-
rosen, bounds=[(-2., 2.), (-2., 2.)],
359-
constraints=lambda x: sum(x**2) <= 2*len(x),
360-
max_iter=120, method=method, rng=2,
358+
rosen, bounds=[(-2., 2.)]*2,
359+
constraints=lambda x: sum(x**2) <= 2**len(x),
360+
max_iter=100, method=method, rng=2,
361+
**KWARGS.get(method, {}),
361362
)
362363
for method in BUILTIN_METHODS
363364
]
@@ -392,7 +393,7 @@ def test_make_doc_plots(self):
392393
def test_website_example1(self):
393394
res = minimize(
394395
rosen, bounds=[(-2., 2.), ] * 2,
395-
constraints=lambda x: sum(x**2) <= len(x),
396+
constraints=lambda x: sum(x**2) <= 2**len(x),
396397
n_init=7, method='shgo', rng=0,
397398
)
398399
print(type(res), res, sep='\n\n')
@@ -478,5 +479,35 @@ def test_annotations(self):
478479
annot[arg], annot_ref[arg], msg=f'{fun.__qualname__} / {arg}')
479480

480481

482+
class TestUtil(unittest.TestCase):
483+
def test_weighted_uniform_sampling(self):
484+
rng = np.random.default_rng(2)
485+
X = rng.uniform(-10, 10, (100, 2))
486+
y = rng.uniform(1, 10, 100)
487+
bounds = [(-10, 10), (-10, 10)]
488+
n_samples = 10000
489+
kde = recompute_kde(X, y)
490+
sampled_points = weighted_uniform_sampling(kde, bounds, n_samples, None, 0)
491+
492+
# Verify results
493+
hist, xedges, yedges = np.histogram2d(*sampled_points.T, range=bounds)
494+
# Compare histogram density with weight distribution
495+
kde = gaussian_kde(X.T, weights=(np.max(y) - y) / np.sum(np.max(y) - y))
496+
test_grid = np.array(np.meshgrid(xedges[:-1], yedges[:-1])).T.reshape(-1, 2)
497+
pdf_values = kde(test_grid.T).reshape(hist.shape)
498+
# Normalize for direct comparison
499+
hist_normalized = hist / np.sum(hist)
500+
pdf_normalized = pdf_values / np.sum(pdf_values)
501+
# Plot results
502+
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
503+
extent = np.array(bounds).flatten()
504+
axes[0].imshow(hist_normalized, extent=extent, origin='lower', cmap='Blues')
505+
axes[1].imshow(pdf_normalized, extent=extent, origin='lower', cmap='Reds')
506+
plt.show()
507+
508+
diff = np.abs(hist_normalized - pdf_normalized).mean()
509+
self.assertLess(diff, .05)
510+
511+
481512
if __name__ == '__main__':
482513
unittest.main()

sambo/_util.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import heapq
22
from functools import lru_cache as _lru_cache, wraps
3+
from itertools import islice
34
from numbers import Integral, Real
45
from threading import Lock
56
from typing import Any, Callable, Optional, Protocol, runtime_checkable
67

78
import numpy as np
89
from scipy.optimize import Bounds, NonlinearConstraint, OptimizeResult as _OptimizeResult
10+
from scipy.stats import gaussian_kde
911
from scipy.stats.qmc import LatinHypercube
1012

1113
FLOAT32_PRECISION = 10**-np.finfo(np.float32).precision
@@ -276,3 +278,32 @@ def constraints(x, *, _c=constraints):
276278
def constraints(x, *, _c=constraints['fun']):
277279
return _c(x) == 0
278280
return constraints
281+
282+
283+
def weighted_uniform_sampling(kde, bounds, size, constraints, rng):
284+
"""Sample points from a weighted density within given bounds"""
285+
rng = _check_random_state(rng)
286+
lb, ub = np.array(bounds).T
287+
if constraints is None:
288+
def constraints(_):
289+
return True
290+
try:
291+
points = np.array(
292+
list(islice(
293+
(x for _ in range(1000)
294+
for x in kde.resample(size, seed=rng).T
295+
if constraints(x) and np.all((lb <= x) & (x <= ub))),
296+
size)))
297+
except ValueError as ex:
298+
if 'too short' in str(ex):
299+
raise RuntimeError('Constraints seemingly cannot be satisfied.')
300+
raise
301+
return points
302+
303+
304+
def recompute_kde(X, y):
305+
w = np.max(y) - y
306+
w /= np.sum(w)
307+
w **= 3
308+
kde = gaussian_kde(X.T, bw_method='silverman', weights=w)
309+
return kde

0 commit comments

Comments
 (0)