Commit a0bada5

Add iteration controller for BayesianOptimizer
1 parent d4d6777 commit a0bada5

File tree: 2 files changed, +866 -347 lines

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 272 additions & 22 deletions
@@ -18,18 +18,24 @@
 Calibration with Bayesian Optimization
 """

-from dataclasses import dataclass, InitVar
+from dataclasses import dataclass, InitVar, field
 from typing import Mapping, Optional, Any, Union, List, Tuple
 from numbers import Number
 from itertools import combinations, repeat
+from collections import deque, namedtuple
+import logging

 import pandas as pd
+import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.axes as maxes
-from bayes_opt import BayesianOptimization
+from bayes_opt import BayesianOptimization, Events, UtilityFunction
 from bayes_opt.target_space import TargetSpace

-from .base import Output, Optimizer, OutputEvaluator
+from .base import Input, Output, Optimizer, OutputEvaluator
+
+
+LOGGER = logging.getLogger(__name__)


 @dataclass
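Since the module now obtains a ``LOGGER`` via ``logging.getLogger(__name__)`` and reports iteration progress through it, calling code can surface those messages with the standard library. A minimal sketch (assuming no other logging configuration is in place; CLIMADA's own logging setup may already cover this):

    import logging

    # Show INFO-level messages from the optimizer module, e.g.
    # "Optimization iteration: 0" and "No improvement. Stop optimization."
    logging.basicConfig(level=logging.INFO)
    logging.getLogger("climada.util.calibrate.bayesian_optimizer").setLevel(logging.INFO)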
@@ -176,6 +182,246 @@ def plot_single(x, y):
         return [plot_single(p_first, p_second) for p_first, p_second in iterable]


+Improvement = namedtuple(
+    "Improvement", ["iteration", "sample", "random", "target", "improvement"]
+)
+
+
+class StopEarly(Exception):
+    """An exception for stopping an optimization iteration early"""
+
+    pass
+
+
+@dataclass(eq=False)
+class BayesianOptimizerController(object):
+    """A class for controlling the iterations of a :py:class:`BayesianOptimizer`.
+
+    Each iteration in the optimizer consists of a random sampling of the parameter space
+    with :py:attr:`init_points` steps, followed by a Gaussian process sampling with
+    :py:attr:`n_iter` steps. During the latter, the :py:attr:`kappa` parameter is
+    reduced to reach :py:attr:`kappa_min` at the end of the iteration. The iteration is
+    stopped prematurely if improvements of the best guess are below
+    :py:attr:`min_improvement` for :py:attr:`min_improvement_count` consecutive times.
+    At the beginning of the next iteration, :py:attr:`kappa` is reset to its original
+    value.
+
+    Optimization stops if :py:attr:`max_iterations` is reached or if an entire iteration
+    saw no improvement.
+
+    Attributes
+    ----------
+    init_points : int
+        Number of randomly sampled points during each iteration.
+    n_iter : int
+        Maximum number of points using Gaussian process sampling during each iteration.
+    min_improvement : float
+        Minimal relative improvement. If improvements are below this value
+        :py:attr:`min_improvement_count` times, the iteration is stopped.
+    min_improvement_count : int
+        Number of times the :py:attr:`min_improvement` must be undercut to stop the
+        iteration.
+    kappa : float
+        Parameter controlling exploration of the upper-confidence-bound acquisition
+        function of the sampling algorithm. Lower values mean less exploration of the
+        parameter space and more exploitation of local information. This value is
+        reduced throughout one iteration, reaching :py:attr:`kappa_min` at the
+        last iteration step.
+    kappa_min : float
+        Minimal value of :py:attr:`kappa` after :py:attr:`n_iter` steps.
+    max_iterations : int
+        Maximum number of iterations before optimization is stopped, irrespective of
+        convergence.
+    utility_func_kwargs
+        Further keyword arguments to the ``bayes_opt.UtilityFunction``.
+    """
+
+    # Init attributes
+    init_points: int = 0
+    n_iter: int = 0
+    min_improvement: float = 1e-3
+    min_improvement_count: int = 2
+    kappa: float = 2.576
+    kappa_min: float = 0.1
+    max_iterations: int = 10
+    utility_func_kwargs: dict[str, Union[int, float, str]] = field(default_factory=dict)
+
+    # Other attributes
+    kappa_decay: float = field(init=False, default=0.96)
+    steps: int = field(init=False, default=0)
+    iterations: int = field(init=False, default=0)
+    _improvements: deque[Improvement] = field(init=False, default_factory=deque)
+    _last_it_improved: int = 0
+    _last_it_end: int = 0
+
+    def __post_init__(self):
+        """Set the decay factor for :py:attr:`kappa`."""
+        self.kappa_decay = np.exp(
+            (np.log(self.kappa_min) - np.log(self.kappa)) / self.n_iter
+        )
+
+    @classmethod
+    def from_input(cls, inp: Input, sampling_base: float = 4, **kwargs):
+        """Create a controller from a calibration input
+
+        This uses the number of parameters to determine the appropriate values for
+        :py:attr:`init_points` and :py:attr:`n_iter`. Both values are set to
+        :math:`b^N`, where :math:`b` is the ``sampling_base`` parameter and :math:`N`
+        is the number of estimated parameters.
+
+        Parameters
+        ----------
+        inp : Input
+            Input to the calibration
+        sampling_base : float, optional
+            Base for determining the sample size. Increase this for denser sampling.
+            Defaults to 4.
+        kwargs
+            Keyword arguments for the default constructor.
+        """
+        num_params = len(inp.bounds)
+        init_points = round(sampling_base**num_params)
+        n_iter = round(sampling_base**num_params)
+        return cls(init_points=init_points, n_iter=n_iter, **kwargs)
+
+    @property
+    def _previous_max(self):
+        """Return the maximum target value observed"""
+        if not self._improvements:
+            return -np.inf
+        return self._improvements[-1].target
+
+    def is_converged(self) -> bool:
+        """Check if convergence criteria are met"""
+        return True
+
+    def optimizer_params(self) -> dict[str, Union[int, float, str, UtilityFunction]]:
+        """Return parameters for the optimizer"""
+        return {
+            "init_points": self.init_points,
+            "n_iter": self.n_iter,
+            "acquisition_function": UtilityFunction(
+                kappa=self.kappa,
+                kappa_decay=self.kappa_decay,
+                **self.utility_func_kwargs,
+            ),
+        }
+
+    def _is_random_step(self):
+        """Return True if we sample randomly instead of using Bayesian sampling"""
+        return (self._last_it_end + self.steps) < self.init_points
+
+    def _append_improvement(self, target):
+        """Append a new improvement to the deque"""
+        impr = np.inf
+        if self._improvements:
+            impr = (self._improvements[-1].target / target) - 1
+
+        self._improvements.append(
+            Improvement(
+                sample=self.steps,
+                iteration=self.iterations,
+                target=target,
+                improvement=impr,
+                random=self._is_random_step(),
+            )
+        )
+
+    def _is_new_max(self, instance):
+        """Determine if a guessed value is the new maximum"""
+        if instance.max is None:
+            # During constrained optimization, there might not be a maximum
+            # value since the optimizer might not have encountered any points
+            # that fulfill the constraints.
+            return False
+
+        if instance.max["target"] > self._previous_max:
+            return True
+        return False
+
+    def _maybe_stop_early(self, instance):
+        """Raise StopEarly if we want to stop this iteration early"""
+        # Create sequence of last improvements
+        last_improvements = [
+            self._improvements[-idx]
+            for idx in np.arange(
+                min(self.min_improvement_count, len(self._improvements))
+            )
+            + 1
+        ]
+        if (
+            # Same iteration
+            np.unique([impr.iteration for impr in last_improvements]).size == 1
+            # Not random
+            and not any(impr.random for impr in last_improvements)
+            # Less than min improvement
+            and all(
+                impr.improvement < self.min_improvement for impr in last_improvements
+            )
+        ):
+            LOGGER.info("Minimal improvement. Stop iteration.")
+            instance.dispatch(Events.OPTIMIZATION_END)
+            raise StopEarly()
+
+    def update(self, event, instance):
+        """Update the step tracker of this instance.
+
+        For step events, check if the latest guess is the new maximum. Also check if the
+        iteration will be stopped early.
+
+        For end events, check if any improvement occurred. If not, stop the optimization.
+
+        Parameters
+        ----------
+        event : bayes_opt.Events
+            The event descriptor
+        instance : bayes_opt.BayesianOptimization
+            Optimization instance triggering the event
+
+        Raises
+        ------
+        StopEarly
+            If the optimization only achieves minimal improvement, stop the iteration
+            early with this exception.
+        StopIteration
+            If an entire iteration did not achieve improvement, stop the optimization.
+        """
+        if event == Events.OPTIMIZATION_STEP:
+            new_max = self._is_new_max(instance)
+            if new_max:
+                self._append_improvement(instance.max["target"])
+
+            self.steps += 1
+
+            # NOTE: Must call this after incrementing the step
+            if new_max:
+                self._maybe_stop_early(instance)
+
+        if event == Events.OPTIMIZATION_END:
+            self.iterations += 1
+            # Stop if we do not improve anymore
+            if (
+                self._last_it_end > 0
+                and self._last_it_improved == self._improvements[-1].iteration
+            ):
+                LOGGER.info("No improvement. Stop optimization.")
+                raise StopIteration()

+            self._last_it_improved = self._improvements[-1].iteration
+            self._last_it_end = self.steps
+
+    def improvements(self) -> pd.DataFrame:
+        """Return the recorded improvements as a DataFrame
+
+        Returns
+        -------
+        improvements : pd.DataFrame
+        """
+        return pd.DataFrame.from_records(
+            data=[impr._asdict() for impr in self._improvements]
+        ).set_index("sample")
+
+
 @dataclass
 class BayesianOptimizer(Optimizer):
     """An optimization using ``bayes_opt.BayesianOptimization``
@@ -205,13 +451,13 @@ class BayesianOptimizer(Optimizer):

     Notes
     -----
-    The following requirements apply to the parameters of :py:class:`Input` when using
-    this class:
+    The following requirements apply to the parameters of
+    :py:class:`~climada.util.calibrate.base.Input` when using this class:

     bounds
-        Setting ``bounds`` in the ``Input`` is required because the optimizer first
-        "explores" the bound parameter space and then narrows its search to regions
-        where the cost function is low.
+        Setting :py:attr:`~climada.util.calibrate.base.Input.bounds` is required
+        because the optimizer first "explores" the bound parameter space and then
+        narrows its search to regions where the cost function is low.
     constraints
         Must be an instance of ``scipy.minimize.LinearConstraint`` or
         ``scipy.minimize.NonlinearConstraint``. See
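As a hedged illustration of these requirements (the parameter names are hypothetical, and the way the constraint function receives the parameters is an assumption based on the ``bayes_opt`` constrained-optimization interface, not on this commit):

    import numpy as np
    from scipy.optimize import NonlinearConstraint

    # Hypothetical parameters: each entry maps a name to the (lower, upper)
    # range that the optimizer explores.
    bounds = {"scale": (0.01, 10.0), "half_point": (20.0, 80.0)}

    # Hypothetical constraint: keep the product of both parameters below 100.
    constraints = NonlinearConstraint(
        lambda scale, half_point: scale * half_point, -np.inf, 100.0
    )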
@@ -253,7 +499,10 @@ def _target_func(self, data: pd.DataFrame, predicted: pd.DataFrame) -> Number:
         """Invert the cost function because BayesianOptimization maximizes the target"""
         return -self.input.cost_func(data, predicted)

-    def run(self, **opt_kwargs) -> BayesianOptimizerOutput:
+    def run(
+        self,
+        controller: BayesianOptimizerController,
+    ) -> BayesianOptimizerOutput:
         """Execute the optimization

         ``BayesianOptimization`` *maximizes* a target function. Therefore, this class
@@ -262,12 +511,8 @@ def run(self, **opt_kwargs) -> BayesianOptimizerOutput:

         Parameters
         ----------
-        init_points : int, optional
-            Number of initial samples taken from the parameter space. Defaults to 10^N,
-            where N is the number of parameters.
-        n_iter : int, optional
-            Number of iteration steps after initial sampling. Defaults to 10^N, where N
-            is the number of parameters.
+        controller : BayesianOptimizerController
+            The controller instance used to set the optimization iteration parameters.
         opt_kwargs
             Further keyword arguments passed to ``BayesianOptimization.maximize``.
@@ -277,13 +522,18 @@ def run(self, **opt_kwargs) -> BayesianOptimizerOutput:
             Optimization output. :py:attr:`BayesianOptimizerOutput.p_space` stores data
             on the sampled parameter space.
         """
-        # Retrieve parameters
-        num_params = len(self.input.bounds)
-        init_points = opt_kwargs.pop("init_points", 10**num_params)
-        n_iter = opt_kwargs.pop("n_iter", 10**num_params)
-
-        # Run optimizer
-        self.optimizer.maximize(init_points=init_points, n_iter=n_iter, **opt_kwargs)
+        for event in (Events.OPTIMIZATION_STEP, Events.OPTIMIZATION_END):
+            self.optimizer.subscribe(event, controller)
+
+        while controller.iterations < controller.max_iterations:
+            try:
+                LOGGER.info(f"Optimization iteration: {controller.iterations}")
+                self.optimizer.maximize(**controller.optimizer_params())
+            except StopEarly:
+                continue
+            except StopIteration:
+                # Exit the loop
+                break

         # Return output
         opt = self.optimizer.max
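Taken together, the new calling pattern replaces the former ``init_points``/``n_iter`` keyword arguments with a controller object. A sketch of the intended flow, assuming ``inp`` is a fully prepared ``climada.util.calibrate.base.Input`` (its construction is omitted here) and that the optimizer dataclass is built from it:

    from climada.util.calibrate.bayesian_optimizer import (
        BayesianOptimizer,
        BayesianOptimizerController,
    )

    # init_points and n_iter are derived from the number of estimated
    # parameters: both become round(sampling_base ** num_params).
    controller = BayesianOptimizerController.from_input(inp, sampling_base=4)

    optimizer = BayesianOptimizer(input=inp)
    output = optimizer.run(controller)  # loops until convergence or max_iterations

    # The controller records every improvement of the best guess, indexed by
    # the sample at which it occurred.
    print(controller.improvements())

    # Per the docstring above, output.p_space stores the sampled parameter space.
    p_space = output.p_space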
