 Calibration with Bayesian Optimization
 """

-from dataclasses import dataclass, InitVar
+from dataclasses import dataclass, InitVar, field
 from typing import Mapping, Optional, Any, Union, List, Tuple
 from numbers import Number
 from itertools import combinations, repeat
+from collections import deque, namedtuple
+import logging

 import pandas as pd
+import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.axes as maxes
-from bayes_opt import BayesianOptimization
+from bayes_opt import BayesianOptimization, Events, UtilityFunction
 from bayes_opt.target_space import TargetSpace

-from .base import Output, Optimizer, OutputEvaluator
+from .base import Input, Output, Optimizer, OutputEvaluator
+
+
+LOGGER = logging.getLogger(__name__)


 @dataclass
@@ -176,6 +182,246 @@ def plot_single(x, y):
         return [plot_single(p_first, p_second) for p_first, p_second in iterable]


+Improvement = namedtuple(
+    "Improvement", ["iteration", "sample", "random", "target", "improvement"]
+)
+
+
+class StopEarly(Exception):
+    """An exception for stopping an optimization iteration early"""
+
+    pass
+
+
+@dataclass(eq=False)
+class BayesianOptimizerController(object):
198+ """A class for controlling the iterations of a :py:class:`BayesianOptimizer`.
199+
200+ Each iteration in the optimizer consists of a random sampling of the parameter space
201+ with :py:attr:`init_points` steps, followed by a Gaussian process sampling with
202+ :py:attr:`n_iter` steps. During the latter, the :py:attr:`kappa` parameter is
203+ reduced to reach :py:attr:`kappa_min` at the end of the iteration. The iteration is
204+ stopped prematurely if improvements of the buest guess are below
205+ :py:attr:`min_improvement` for :py:attr:`min_improvement_count` consecutive times.
206+ At the beginning of the next iteration, :py:attr:`kappa` is reset to its original
207+ value.
208+
209+ Optimization stops if :py:attr:`max_iterations` is reached or if an entire iteration
210+ saw now improvement.
211+
212+ Attributes
213+ ----------
214+ init_points : int
215+ Number of randomly sampled points during each iteration.
216+ n_iter : int
217+ Maximum number of points using Gaussian process sampling during each iteration.
218+ min_improvement : float
219+ Minimal relative improvement. If improvements are below this value
220+ :py:attr:`min_improvement_count` times, the iteration is stopped.
221+ min_improvement_count : int
222+ Number of times the :py:attr:`min_improvement` must be undercut to stop the
223+ iteration.
224+ kappa : float
225+ Parameter controlling exploration of the upper-confidence-bound acquisition
226+ function of the sampling algorithm. Lower values mean less exploration of the
227+ parameter space and more exploitation of local information. This value is
228+ reduced throughout one iteration, reaching :py:attr:`kappa_min` at the
229+ last iteration step.
230+ kappa_min : float
231+ Minimal value of :py:attr:`kappa` after :py:attr:`n_iter` steps.
232+ max_iterations : int
233+ Maximum number of iterations before optimization is stopped, irrespective of
234+ convergence.
235+ utility_func_kwargs
236+ Further keyword arguments to the ``bayes_opt.UtilityFunction``.
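+
+    Example
+    -------
+    A minimal, illustrative usage sketch; ``calib_input`` stands for a
+    :py:class:`~climada.util.calibrate.base.Input` set up elsewhere, and the
+    optimizer construction is only schematic::
+
+        controller = BayesianOptimizerController.from_input(calib_input)
+        optimizer = BayesianOptimizer(calib_input)
+        output = optimizer.run(controller)
+        print(controller.improvements())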
237+ """
+
+    # Init attributes
+    init_points: int = 0
+    n_iter: int = 0
+    min_improvement: float = 1e-3
+    min_improvement_count: int = 2
+    kappa: float = 2.576
+    kappa_min: float = 0.1
+    max_iterations: int = 10
+    utility_func_kwargs: dict[str, Union[int, float, str]] = field(default_factory=dict)
+
+    # Other attributes
+    kappa_decay: float = field(init=False, default=0.96)
+    steps: int = field(init=False, default=0)
+    iterations: int = field(init=False, default=0)
+    _improvements: deque[Improvement] = field(init=False, default_factory=deque)
+    _last_it_improved: int = 0
+    _last_it_end: int = 0
+
+    def __post_init__(self):
+        """Set the decay factor for :py:attr:`kappa`."""
+        self.kappa_decay = np.exp(
+            (np.log(self.kappa_min) - np.log(self.kappa)) / self.n_iter
+        )
+
+    @classmethod
+    def from_input(cls, inp: Input, sampling_base: float = 4, **kwargs):
+        """Create a controller from a calibration input.
+
+        This uses the number of parameters to determine the appropriate values for
+        :py:attr:`init_points` and :py:attr:`n_iter`. Both values are set to
+        :math:`b^N`, where :math:`b` is the ``sampling_base`` parameter and :math:`N`
+        is the number of estimated parameters.
+
+        Parameters
+        ----------
+        inp : Input
+            Input to the calibration
+        sampling_base : float, optional
+            Base for determining the sample size. Increase this for denser sampling.
+            Defaults to 4.
+        kwargs
+            Further keyword arguments passed to the default constructor.
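+
+        For example, with two estimated parameters and the default ``sampling_base``
+        of 4, both :py:attr:`init_points` and :py:attr:`n_iter` are set to
+        :math:`4^2 = 16`.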
281+ """
282+ num_params = len (inp .bounds )
283+ init_points = round (sampling_base ** num_params )
284+ n_iter = round (sampling_base ** num_params )
285+ return cls (init_points = init_points , n_iter = n_iter , ** kwargs )
286+
+    @property
+    def _previous_max(self):
+        """Return the maximum target value observed"""
+        if not self._improvements:
+            return -np.inf
+        return self._improvements[-1].target
+
+    def is_converged(self) -> bool:
+        """Check if convergence criteria are met"""
+        return True
+
+    def optimizer_params(self) -> dict[str, Union[int, float, str, UtilityFunction]]:
+        """Return parameters for the optimizer"""
+        return {
+            "init_points": self.init_points,
+            "n_iter": self.n_iter,
+            "acquisition_function": UtilityFunction(
+                kappa=self.kappa,
+                kappa_decay=self.kappa_decay,
+                **self.utility_func_kwargs,
+            ),
+        }
+
+    def _is_random_step(self):
+        """Return True if we sample randomly instead of using the Gaussian process"""
+        return (self.steps - self._last_it_end) < self.init_points
+
+    def _append_improvement(self, target):
+        """Append a new improvement to the deque"""
+        impr = np.inf
+        if self._improvements:
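+            # Relative improvement over the previous maximum (target = negated cost)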
+            impr = (self._improvements[-1].target / target) - 1
+
+        self._improvements.append(
+            Improvement(
+                sample=self.steps,
+                iteration=self.iterations,
+                target=target,
+                improvement=impr,
+                random=self._is_random_step(),
+            )
+        )
+
+    def _is_new_max(self, instance):
+        """Determine if a guessed value is the new maximum"""
+        if instance.max is None:
+            # During constrained optimization, there might not be a maximum
+            # value yet because the optimizer may not have encountered any
+            # points that fulfill the constraints.
+            return False
+
+        if instance.max["target"] > self._previous_max:
+            return True
+        return False
+
+    def _maybe_stop_early(self, instance):
+        """Raise :py:class:`StopEarly` if we want to stop this iteration early"""
+        # Create sequence of last improvements
+        last_improvements = [
+            self._improvements[-idx]
+            for idx in np.arange(
+                min(self.min_improvement_count, len(self._improvements))
+            )
+            + 1
+        ]
+        if (
+            # Same iteration
+            np.unique([impr.iteration for impr in last_improvements]).size == 1
+            # Not random
+            and not any(impr.random for impr in last_improvements)
+            # Less than min improvement
+            and all(
+                impr.improvement < self.min_improvement for impr in last_improvements
+            )
+        ):
+            LOGGER.info("Minimal improvement. Stop iteration.")
+            instance.dispatch(Events.OPTIMIZATION_END)
+            raise StopEarly()
+
+    def update(self, event, instance):
+        """Update the step tracker of this instance.
+
+        For step events, check if the latest guess is the new maximum. Also check if the
+        iteration will be stopped early.
+
+        For end events, check if any improvement occurred. If not, stop the optimization.
+
+        Parameters
+        ----------
+        event : bayes_opt.Events
+            The event descriptor
+        instance : bayes_opt.BayesianOptimization
+            Optimization instance triggering the event
+
+        Raises
+        ------
+        StopEarly
+            If the optimization only achieves minimal improvement, stop the iteration
+            early with this exception.
+        StopIteration
+            If an entire iteration did not achieve improvement, stop the optimization.
+        """
+        if event == Events.OPTIMIZATION_STEP:
+            new_max = self._is_new_max(instance)
+            if new_max:
+                self._append_improvement(instance.max["target"])
+
+            self.steps += 1
+
+            # NOTE: Must call this after incrementing the step
+            if new_max:
+                self._maybe_stop_early(instance)
+
+        if event == Events.OPTIMIZATION_END:
+            self.iterations += 1
+            # Stop if we do not improve anymore
+            if (
+                self._last_it_end > 0
+                and self._last_it_improved == self._improvements[-1].iteration
+            ):
+                LOGGER.info("No improvement. Stop optimization.")
+                raise StopIteration()
+
+            self._last_it_improved = self._improvements[-1].iteration
+            self._last_it_end = self.steps
+
+    def improvements(self) -> pd.DataFrame:
+        """Return the recorded improvements as a DataFrame
+
+        Returns
+        -------
+        improvements : pd.DataFrame
+            DataFrame indexed by ``sample``, with columns ``iteration``, ``random``,
+            ``target``, and ``improvement``.
+        """
+        return pd.DataFrame.from_records(
+            data=[impr._asdict() for impr in self._improvements]
+        ).set_index("sample")
+
+
 @dataclass
 class BayesianOptimizer(Optimizer):
     """An optimization using ``bayes_opt.BayesianOptimization``
@@ -205,13 +451,13 @@ class BayesianOptimizer(Optimizer):

     Notes
     -----
-    The following requirements apply to the parameters of :py:class:`Input` when using
-    this class:
+    The following requirements apply to the parameters of
+    :py:class:`~climada.util.calibrate.base.Input` when using this class:

     bounds
-        Setting ``bounds`` in the ``Input`` is required because the optimizer first
-        "explores" the bound parameter space and then narrows its search to regions
-        where the cost function is low.
+        Setting :py:attr:`~climada.util.calibrate.base.Input.bounds` is required
+        because the optimizer first "explores" the bound parameter space and then
+        narrows its search to regions where the cost function is low.
     constraints
         Must be an instance of ``scipy.minimize.LinearConstraint`` or
         ``scipy.minimize.NonlinearConstraint``. See
@@ -253,7 +499,10 @@ def _target_func(self, data: pd.DataFrame, predicted: pd.DataFrame) -> Number:
253499 """Invert the cost function because BayesianOptimization maximizes the target"""
254500 return - self .input .cost_func (data , predicted )
255501
256- def run (self , ** opt_kwargs ) -> BayesianOptimizerOutput :
502+ def run (
503+ self ,
504+ controller : BayesianOptimizerController ,
505+ ) -> BayesianOptimizerOutput :
257506 """Execute the optimization
258507
259508 ``BayesianOptimization`` *maximizes* a target function. Therefore, this class
@@ -262,12 +511,8 @@ def run(self, **opt_kwargs) -> BayesianOptimizerOutput:

         Parameters
         ----------
-        init_points : int, optional
-            Number of initial samples taken from the parameter space. Defaults to 10^N,
-            where N is the number of parameters.
-        n_iter : int, optional
-            Number of iteration steps after initial sampling. Defaults to 10^N, where N
-            is the number of parameters.
+        controller : BayesianOptimizerController
+            The controller instance used to set the optimization iteration parameters.
         opt_kwargs
             Further keyword arguments passed to ``BayesianOptimization.maximize``.

@@ -277,13 +522,18 @@ def run(self, **opt_kwargs) -> BayesianOptimizerOutput:
             Optimization output. :py:attr:`BayesianOptimizerOutput.p_space` stores data
             on the sampled parameter space.
         """
-        # Retrieve parameters
-        num_params = len(self.input.bounds)
-        init_points = opt_kwargs.pop("init_points", 10**num_params)
-        n_iter = opt_kwargs.pop("n_iter", 10**num_params)
-
-        # Run optimizer
-        self.optimizer.maximize(init_points=init_points, n_iter=n_iter, **opt_kwargs)
+        for event in (Events.OPTIMIZATION_STEP, Events.OPTIMIZATION_END):
+            self.optimizer.subscribe(event, controller)
+
+        while controller.iterations < controller.max_iterations:
+            try:
+                LOGGER.info(f"Optimization iteration: {controller.iterations}")
+                self.optimizer.maximize(**controller.optimizer_params())
+            except StopEarly:
+                continue
+            except StopIteration:
+                # Exit the loop
+                break

         # Return output
         opt = self.optimizer.max