Skip to content

Commit 67fdbf5

Browse files
committed
Added target space and corresponding tests. Simplified optimization code
1 parent 2e860f6 commit 67fdbf5

File tree

5 files changed

+490
-168
lines changed

5 files changed

+490
-168
lines changed

bayes_opt/bayesian_optimization.py

Lines changed: 60 additions & 164 deletions
Original file line numberDiff line numberDiff line change
@@ -4,94 +4,9 @@
44
import numpy as np
55
from sklearn.gaussian_process import GaussianProcessRegressor
66
from sklearn.gaussian_process.kernels import Matern
7-
from .helpers import UtilityFunction, unique_rows, PrintLog, acq_max
8-
9-
10-
class TargetSpace(object):
    """
    Holds the param-space coordinates (X) and target values (Y).

    New observations are first appended to plain Python lists and are only
    merged into the contiguous numpy arrays when ``X`` or ``Y`` is read.

    :param f:
        Function to be evaluated. It is called with one keyword argument
        per key of ``pbounds``.
    :param pbounds:
        Dictionary mapping each parameter name to its (min, max) bounds.
    :param random_state:
        ``None``, an integer seed, or an object exposing ``uniform`` (such
        as ``np.random.RandomState``); used by ``random_points``.
    """

    def __init__(self, f, pbounds, random_state):
        # Normalise the seed here so that ``random_points`` always has a
        # generator to draw from, even when the caller passes None or an int.
        if random_state is None:
            random_state = np.random.RandomState()
        elif isinstance(random_state, (int, np.integer)):
            random_state = np.random.RandomState(random_state)
        self.random_state = random_state

        # Function to be evaluated
        self.f = f

        # Initialize bounds; row i of ``bounds`` belongs to ``keys[i]``.
        # The builtin ``float`` replaces the removed ``np.float`` alias.
        self.keys = list(pbounds.keys())
        self.bounds = np.array(list(pbounds.values()), dtype=float)
        self.dim = len(pbounds)

        # Place to append new values in constant time
        self.new_Xs = []
        self.new_Ys = []

        # Place to consolidate and concatenate all values
        self.X_arr = None
        self.Y_arr = None

        # Printing from ``add_observation`` is opt-in; set to a truthy value
        # to have every recorded observation reported through ``plog``.
        self.verbose = False

        self.plog = PrintLog(self.keys)

    @property
    def X(self):
        """All recorded points as an array with one row per observation."""
        self._consolidate()
        return self.X_arr

    @property
    def Y(self):
        """All recorded target values as a one-dimensional array."""
        self._consolidate()
        return self.Y_arr

    def unique_XY(self):
        """
        Return the observations with duplicated rows of X removed.

        :return: tuple ``(X, Y)`` indexed by the mask from ``unique_rows``.
        """
        X = self.X
        Y = self.Y
        if X.shape[0] == 0:
            # Nothing recorded yet, so there is nothing to de-duplicate.
            return X, Y
        ur = unique_rows(X)
        return X[ur], Y[ur]

    def _consolidate(self):
        """
        If there are any new values, append them to the contiguous arrays.
        """
        # Work on the backing attributes directly: going through the ``X`` /
        # ``Y`` properties here would call this method again.
        if self.X_arr is None or self.Y_arr is None:
            self.X_arr = np.empty((0, self.bounds.shape[0]))
            self.Y_arr = np.empty(0)

        if self.new_Xs:
            assert len(self.new_Xs) == len(self.new_Ys)
            self.X_arr = np.vstack([self.X_arr] + self.new_Xs)
            self.Y_arr = np.hstack([self.Y_arr] + self.new_Ys)
            # The pending values now live in the arrays; start fresh lists so
            # they are not appended a second time.
            self.new_Xs = []
            self.new_Ys = []

    def set_bounds(self, new_bounds):
        """
        Overwrite the bounds of the parameters named in ``new_bounds``.

        :param new_bounds:
            Dictionary mapping parameter names to new (min, max) bounds.
            Names that are not parameters of this space are ignored.
        """
        # Loop through all the bounds and reset the min-max bound matrix
        for row, key in enumerate(self.keys):
            if key in new_bounds:
                self.bounds[row] = new_bounds[key]

    def add_observation(self, x, y, pwarning=False):
        """
        Record an already evaluated point.

        :param x: coordinates of the point, one value per parameter.
        :param y: target value observed at ``x``.
        :param pwarning: forwarded to ``plog.print_step`` when verbose.
        """
        # Keep the reshaped row so that every pending entry stacks cleanly.
        row = np.asarray(x).reshape((1, -1))
        self.new_Xs.append(row)
        self.new_Ys.append(y)
        if self.verbose:
            self.plog.print_step(x, y, pwarning)

    def observe_point(self, x, pwarning=False):
        """
        Evaluates a single point x, to obtain the value y and then records
        them as observations.

        :param x: coordinates of the point, one value per parameter.
        :param pwarning: forwarded to ``add_observation``.
        :return: the value of ``f`` at ``x``.
        """
        # Flatten so that each key is paired with its own coordinate.
        x = np.asarray(x).ravel()
        y = self.f(**dict(zip(self.keys, x)))
        self.add_observation(x, y, pwarning)
        return y

    def random_points(self, num):
        """
        Draw ``num`` points uniformly at random inside the bounds.

        :param num: number of points to generate.
        :return: list of ``num`` lists, each with one value per parameter.
        """
        columns = [self.random_state.uniform(lower, upper, size=num)
                   for lower, upper in self.bounds]
        # Transpose from one column per parameter to one row per point.
        return list(map(list, zip(*columns)))

    def max_point(self):
        """
        Return the best observation recorded so far.

        :return: dictionary with ``'max_val'`` (largest target value) and
            ``'max_params'`` (parameter name -> value at that point).
        """
        Y = self.Y
        best = Y.argmax()
        return {'max_val': Y.max(),
                'max_params': dict(zip(self.keys, self.X[best]))}
7+
from .helpers import (UtilityFunction, PrintLog, unique_rows, acq_max,
8+
ensure_rng)
9+
from .target_space import TargetSpace
9510

9611

9712
class BayesianOptimization(object):
@@ -112,27 +27,11 @@ def __init__(self, f, pbounds, random_state=None, verbose=1):
11227
# Store the original dictionary
11328
self.pbounds = pbounds
11429

115-
if random_state is None:
116-
self.random_state = np.random.RandomState()
117-
elif isinstance(random_state, int):
118-
self.random_state = np.random.RandomState(random_state)
119-
else:
120-
self.random_state = random_state
121-
122-
# Get the name of the parameters
123-
self.keys = list(pbounds.keys())
124-
125-
# Find number of parameters
126-
self.dim = len(pbounds)
127-
128-
# Create an array with parameters bounds
129-
self.bounds = []
130-
for key in self.pbounds.keys():
131-
self.bounds.append(self.pbounds[key])
132-
self.bounds = np.asarray(self.bounds)
30+
self.random_state = ensure_rng(random_state)
13331

134-
# Some function to be optimized
135-
self.f = f
32+
# Data structure containing the function to be optimized, the bounds of
33+
# its domain, and a record of the evaluations we have done so far
34+
self.space = TargetSpace(f, pbounds, random_state)
13635

13736
# Initialization flag
13837
self.initialized = False
@@ -142,8 +41,6 @@ def __init__(self, f, pbounds, random_state=None, verbose=1):
14241
self.x_init = []
14342
self.y_init = []
14443

145-
self.space = TargetSpace(f, pbounds, random_state)
146-
14744
# Counter of iterations
14845
self.i = 0
14946

@@ -157,16 +54,15 @@ def __init__(self, f, pbounds, random_state=None, verbose=1):
15754
# Utility Function placeholder
15855
self.util = None
15956

160-
# PrintLog object
161-
self.plog = self.space.plog
162-
16357
# Output dictionary
16458
self.res = {}
16559
# Output dictionary
16660
self.res['max'] = {'max_val': None,
16761
'max_params': None}
16862
self.res['all'] = {'values': [], 'params': []}
16963

64+
self.plog = PrintLog(self.space.keys)
65+
17066
# Verbose
17167
self.verbose = verbose
17268

@@ -177,58 +73,53 @@ def init(self, init_points):
17773
17874
:param init_points:
17975
Number of random points to probe.
180-
181-
Example:
182-
pbounds = {'p1': (0, 1), 'p2': (1, 100)}
183-
bounds = np.array(list(pbounds.values()))
184-
init_points = 10
18576
"""
186-
# Generate random points
18777
# Concatenate new random points to possible existing
18878
# points from self.explore method.
18979
rand_points = self.space.random_points(init_points)
190-
self.init_points += rand_points
80+
self.init_points.extend(rand_points)
19181

82+
# Evaluate target function at all initialization points
83+
for x in self.init_points:
84+
y = self._observe_point(x)
85+
86+
# Add the points from `self.initialize` to the observations
19287
x_init = np.vstack(self.x_init)
19388
y_init = np.hstack(self.y_init)
19489
for x, y in zip(x_init, y_init):
19590
self.space.add_observation(x, y)
196-
197-
# Evaluate target function at all initialization
198-
# points (random + explore)
199-
for x in self.init_points:
200-
self.space.observe_point(x)
91+
if self.verbose:
92+
self.plog.print_step(x, y)
20193

20294
# Updates the flag
20395
self.initialized = True
20496

205-
def explore(self, points_dict):
206-
"""Method to explore user defined points
97+
def _observe_point(self, x):
98+
y = self.space.observe_point(x)
99+
if self.verbose:
100+
self.plog.print_step(x, y)
101+
return y
207102

208-
This is executed lazily.
103+
def explore(self, points_dict):
104+
"""Method to lazy explore user defined points.
209105
210106
:param points_dict:
211107
"""
108+
points = self.space._dict_to_points(points_dict)
109+
self.init_points = points
212110

213-
# Consistency check
214-
param_tup_lens = []
215-
216-
for key in self.keys:
217-
param_tup_lens.append(len(list(points_dict[key])))
218-
219-
if all([e == param_tup_lens[0] for e in param_tup_lens]):
220-
pass
221-
else:
222-
raise ValueError('The same number of initialization points '
223-
'must be entered for every parameter.')
111+
def explore_eager(self, points_dict):
112+
"""Method to eagerly explore more points
224113
225-
# Turn into list of lists
226-
all_points = []
227-
for key in self.keys:
228-
all_points.append(points_dict[key])
114+
:param points_dict:
115+
"""
116+
self.plog.reset_timer()
117+
if self.verbose:
118+
self.plog.print_header(initialization=True)
229119

230-
# Take transpose of list
231-
self.init_points = list(map(list, zip(*all_points)))
120+
points = self.space._dict_to_points(points_dict)
121+
for x in points:
122+
self._observe_point(x)
232123

233124
def initialize(self, points_dict):
234125
"""
@@ -252,7 +143,7 @@ def initialize(self, points_dict):
252143
self.y_init.extend(points_dict['target'])
253144
for i in range(len(points_dict['target'])):
254145
all_points = []
255-
for key in self.keys:
146+
for key in self.space.keys:
256147
all_points.append(points_dict[key][i])
257148
self.x_init.append(all_points)
258149

@@ -280,7 +171,7 @@ def initialize_df(self, points_df):
280171
self.y_init.append(points_df.loc[i, 'target'])
281172

282173
all_points = []
283-
for key in self.keys:
174+
for key in self.space.keys:
284175
all_points.append(points_df.loc[i, key])
285176

286177
self.x_init.append(all_points)
@@ -293,14 +184,9 @@ def set_bounds(self, new_bounds):
293184
A dictionary with the parameter name and its new bounds
294185
295186
"""
296-
self.space.set_bounds(new_bounds)
297-
298187
# Update the internal object stored dict
299188
self.pbounds.update(new_bounds)
300-
# # Loop through the all bounds and reset the min-max bound matrix
301-
# for row, key in enumerate(self.pbounds.keys()):
302-
# # Reset all entries, even if the same.
303-
# self.bounds[row] = self.pbounds[key]
189+
self.space.set_bounds(new_bounds)
304190

305191
def maximize(self,
306192
init_points=5,
@@ -333,6 +219,13 @@ def maximize(self,
333219
Returns
334220
-------
335221
:return: Nothing
222+
223+
Example:
224+
>>> xs = np.linspace(-2, 10, 10000)
225+
>>> f = np.exp(-(xs - 2)**2) + np.exp(-(xs - 6)**2/10) + 1/ (xs**2 + 1)
226+
>>> bo = BayesianOptimization(f=lambda x: f[int(x)],
227+
>>> pbounds={"x": (0, len(f)-1)})
228+
>>> bo.maximize(init_points=2, n_iter=25, acq="ucb", kappa=1)
336229
"""
337230
# Reset timer
338231
self.plog.reset_timer()
@@ -352,13 +245,13 @@ def maximize(self,
352245
self.gp.set_params(**gp_params)
353246

354247
# Find unique rows of X to avoid GP from breaking
355-
self.gp.fit(*self.space.unique_XY())
248+
self.gp.fit(self.space.X, self.space.Y)
356249

357250
# Finding argmax of the acquisition function.
358251
x_max = acq_max(ac=self.util.utility,
359252
gp=self.gp,
360253
y_max=y_max,
361-
bounds=self.bounds,
254+
bounds=self.space.bounds,
362255
random_state=self.random_state)
363256

364257
# Print new header
@@ -374,15 +267,22 @@ def maximize(self,
374267
# Test if x_max is repeated, if it is, draw another one at random
375268
# If it is repeated, print a warning
376269
pwarning = False
377-
if np.any((self.space.X - x_max).sum(axis=1) == 0):
270+
while x_max in self.space:
378271
x_max = self.space.random_points(1)[0]
379272
pwarning = True
380273

381274
# Append most recently generated values to X and Y arrays
382-
self.space.observe_point(x_max, pwarning)
275+
y = self.space.observe_point(x_max)
276+
if self.verbose:
277+
self.plog.print_step(x_max, y, pwarning)
383278

384279
# Updating the GP.
385-
self.gp.fit(*self.space.unique_XY())
280+
self.gp.fit(self.space.X, self.space.Y)
281+
282+
# Update the best params seen so far
283+
self.res['max'] = self.space.max_point()
284+
self.res['all']['values'].append(y)
285+
self.res['all']['params'].append(dict(zip(self.space.keys, x_max)))
386286

387287
# Update maximum value to search for next probe point.
388288
if self.space.Y[-1] > y_max:
@@ -392,16 +292,12 @@ def maximize(self,
392292
x_max = acq_max(ac=self.util.utility,
393293
gp=self.gp,
394294
y_max=y_max,
395-
bounds=self.bounds,
295+
bounds=self.space.bounds,
396296
random_state=self.random_state)
397297

398298
# Keep track of total number of iterations
399299
self.i += 1
400300

401-
self.res['max'] = self.space.max_point()
402-
self.res['all']['values'].append(self.space.Y[-1])
403-
self.res['all']['params'].append(dict(zip(self.keys, self.space.X[-1])))
404-
405301
# Print a final report if verbose active.
406302
if self.verbose:
407303
self.plog.print_summary()
@@ -418,5 +314,5 @@ def points_to_csv(self, file_name):
418314
"""
419315

420316
points = np.hstack((self.space.X, np.expand_dims(self.space.Y, axis=1)))
421-
header = ', '.join(self.keys + ['target'])
317+
header = ', '.join(self.space.keys + ['target'])
422318
np.savetxt(file_name, points, header=header, delimiter=',')

bayes_opt/helpers.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ def unique_rows(a):
117117
118118
:return: mask of unique rows
119119
"""
120+
if a.size == 0:
121+
return np.empty((0,))
120122

121123
# Sort array and keep track of where things should go back to
122124
order = np.lexsort(a.T)
@@ -130,6 +132,21 @@ def unique_rows(a):
130132
return ui[reorder]
131133

132134

135+
def ensure_rng(random_state=None):
    """
    Creates a random number generator based on an optional seed. This can be
    an integer or another random state for a seeded rng, or None for an
    unseeded rng.

    :param random_state:
        ``None`` (fresh unseeded generator), an integer seed (Python ``int``
        or numpy integer), or an existing ``np.random.RandomState``, which
        is returned unchanged.

    :return: an ``np.random.RandomState`` instance.

    :raises AssertionError: if ``random_state`` is none of the above.
    """
    if random_state is None:
        return np.random.RandomState()

    # numpy integers (e.g. values taken from an array) are valid seeds too,
    # but they are not instances of the builtin ``int``.
    if isinstance(random_state, (int, np.integer)):
        return np.random.RandomState(random_state)

    # Raised explicitly rather than via ``assert`` so that the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not isinstance(random_state, np.random.RandomState):
        raise AssertionError(
            'random_state must be None, an int or a np.random.RandomState')
    return random_state
148+
149+
133150
class BColours(object):
134151
BLUE = '\033[94m'
135152
CYAN = '\033[36m'

0 commit comments

Comments
 (0)