|
96 | 96 | "name": "stdout", |
97 | 97 | "output_type": "stream", |
98 | 98 | "text": [ |
99 | | - "Next point to probe is: {'x': -0.331911981189704, 'y': 1.3219469606529486}\n" |
| 99 | + "Next point to probe is: {'x': np.float64(-0.331911981189704), 'y': np.float64(1.3219469606529486)}\n" |
100 | 100 | ] |
101 | 101 | } |
102 | 102 | ], |
|
167 | 167 | "name": "stdout", |
168 | 168 | "output_type": "stream", |
169 | 169 | "text": [ |
170 | | - "-18.503835804889988 {'x': 1.953072105336, 'y': -2.9609778030491904}\n", |
171 | | - "-1.0819533157901717 {'x': 0.22703572807626315, 'y': 2.4249238905875123}\n", |
172 | | - "-6.50219704520679 {'x': -1.9991881984624875, 'y': 2.872282989383577}\n", |
173 | | - "-5.747604713731052 {'x': -1.994467585936897, 'y': -0.664242699361514}\n", |
174 | | - "-2.9682431497650823 {'x': 1.9737252084307952, 'y': 1.269540259274744}\n", |
175 | | - "{'target': 0.7861845912690544, 'params': {'x': -0.331911981189704, 'y': 1.3219469606529486}}\n" |
| 170 | + "-18.707136686093495 {'x': np.float64(1.9261486197444082), 'y': np.float64(-2.9996360060323246)}\n", |
| 171 | + "0.750594563473972 {'x': np.float64(-0.3763326769822668), 'y': np.float64(1.328297354179696)}\n", |
| 172 | + "-6.559031075654336 {'x': np.float64(1.979183535803597), 'y': np.float64(2.9083667381450318)}\n", |
| 173 | + "-6.915481333972961 {'x': np.float64(-1.9686133847781613), 'y': np.float64(-1.009985740060171)}\n", |
| 174 | + "-6.8600832617014085 {'x': np.float64(-1.9763198875239296), 'y': np.float64(2.9885278383464513)}\n", |
| 175 | + "{'target': np.float64(0.7861845912690544), 'params': {'x': np.float64(-0.331911981189704), 'y': np.float64(1.3219469606529486)}}\n" |
176 | 176 | ] |
177 | 177 | } |
178 | 178 | ], |
|
190 | 190 | "cell_type": "markdown", |
191 | 191 | "metadata": {}, |
192 | 192 | "source": [ |
193 | | - "## 2. Dealing with discrete parameters\n", |
194 | | - "\n", |
195 | | - "**There is no principled way of dealing with discrete parameters using this package.**\n", |
196 | | - "\n", |
197 | | - "Ok, now that we got that out of the way, how do you do it? You're bound to be in a situation where some of your function's parameters may only take on discrete values. Unfortunately, the nature of bayesian optimization with gaussian processes doesn't allow for an easy/intuitive way of dealing with discrete parameters - but that doesn't mean it is impossible. The example below showcases a simple, yet reasonably adequate, way to dealing with discrete parameters." |
198 | | - ] |
199 | | - }, |
200 | | - { |
201 | | - "cell_type": "code", |
202 | | - "execution_count": 9, |
203 | | - "metadata": {}, |
204 | | - "outputs": [], |
205 | | - "source": [ |
206 | | - "def func_with_discrete_params(x, y, d):\n", |
207 | | - " # Simulate necessity of having d being discrete.\n", |
208 | | - " assert type(d) == int\n", |
209 | | - " \n", |
210 | | - " return ((x + y + d) // (1 + d)) / (1 + (x + y) ** 2)" |
211 | | - ] |
212 | | - }, |
213 | | - { |
214 | | - "cell_type": "code", |
215 | | - "execution_count": 10, |
216 | | - "metadata": {}, |
217 | | - "outputs": [], |
218 | | - "source": [ |
219 | | - "def function_to_be_optimized(x, y, w):\n", |
220 | | - " d = int(w)\n", |
221 | | - " return func_with_discrete_params(x, y, d)" |
222 | | - ] |
223 | | - }, |
224 | | - { |
225 | | - "cell_type": "code", |
226 | | - "execution_count": 11, |
227 | | - "metadata": {}, |
228 | | - "outputs": [], |
229 | | - "source": [ |
230 | | - "optimizer = BayesianOptimization(\n", |
231 | | - " f=function_to_be_optimized,\n", |
232 | | - " pbounds={'x': (-10, 10), 'y': (-10, 10), 'w': (0, 5)},\n", |
233 | | - " verbose=2,\n", |
234 | | - " random_state=1,\n", |
235 | | - ")" |
236 | | - ] |
237 | | - }, |
238 | | - { |
239 | | - "cell_type": "code", |
240 | | - "execution_count": 12, |
241 | | - "metadata": {}, |
242 | | - "outputs": [ |
243 | | - { |
244 | | - "name": "stdout", |
245 | | - "output_type": "stream", |
246 | | - "text": [ |
247 | | - "| iter | target | w | x | y |\n", |
248 | | - "-------------------------------------------------------------\n", |
249 | | - "| \u001b[30m1 | \u001b[30m-0.06199 | \u001b[30m2.085 | \u001b[30m4.406 | \u001b[30m-9.998 |\n", |
250 | | - "| \u001b[35m2 | \u001b[35m-0.0344 | \u001b[35m1.512 | \u001b[35m-7.065 | \u001b[35m-8.153 |\n", |
251 | | - "| \u001b[30m3 | \u001b[30m-0.2177 | \u001b[30m0.9313 | \u001b[30m-3.089 | \u001b[30m-2.065 |\n", |
252 | | - "| \u001b[35m4 | \u001b[35m0.1865 | \u001b[35m2.694 | \u001b[35m-1.616 | \u001b[35m3.704 |\n", |
253 | | - "| \u001b[30m5 | \u001b[30m-0.2187 | \u001b[30m1.022 | \u001b[30m7.562 | \u001b[30m-9.452 |\n", |
254 | | - "| \u001b[35m6 | \u001b[35m0.2488 | \u001b[35m2.684 | \u001b[35m-2.188 | \u001b[35m3.925 |\n" |
255 | | - ] |
256 | | - }, |
257 | | - { |
258 | | - "name": "stdout", |
259 | | - "output_type": "stream", |
260 | | - "text": [ |
261 | | - "| \u001b[35m7 | \u001b[35m0.2948 | \u001b[35m2.683 | \u001b[35m-2.534 | \u001b[35m4.08 |\n", |
262 | | - "| \u001b[35m8 | \u001b[35m0.3202 | \u001b[35m2.514 | \u001b[35m-3.83 | \u001b[35m5.287 |\n", |
263 | | - "| \u001b[30m9 | \u001b[30m0.0 | \u001b[30m4.057 | \u001b[30m-4.458 | \u001b[30m3.928 |\n", |
264 | | - "| \u001b[35m10 | \u001b[35m0.4802 | \u001b[35m2.296 | \u001b[35m-3.518 | \u001b[35m4.558 |\n", |
265 | | - "| \u001b[30m11 | \u001b[30m0.0 | \u001b[30m1.084 | \u001b[30m-3.737 | \u001b[30m4.472 |\n", |
266 | | - "| \u001b[30m12 | \u001b[30m0.0 | \u001b[30m2.649 | \u001b[30m-3.861 | \u001b[30m4.353 |\n", |
267 | | - "| \u001b[30m13 | \u001b[30m0.0 | \u001b[30m2.442 | \u001b[30m-3.658 | \u001b[30m4.599 |\n", |
268 | | - "| \u001b[30m14 | \u001b[30m-0.05801 | \u001b[30m1.935 | \u001b[30m-0.4758 | \u001b[30m-8.755 |\n", |
269 | | - "| \u001b[30m15 | \u001b[30m0.0 | \u001b[30m2.337 | \u001b[30m7.973 | \u001b[30m-8.96 |\n", |
270 | | - "| \u001b[30m16 | \u001b[30m0.07699 | \u001b[30m0.6926 | \u001b[30m5.59 | \u001b[30m6.854 |\n", |
271 | | - "| \u001b[30m17 | \u001b[30m-0.02025 | \u001b[30m3.534 | \u001b[30m-8.943 | \u001b[30m1.987 |\n", |
272 | | - "| \u001b[30m18 | \u001b[30m0.0 | \u001b[30m2.59 | \u001b[30m-7.339 | \u001b[30m5.941 |\n", |
273 | | - "| \u001b[30m19 | \u001b[30m0.0929 | \u001b[30m2.237 | \u001b[30m-4.535 | \u001b[30m9.065 |\n", |
274 | | - "| \u001b[30m20 | \u001b[30m0.1538 | \u001b[30m0.477 | \u001b[30m2.931 | \u001b[30m2.683 |\n", |
275 | | - "| \u001b[30m21 | \u001b[30m0.0 | \u001b[30m0.9999 | \u001b[30m4.397 | \u001b[30m-3.971 |\n", |
276 | | - "| \u001b[30m22 | \u001b[30m-0.01894 | \u001b[30m3.764 | \u001b[30m-7.043 | \u001b[30m-3.184 |\n", |
277 | | - "| \u001b[30m23 | \u001b[30m0.03683 | \u001b[30m1.851 | \u001b[30m5.783 | \u001b[30m7.966 |\n", |
278 | | - "| \u001b[30m24 | \u001b[30m-0.04359 | \u001b[30m1.615 | \u001b[30m-5.133 | \u001b[30m-6.556 |\n", |
279 | | - "| \u001b[30m25 | \u001b[30m0.02617 | \u001b[30m3.863 | \u001b[30m0.1052 | \u001b[30m8.579 |\n", |
280 | | - "| \u001b[30m26 | \u001b[30m-0.1071 | \u001b[30m0.8131 | \u001b[30m-0.7949 | \u001b[30m-9.292 |\n", |
281 | | - "| \u001b[30m27 | \u001b[30m0.0 | \u001b[30m4.969 | \u001b[30m8.778 | \u001b[30m-8.467 |\n", |
282 | | - "| \u001b[30m28 | \u001b[30m-0.1372 | \u001b[30m0.9475 | \u001b[30m-1.019 | \u001b[30m-7.018 |\n", |
283 | | - "| \u001b[30m29 | \u001b[30m0.08078 | \u001b[30m1.917 | \u001b[30m-0.2606 | \u001b[30m6.272 |\n", |
284 | | - "| \u001b[30m30 | \u001b[30m0.02003 | \u001b[30m4.278 | \u001b[30m3.8 | \u001b[30m8.398 |\n", |
285 | | - "=============================================================\n" |
286 | | - ] |
287 | | - } |
288 | | - ], |
289 | | - "source": [ |
290 | | - "optimizer.set_gp_params(alpha=1e-3)\n", |
291 | | - "optimizer.maximize()" |
292 | | - ] |
293 | | - }, |
294 | | - { |
295 | | - "cell_type": "markdown", |
296 | | - "metadata": {}, |
297 | | - "source": [ |
298 | | - "## 3. Tuning the underlying Gaussian Process\n", |
| 193 | + "## 2. Tuning the underlying Gaussian Process\n", |
299 | 194 | "\n", |
300 | 195 | "The Bayesian optimization algorithm works by performing a Gaussian process regression of the observed combinations of parameters and their associated target values. The predicted parameter $\\rightarrow$ target hyper-surface (and its uncertainty) is then used to guide the next best point to probe." |
301 | 196 | ] |
|
304 | 199 | "cell_type": "markdown", |
305 | 200 | "metadata": {}, |
306 | 201 | "source": [ |
307 | | - "### 3.1 Passing parameter to the GP\n", |
| 202 | + "### 2.1 Passing parameters to the GP\n", |
308 | 203 | "\n", |
309 | 204 | "Depending on the problem it could be beneficial to change the default parameters of the underlying GP. You can use the `optimizer.set_gp_params` method to do this:" |
310 | 205 | ] |
311 | 206 | }, |
312 | 207 | { |
313 | 208 | "cell_type": "code", |
314 | | - "execution_count": 13, |
| 209 | + "execution_count": 9, |
315 | 210 | "metadata": {}, |
316 | 211 | "outputs": [ |
317 | 212 | { |
|
320 | 215 | "text": [ |
321 | 216 | "| iter | target | x | y |\n", |
322 | 217 | "-------------------------------------------------\n", |
323 | | - "| \u001b[30m1 | \u001b[30m0.7862 | \u001b[30m-0.3319 | \u001b[30m1.322 |\n", |
324 | | - "| \u001b[30m2 | \u001b[30m-18.19 | \u001b[30m1.957 | \u001b[30m-2.919 |\n", |
325 | | - "| \u001b[30m3 | \u001b[30m-12.05 | \u001b[30m-1.969 | \u001b[30m-2.029 |\n", |
326 | | - "| \u001b[30m4 | \u001b[30m-7.463 | \u001b[30m0.6032 | \u001b[30m-1.846 |\n", |
327 | | - "| \u001b[30m5 | \u001b[30m-1.093 | \u001b[30m1.444 | \u001b[30m1.096 |\n", |
328 | | - "| \u001b[35m6 | \u001b[35m0.8586 | \u001b[35m-0.2165 | \u001b[35m1.307 |\n", |
| 218 | + "| \u001b[39m1 \u001b[39m | \u001b[39m0.7862 \u001b[39m | \u001b[39m-0.331911\u001b[39m | \u001b[39m1.3219469\u001b[39m |\n", |
| 219 | + "| \u001b[39m2 \u001b[39m | \u001b[39m-18.34 \u001b[39m | \u001b[39m1.9021640\u001b[39m | \u001b[39m-2.965222\u001b[39m |\n", |
| 220 | + "| \u001b[35m3 \u001b[39m | \u001b[35m0.8731 \u001b[39m | \u001b[35m-0.298167\u001b[39m | \u001b[35m1.1948749\u001b[39m |\n", |
| 221 | + "| \u001b[39m4 \u001b[39m | \u001b[39m-6.497 \u001b[39m | \u001b[39m1.9876938\u001b[39m | \u001b[39m2.8830942\u001b[39m |\n", |
| 222 | + "| \u001b[39m5 \u001b[39m | \u001b[39m-4.286 \u001b[39m | \u001b[39m-1.995643\u001b[39m | \u001b[39m-0.141769\u001b[39m |\n", |
| 223 | + "| \u001b[39m6 \u001b[39m | \u001b[39m-6.781 \u001b[39m | \u001b[39m-1.953302\u001b[39m | \u001b[39m2.9913127\u001b[39m |\n", |
329 | 224 | "=================================================\n" |
330 | 225 | ] |
331 | 226 | } |
|
348 | 243 | "cell_type": "markdown", |
349 | 244 | "metadata": {}, |
350 | 245 | "source": [ |
351 | | - "### 3.2 Tuning the `alpha` parameter\n", |
| 246 | + "### 2.2 Tuning the `alpha` parameter\n", |
352 | 247 | "\n", |
353 | 248 | "When dealing with functions with discrete parameters, or a particularly erratic target space, it might be beneficial to increase the value of the `alpha` parameter. This parameter controls how much noise the GP can handle, so increase it whenever you think that extra flexibility is needed." |
354 | 249 | ] |
|
358 | 253 | "cell_type": "markdown", |
359 | 254 | "metadata": {}, |
360 | 255 | "source": [ |
361 | | - "### 3.3 Changing kernels\n", |
| 256 | + "### 2.3 Changing kernels\n", |
362 | 257 | "\n", |
363 | 258 | "By default this package uses the Matern 2.5 kernel. Depending on your use case you may find that tuning the GP kernel could be beneficial. You're on your own here since these are very specific solutions to very specific problems. You should start with the [scikit-learn docs](https://scikit-learn.org/stable/modules/gaussian_process.html#kernels-for-gaussian-processes)." |
364 | 259 | ] |
|
376 | 271 | }, |
377 | 272 | { |
378 | 273 | "cell_type": "code", |
379 | | - "execution_count": 14, |
| 274 | + "execution_count": 10, |
380 | 275 | "metadata": {}, |
381 | 276 | "outputs": [], |
382 | 277 | "source": [ |
|
385 | 280 | }, |
386 | 281 | { |
387 | 282 | "cell_type": "code", |
388 | | - "execution_count": 15, |
| 283 | + "execution_count": 11, |
389 | 284 | "metadata": {}, |
390 | 285 | "outputs": [], |
391 | 286 | "source": [ |
|
399 | 294 | }, |
400 | 295 | { |
401 | 296 | "cell_type": "code", |
402 | | - "execution_count": 16, |
| 297 | + "execution_count": 12, |
403 | 298 | "metadata": {}, |
404 | 299 | "outputs": [], |
405 | 300 | "source": [ |
|
411 | 306 | }, |
412 | 307 | { |
413 | 308 | "cell_type": "code", |
414 | | - "execution_count": 17, |
| 309 | + "execution_count": 13, |
415 | 310 | "metadata": {}, |
416 | 311 | "outputs": [], |
417 | 312 | "source": [ |
|
433 | 328 | }, |
434 | 329 | { |
435 | 330 | "cell_type": "code", |
436 | | - "execution_count": 18, |
| 331 | + "execution_count": 14, |
437 | 332 | "metadata": {}, |
438 | 333 | "outputs": [], |
439 | 334 | "source": [ |
|
449 | 344 | }, |
450 | 345 | { |
451 | 346 | "cell_type": "code", |
452 | | - "execution_count": 19, |
| 347 | + "execution_count": 15, |
453 | 348 | "metadata": {}, |
454 | 349 | "outputs": [ |
455 | 350 | { |
|
476 | 371 | }, |
477 | 372 | { |
478 | 373 | "cell_type": "code", |
479 | | - "execution_count": 20, |
| 374 | + "execution_count": 16, |
480 | 375 | "metadata": {}, |
481 | 376 | "outputs": [ |
482 | 377 | { |
|
485 | 380 | "['optimization:start', 'optimization:step', 'optimization:end']" |
486 | 381 | ] |
487 | 382 | }, |
488 | | - "execution_count": 20, |
| 383 | + "execution_count": 16, |
489 | 384 | "metadata": {}, |
490 | 385 | "output_type": "execute_result" |
491 | 386 | } |
|
497 | 392 | ], |
498 | 393 | "metadata": { |
499 | 394 | "kernelspec": { |
500 | | - "display_name": "Python 3 (ipykernel)", |
| 395 | + "display_name": "bayesian-optimization-t6LLJ9me-py3.10", |
501 | 396 | "language": "python", |
502 | 397 | "name": "python3" |
503 | 398 | }, |
|
511 | 406 | "name": "python", |
512 | 407 | "nbconvert_exporter": "python", |
513 | 408 | "pygments_lexer": "ipython3", |
514 | | - "version": "3.1.undefined" |
| 409 | + "version": "3.10.13" |
515 | 410 | }, |
516 | 411 | "nbdime-conflicts": { |
517 | 412 | "local_diff": [ |
|
0 commit comments