Skip to content

Commit babb541

Browse files
committed
modAL.models.BayesianOptimizer docstrings added
1 parent 04f741f commit babb541

File tree

1 file changed

+89
-76
lines changed

1 file changed

+89
-76
lines changed

modAL/models.py

Lines changed: 89 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -21,79 +21,6 @@
2121

2222

2323
class BaseLearner(ABC, BaseEstimator):
24-
"""
25-
This class is an abstract model of a general active learning algorithm.
26-
27-
Parameters
28-
----------
29-
estimator: scikit-learn estimator
30-
The estimator to be used in the active learning loop.
31-
32-
query_strategy: function
33-
Function providing the query strategy for the active learning
34-
loop, for instance modAL.uncertainty.uncertainty_sampling.
35-
36-
X_training: None or numpy.ndarray of shape (n_samples, n_features)
37-
Initial training samples, if available.
38-
39-
y_training: None or numpy.ndarray of shape (n_samples, )
40-
Initial training labels corresponding to initial training samples
41-
42-
bootstrap_init: boolean
43-
If initial training data is available, bootstrapping can be done
44-
during the first training. Useful when building Committee models
45-
with bagging.
46-
47-
fit_kwargs: keyword arguments for the fit method
48-
49-
Attributes
50-
----------
51-
estimator: scikit-learn estimator
52-
The estimator to be used in the active learning loop.
53-
54-
query_strategy: function
55-
Function providing the query strategy for the active learning
56-
loop, for instance modAL.query.max_uncertainty.
57-
58-
X_training: None numpy.ndarray of shape (n_samples, n_features)
59-
If the model hasn't been fitted yet: None
60-
If the model has been fitted already: numpy.ndarray containing the
61-
samples which the model has been trained on
62-
63-
y_training: None or numpy.ndarray of shape (n_samples, )
64-
If the model hasn't been fitted yet: None
65-
If the model has been fitted already: numpy.ndarray containing the
66-
labels corresponding to _training_samples
67-
68-
Examples
69-
--------
70-
>>> from sklearn.datasets import load_iris
71-
>>> from sklearn.ensemble import RandomForestClassifier
72-
>>> from modAL.models import ActiveLearner
73-
>>>
74-
>>> iris = load_iris()
75-
>>> # give initial training examples
76-
>>> X_training = iris['data'][[0, 50, 100]]
77-
>>> y_training = iris['target'][[0, 50, 100]]
78-
>>>
79-
>>> # initialize active learner
80-
>>> learner = ActiveLearner(
81-
... estimator=RandomForestClassifier(),
82-
... X_training=X_training, y_training=y_training
83-
... )
84-
>>>
85-
>>> # querying for labels
86-
>>> query_idx, query_sample = learner.query(iris['data'])
87-
>>>
88-
>>> # ...obtaining new labels from the Oracle...
89-
>>>
90-
>>> # teaching newly labelled examples
91-
>>> learner.teach(
92-
... X=iris['data'][query_idx].reshape(1, -1),
93-
... y=iris['target'][query_idx].reshape(1, )
94-
... )
95-
"""
96-
9724
def __init__(
9825
self,
9926
estimator, # scikit-learner estimator object
@@ -343,9 +270,9 @@ class ActiveLearner(BaseLearner):
343270
Initial training samples, if available.
344271
345272
y_training: None or numpy.ndarray of shape (n_samples, )
346-
Initial training labels corresponding to initial training samples
273+
Initial training labels corresponding to initial training samples.
347274
348-
bootstrap_init: boolean-
275+
bootstrap_init: boolean
349276
If initial training data is available, bootstrapping can be done
350277
during the first training. Useful when building Committee models
351278
with bagging.
@@ -399,6 +326,7 @@ class ActiveLearner(BaseLearner):
399326
... y=iris['target'][query_idx].reshape(1, )
400327
... )
401328
"""
329+
402330
def teach(self, X, y, bootstrap=False, only_new=False, **fit_kwargs):
403331
"""
404332
Adds X and y to the known training data and retrains the predictor
@@ -432,6 +360,92 @@ def teach(self, X, y, bootstrap=False, only_new=False, **fit_kwargs):
432360

433361

434362
class BayesianOptimizer(BaseLearner):
363+
"""
364+
This class is an abstract model of a Bayesian optimization algorithm.
365+
366+
Parameters
367+
----------
368+
estimator: scikit-learn regressor
369+
The regressor to be used in the Bayesian optimization algorithm.
370+
371+
query_strategy: function
372+
Function providing the query strategy for the Bayesian optimization
373+
loop, for instance modAL.acquisition.max_PI.
374+
375+
X_training: None or numpy.ndarray of shape (n_samples, n_features)
376+
Initial training samples, if available.
377+
378+
y_training: None or numpy.ndarray of shape (n_samples, )
379+
Initial values corresponding to initial training samples.
380+
381+
bootstrap_init: boolean
382+
If initial training data is available, bootstrapping can be done
383+
during the first training.
384+
385+
fit_kwargs: keyword arguments for the fit method
386+
387+
Attributes
388+
----------
389+
estimator: scikit-learn regressor
390+
The regressor to be used in the Bayesian optimization algorithm.
391+
392+
query_strategy: function
393+
Function providing the query strategy for the Bayesian optimization
394+
loop, for instance modAL.acquisition.max_PI.
395+
396+
X_training: None or numpy.ndarray of shape (n_samples, n_features)
397+
If the model hasn't been fitted yet: None
398+
If the model has been fitted already: numpy.ndarray containing the
399+
samples which the model has been trained on
400+
401+
y_training: None or numpy.ndarray of shape (n_samples, )
402+
If the model hasn't been fitted yet: None
403+
If the model has been fitted already: numpy.ndarray containing the
404+
values corresponding to the samples in X_training
405+
406+
Examples
407+
--------
408+
>>> import numpy as np
409+
>>> from functools import partial
410+
>>> from sklearn.gaussian_process import GaussianProcessRegressor
411+
>>> from sklearn.gaussian_process.kernels import Matern
412+
>>> from modAL.models import BayesianOptimizer
413+
>>> from modAL.acquisition import PI, EI, UCB, max_PI, max_EI, max_UCB
414+
>>>
415+
>>> # generating the data
416+
>>> X = np.linspace(0, 20, 1000).reshape(-1, 1)
417+
>>> y = np.sin(X)/2 - ((10 - X)**2)/50 + 2
418+
>>>
419+
>>> # assembling initial training set
420+
>>> X_initial, y_initial = X[150].reshape(1, -1), y[150].reshape(1, -1)
421+
>>>
422+
>>> # defining the kernel for the Gaussian process
423+
>>> kernel = Matern(length_scale=1.0)
424+
>>>
425+
>>> tr = 0.1
426+
>>> PI_tr = partial(PI, tradeoff=tr)
427+
>>> PI_tr.__name__ = 'PI, tradeoff = %1.1f' % tr
428+
>>> max_PI_tr = partial(max_PI, tradeoff=tr)
429+
>>>
430+
>>> acquisitions = zip(
431+
... [PI_tr, EI, UCB],
432+
... [max_PI_tr, max_EI, max_UCB],
433+
... )
434+
>>>
435+
>>> for acquisition, query_strategy in acquisitions:
436+
... # initializing the optimizer
437+
... optimizer = BayesianOptimizer(
438+
... estimator=GaussianProcessRegressor(kernel=kernel),
439+
... X_training=X_initial, y_training=y_initial,
440+
... query_strategy=query_strategy
441+
... )
442+
...
443+
... for n_query in range(5):
444+
... # query
445+
... query_idx, query_inst = optimizer.query(X)
446+
... optimizer.teach(X[query_idx].reshape(1, -1), y[query_idx].reshape(1, -1))
447+
"""
448+
435449
def __init__(self, *args, **kwargs):
436450
super(BayesianOptimizer, self).__init__(*args, **kwargs)
437451
# setting the maximum value
@@ -834,7 +848,6 @@ def vote_proba(self, X, **predict_proba_kwargs):
834848
-------
835849
vote_proba: numpy.ndarray of shape (n_samples, n_learners, n_classes)
836850
Probabilities of each class for each learner and each instance.
837-
838851
"""
839852

840853
# get dimensions

0 commit comments

Comments
 (0)