|
21 | 21 |
|
22 | 22 |
|
23 | 23 | class BaseLearner(ABC, BaseEstimator): |
24 | | - """ |
25 | | - This class is an abstract model of a general active learning algorithm. |
26 | | -
|
27 | | - Parameters |
28 | | - ---------- |
29 | | - estimator: scikit-learn estimator |
30 | | - The estimator to be used in the active learning loop. |
31 | | -
|
32 | | - query_strategy: function |
33 | | - Function providing the query strategy for the active learning |
34 | | - loop, for instance modAL.uncertainty.uncertainty_sampling. |
35 | | -
|
36 | | - X_training: None or numpy.ndarray of shape (n_samples, n_features) |
37 | | - Initial training samples, if available. |
38 | | -
|
39 | | - y_training: None or numpy.ndarray of shape (n_samples, ) |
40 | | - Initial training labels corresponding to initial training samples |
41 | | -
|
42 | | - bootstrap_init: boolean |
43 | | - If initial training data is available, bootstrapping can be done |
44 | | - during the first training. Useful when building Committee models |
45 | | - with bagging. |
46 | | -
|
47 | | - fit_kwargs: keyword arguments for the fit method |
48 | | -
|
49 | | - Attributes |
50 | | - ---------- |
51 | | - estimator: scikit-learn estimator |
52 | | - The estimator to be used in the active learning loop. |
53 | | -
|
54 | | - query_strategy: function |
55 | | - Function providing the query strategy for the active learning |
56 | | - loop, for instance modAL.query.max_uncertainty. |
57 | | -
|
58 | | - X_training: None numpy.ndarray of shape (n_samples, n_features) |
59 | | - If the model hasn't been fitted yet: None |
60 | | - If the model has been fitted already: numpy.ndarray containing the |
61 | | - samples which the model has been trained on |
62 | | -
|
63 | | - y_training: None or numpy.ndarray of shape (n_samples, ) |
64 | | - If the model hasn't been fitted yet: None |
65 | | - If the model has been fitted already: numpy.ndarray containing the |
66 | | - labels corresponding to _training_samples |
67 | | -
|
68 | | - Examples |
69 | | - -------- |
70 | | - >>> from sklearn.datasets import load_iris |
71 | | - >>> from sklearn.ensemble import RandomForestClassifier |
72 | | - >>> from modAL.models import ActiveLearner |
73 | | - >>> |
74 | | - >>> iris = load_iris() |
75 | | - >>> # give initial training examples |
76 | | - >>> X_training = iris['data'][[0, 50, 100]] |
77 | | - >>> y_training = iris['target'][[0, 50, 100]] |
78 | | - >>> |
79 | | - >>> # initialize active learner |
80 | | - >>> learner = ActiveLearner( |
81 | | - ... estimator=RandomForestClassifier(), |
82 | | - ... X_training=X_training, y_training=y_training |
83 | | - ... ) |
84 | | - >>> |
85 | | - >>> # querying for labels |
86 | | - >>> query_idx, query_sample = learner.query(iris['data']) |
87 | | - >>> |
88 | | - >>> # ...obtaining new labels from the Oracle... |
89 | | - >>> |
90 | | - >>> # teaching newly labelled examples |
91 | | - >>> learner.teach( |
92 | | - ... X=iris['data'][query_idx].reshape(1, -1), |
93 | | - ... y=iris['target'][query_idx].reshape(1, ) |
94 | | - ... ) |
95 | | - """ |
96 | | - |
97 | 24 | def __init__( |
98 | 25 | self, |
99 | 26 | estimator, # scikit-learner estimator object |
@@ -343,9 +270,9 @@ class ActiveLearner(BaseLearner): |
343 | 270 | Initial training samples, if available. |
344 | 271 |
|
345 | 272 | y_training: None or numpy.ndarray of shape (n_samples, ) |
346 | | - Initial training labels corresponding to initial training samples |
| 273 | + Initial training labels corresponding to initial training samples. |
347 | 274 |
|
348 | | - bootstrap_init: boolean- |
| 275 | + bootstrap_init: boolean |
349 | 276 | If initial training data is available, bootstrapping can be done |
350 | 277 | during the first training. Useful when building Committee models |
351 | 278 | with bagging. |
@@ -399,6 +326,7 @@ class ActiveLearner(BaseLearner): |
399 | 326 | ... y=iris['target'][query_idx].reshape(1, ) |
400 | 327 | ... ) |
401 | 328 | """ |
| 329 | + |
402 | 330 | def teach(self, X, y, bootstrap=False, only_new=False, **fit_kwargs): |
403 | 331 | """ |
404 | 332 | Adds X and y to the known training data and retrains the predictor |
@@ -432,6 +360,92 @@ def teach(self, X, y, bootstrap=False, only_new=False, **fit_kwargs): |
432 | 360 |
|
433 | 361 |
|
434 | 362 | class BayesianOptimizer(BaseLearner): |
| 363 | + """ |
| 364 | + This class is an abstract model of a Bayesian optimizer algorithm. |
| 365 | +
|
| 366 | + Parameters |
| 367 | + ---------- |
| 368 | + estimator: scikit-learn regressor |
| 369 | + The regressor to be used in the Bayesian optimization algorithm. |
| 370 | +
|
| 371 | + query_strategy: function |
| 372 | + Function providing the query strategy for the Bayesian optimization |
| 373 | + loop, for instance modAL.acquisition.max_PI. |
| 374 | +
|
| 375 | + X_training: None or numpy.ndarray of shape (n_samples, n_features) |
| 376 | + Initial training samples, if available. |
| 377 | +
|
| 378 | + y_training: None or numpy.ndarray of shape (n_samples, ) |
| 379 | + Initial values corresponding to initial training samples. |
| 380 | +
|
| 381 | + bootstrap_init: boolean |
| 382 | + If initial training data is available, bootstrapping can be done |
| 383 | + during the first training. |
| 384 | +
|
| 385 | + fit_kwargs: keyword arguments for the fit method |
| 386 | +
|
| 387 | + Attributes |
| 388 | + ---------- |
| 389 | + estimator: scikit-learn regressor |
| 390 | + The regressor to be used in the Bayesian optimization algorithm. |
| 391 | +
|
| 392 | + query_strategy: function |
| 393 | + Function providing the query strategy for the Bayesian optimization |
| 394 | + loop, for instance modAL.acquisition.max_PI. |
| 395 | +
|
| 396 | + X_training: None or numpy.ndarray of shape (n_samples, n_features) |
| 397 | + If the model hasn't been fitted yet: None |
| 398 | + If the model has been fitted already: numpy.ndarray containing the |
| 399 | + samples which the model has been trained on |
| 400 | +
|
| 401 | + y_training: None or numpy.ndarray of shape (n_samples, ) |
| 402 | + If the model hasn't been fitted yet: None |
| 403 | + If the model has been fitted already: numpy.ndarray containing the |
| 404 | + values corresponding to the samples in X_training |
| 405 | +
|
| 406 | + Examples |
| 407 | + -------- |
| 408 | + >>> import numpy as np |
| 409 | + >>> from functools import partial |
| 410 | + >>> from sklearn.gaussian_process import GaussianProcessRegressor |
| 411 | + >>> from sklearn.gaussian_process.kernels import Matern |
| 412 | + >>> from modAL.models import BayesianOptimizer |
| 413 | + >>> from modAL.acquisition import PI, EI, UCB, max_PI, max_EI, max_UCB |
| 414 | + >>> |
| 415 | + >>> # generating the data |
| 416 | + >>> X = np.linspace(0, 20, 1000).reshape(-1, 1) |
| 417 | + >>> y = np.sin(X)/2 - ((10 - X)**2)/50 + 2 |
| 418 | + >>> |
| 419 | + >>> # assembling initial training set |
| 420 | + >>> X_initial, y_initial = X[150].reshape(1, -1), y[150].reshape(1, -1) |
| 421 | + >>> |
| 422 | + >>> # defining the kernel for the Gaussian process |
| 423 | + >>> kernel = Matern(length_scale=1.0) |
| 424 | + >>> |
| 425 | + >>> tr = 0.1 |
| 426 | + >>> PI_tr = partial(PI, tradeoff=tr) |
| 427 | + >>> PI_tr.__name__ = 'PI, tradeoff = %1.1f' % tr |
| 428 | + >>> max_PI_tr = partial(max_PI, tradeoff=tr) |
| 429 | + >>> |
| 430 | + >>> acquisitions = zip( |
| 431 | + ... [PI_tr, EI, UCB], |
| 432 | + ... [max_PI_tr, max_EI, max_UCB], |
| 433 | + ... ) |
| 434 | + >>> |
| 435 | + >>> for acquisition, query_strategy in acquisitions: |
| 436 | + ... # initializing the optimizer |
| 437 | + ... optimizer = BayesianOptimizer( |
| 438 | + ... estimator=GaussianProcessRegressor(kernel=kernel), |
| 439 | + ... X_training=X_initial, y_training=y_initial, |
| 440 | + ... query_strategy=query_strategy |
| 441 | + ... ) |
| 442 | + ... |
| 443 | + ... for n_query in range(5): |
| 444 | + ... # query |
| 445 | + ... query_idx, query_inst = optimizer.query(X) |
| 446 | + ... optimizer.teach(X[query_idx].reshape(1, -1), y[query_idx].reshape(1, -1)) |
| 447 | + """ |
| 448 | + |
435 | 449 | def __init__(self, *args, **kwargs): |
436 | 450 | super(BayesianOptimizer, self).__init__(*args, **kwargs) |
437 | 451 | # setting the maximum value |
@@ -834,7 +848,6 @@ def vote_proba(self, X, **predict_proba_kwargs): |
834 | 848 | ------- |
835 | 849 | vote_proba: numpy.ndarray of shape (n_samples, n_learners, n_classes) |
836 | 850 | Probabilities of each class for each learner and each instance. |
837 | | -
|
838 | 851 | """ |
839 | 852 |
|
840 | 853 | # get dimensions |
|
0 commit comments