Skip to content

Commit 278f88a

Browse files
authored
Merge pull request #510 from ahn1340/extend
Example extending auto-sklearn
2 parents 3cf42b5 + 9b652d5 commit 278f88a

File tree

3 files changed

+354
-0
lines changed

3 files changed

+354
-0
lines changed
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
"""
2+
====================================================================
3+
Extending Auto-Sklearn with Classification Component
4+
====================================================================
5+
6+
The following example demonstrates how to create a new classification
7+
component for using in auto-sklearn.
8+
"""
9+
10+
from ConfigSpace.configuration_space import ConfigurationSpace
11+
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
12+
UniformIntegerHyperparameter, UniformFloatHyperparameter
13+
14+
import sklearn.metrics
15+
import autosklearn.classification
16+
import autosklearn.pipeline.components.classification
17+
from autosklearn.pipeline.components.base \
18+
import AutoSklearnClassificationAlgorithm
19+
from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \
20+
PREDICTIONS
21+
22+
23+
# Create MLP classifier component for auto-sklearn.
class MLPClassifier(AutoSklearnClassificationAlgorithm):
    """Multi-layer perceptron classification component for auto-sklearn.

    Wraps ``sklearn.neural_network.MLPClassifier`` so auto-sklearn can tune
    its architecture and training hyperparameters during pipeline search.
    """

    def __init__(self,
                 hidden_layer_depth,
                 num_nodes_per_layer,
                 activation,
                 alpha,
                 solver,
                 random_state=None,
                 ):
        self.hidden_layer_depth = hidden_layer_depth
        self.num_nodes_per_layer = num_nodes_per_layer
        self.activation = activation
        self.alpha = alpha
        self.solver = solver
        self.random_state = random_state
        # BUG FIX: initialize the estimator so that predict()/predict_proba()
        # raise the intended NotImplementedError (not AttributeError) when
        # called before fit(). Matches the sibling LDA/KRR example components.
        self.estimator = None

    def fit(self, X, y):
        # Values sampled from the configuration space may not carry the
        # exact types sklearn expects; cast them explicitly.
        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
        self.hidden_layer_depth = int(self.hidden_layer_depth)
        self.alpha = float(self.alpha)

        from sklearn.neural_network import MLPClassifier
        # Expand (depth, width) into sklearn's hidden_layer_sizes tuple.
        hidden_layer_sizes = tuple(self.num_nodes_per_layer
                                   for i in range(self.hidden_layer_depth))

        self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
                                       activation=self.activation,
                                       alpha=self.alpha,
                                       solver=self.solver,
                                       random_state=self.random_state,
                                       )
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict_proba(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        # Typo fix in 'name': 'MLP CLassifier' -> 'MLP Classifier'.
        return {'shortname': 'MLP Classifier',
                'name': 'MLP Classifier',
                'handles_regression': False,
                'handles_classification': True,
                'handles_multiclass': True,
                'handles_multilabel': False,
                'is_deterministic': False,
                # Both input and output must be tuple(iterable)
                'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA],
                'output': [PREDICTIONS]
                }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        hidden_layer_depth = UniformIntegerHyperparameter(
            name="hidden_layer_depth", lower=1, upper=3, default_value=1
        )
        num_nodes_per_layer = UniformIntegerHyperparameter(
            name="num_nodes_per_layer", lower=16, upper=216, default_value=32
        )
        activation = CategoricalHyperparameter(
            name="activation", choices=['identity', 'logistic', 'tanh', 'relu'],
            default_value='relu'
        )
        alpha = UniformFloatHyperparameter(
            name="alpha", lower=0.0001, upper=1.0, default_value=0.0001
        )
        solver = CategoricalHyperparameter(
            name="solver", choices=['lbfgs', 'sgd', 'adam'], default_value='adam'
        )
        cs.add_hyperparameters([hidden_layer_depth,
                                num_nodes_per_layer,
                                activation,
                                alpha,
                                solver,
                                ])
        return cs
108+
109+
110+
# Add MLP classifier component to auto-sklearn.
autosklearn.pipeline.components.classification.add_classifier(MLPClassifier)
cs = MLPClassifier.get_hyperparameter_search_space()
print(cs)

# Generate data.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit MLP classifier to the data.
clf = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=20,
    per_run_time_limit=10,
    include_estimators=['MLPClassifier'],
)
clf.fit(X_train, y_train)

# Print test accuracy and statistics.
y_pred = clf.predict(X_test)
# sklearn's convention is (y_true, y_pred); accuracy is symmetric so the
# value is unchanged, but the original swapped order was misleading.
print("accuracy: ", sklearn.metrics.accuracy_score(y_test, y_pred))
print(clf.sprint_statistics())
print(clf.show_models())
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""
2+
====================================================================
3+
Extending Auto-Sklearn with Preprocessor Component
4+
====================================================================
5+
6+
The following example demonstrates how to create a wrapper around the linear
7+
discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor
8+
in auto-sklearn.
9+
"""
10+
11+
from ConfigSpace.configuration_space import ConfigurationSpace
12+
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
13+
UniformIntegerHyperparameter, CategoricalHyperparameter
14+
15+
import sklearn.metrics
16+
import autosklearn.classification
17+
import autosklearn.metrics
18+
import autosklearn.pipeline.components.feature_preprocessing
19+
from autosklearn.pipeline.components.base \
20+
import AutoSklearnPreprocessingAlgorithm
21+
from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, \
22+
UNSIGNED_DATA
23+
24+
25+
# Create LDA component for auto-sklearn.
class LDA(AutoSklearnPreprocessingAlgorithm):
    """Linear discriminant analysis (LDA) preprocessor component.

    Wraps ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis`` so it
    can be used as a feature preprocessor inside auto-sklearn.
    """

    def __init__(self, shrinkage, solver, n_components, tol, random_state=None):
        self.solver = solver
        self.shrinkage = shrinkage
        self.n_components = n_components
        self.tol = tol
        self.random_state = random_state
        self.preprocessor = None

    def fit(self, X, y=None):
        # Cast configuration-space values to the types sklearn expects.
        self.shrinkage = float(self.shrinkage)
        self.n_components = int(self.n_components)
        self.tol = float(self.tol)

        import sklearn.discriminant_analysis
        # BUG FIX: sklearn supports shrinkage only with the 'lsqr' and
        # 'eigen' solvers; passing a shrinkage value together with the
        # 'svd' solver (the search-space default) raises at fit time.
        # Drop shrinkage for unsupported solvers instead of crashing.
        shrinkage = self.shrinkage if self.solver in ('lsqr', 'eigen') else None
        self.preprocessor = \
            sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
                shrinkage=shrinkage,
                solver=self.solver,
                n_components=self.n_components,
                tol=self.tol,
            )
        self.preprocessor.fit(X, y)
        return self

    def transform(self, X):
        if self.preprocessor is None:
            raise NotImplementedError()
        return self.preprocessor.transform(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'LDA',
                'name': 'Linear Discriminant Analysis',
                'handles_regression': False,
                'handles_classification': True,
                'handles_multiclass': False,
                'handles_multilabel': False,
                'is_deterministic': True,
                'input': (DENSE, UNSIGNED_DATA, SIGNED_DATA),
                'output': (DENSE, UNSIGNED_DATA, SIGNED_DATA)}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        solver = CategoricalHyperparameter(
            name="solver", choices=['svd', 'lsqr', 'eigen'], default_value='svd'
        )
        shrinkage = UniformFloatHyperparameter(
            name="shrinkage", lower=0.0, upper=1.0, default_value=0.5
        )
        n_components = UniformIntegerHyperparameter(
            name="n_components", lower=1, upper=29, default_value=10
        )
        tol = UniformFloatHyperparameter(
            name="tol", lower=0.0001, upper=1, default_value=0.0001
        )
        cs.add_hyperparameters([solver, shrinkage, n_components, tol])
        return cs
85+
86+
87+
# Add LDA component to auto-sklearn.
autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA)

# Create dataset.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Configuration space.
cs = LDA.get_hyperparameter_search_space()
print(cs)

# Fit the model using LDA as preprocessor.
clf = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=30,
    include_preprocessors=['LDA'],
)
clf.fit(X_train, y_train)

# Print prediction score and statistics.
y_pred = clf.predict(X_test)
# Typo fix ("accracy" -> "accuracy") and y_true-first argument order.
print("accuracy: ", sklearn.metrics.accuracy_score(y_test, y_pred))
print(clf.sprint_statistics())
print(clf.show_models())
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""
2+
====================================================================
3+
Extending Auto-Sklearn with Regression Component
4+
====================================================================
5+
6+
The following example demonstrates how to create a new regression
7+
component for using in auto-sklearn.
8+
"""
9+
10+
from ConfigSpace.configuration_space import ConfigurationSpace
11+
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
12+
UniformIntegerHyperparameter, CategoricalHyperparameter
13+
14+
import sklearn.metrics
15+
import autosklearn.regression
16+
import autosklearn.pipeline.components.regression
17+
from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
18+
from autosklearn.pipeline.constants import SPARSE, DENSE, \
19+
SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS
20+
21+
22+
# Implement kernel ridge regression component for auto-sklearn.
class KernelRidgeRegression(AutoSklearnRegressionAlgorithm):
    """Kernel ridge regression (KRR) component for auto-sklearn.

    Thin wrapper around ``sklearn.kernel_ridge.KernelRidge`` exposing a
    configuration space over the kernel choice and its parameters.
    """

    def __init__(self, alpha, kernel, gamma, degree, random_state=None):
        self.alpha = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.random_state = random_state
        self.estimator = None

    def fit(self, X, y):
        # Values sampled from the configuration space may not carry the
        # exact types sklearn expects, so cast them up front.
        self.alpha = float(self.alpha)
        self.gamma = float(self.gamma)
        self.degree = int(self.degree)

        import sklearn.kernel_ridge

        self.estimator = sklearn.kernel_ridge.KernelRidge(
            alpha=self.alpha,
            kernel=self.kernel,
            gamma=self.gamma,
            degree=self.degree,
        )
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        # Calling predict() before fit() is not supported.
        if self.estimator is None:
            raise NotImplementedError
        return self.estimator.predict(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        properties = {
            'shortname': 'KRR',
            'name': 'Kernel Ridge Regression',
            'handles_regression': True,
            'handles_classification': False,
            'handles_multiclass': False,
            'handles_multilabel': False,
            'is_deterministic': True,
            'input': (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA),
            'output': (PREDICTIONS,),
        }
        return properties

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        config_space = ConfigurationSpace()
        config_space.add_hyperparameters([
            UniformFloatHyperparameter(
                name='alpha', lower=10 ** -5, upper=1, log=True,
                default_value=0.1,
            ),
            CategoricalHyperparameter(
                name='kernel',
                choices=['linear', 'rbf', 'sigmoid', 'polynomial'],
                default_value='linear',
            ),
            UniformFloatHyperparameter(
                name='gamma', lower=0.00001, upper=1, default_value=0.1,
                log=True,
            ),
            UniformIntegerHyperparameter(
                name='degree', lower=2, upper=5, default_value=3,
            ),
        ])
        return config_space
85+
86+
87+
# Add KRR component to auto-sklearn.
autosklearn.pipeline.components.regression.add_regressor(KernelRidgeRegression)
cs = KernelRidgeRegression.get_hyperparameter_search_space()
print(cs)

# Generate data.
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit the model using KRR.
reg = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=30,
    per_run_time_limit=10,
    include_estimators=['KernelRidgeRegression'],
)
reg.fit(X_train, y_train)

# Print prediction score and statistics.
y_pred = reg.predict(X_test)
# BUG FIX: r2_score expects (y_true, y_pred). Unlike accuracy, R^2 is NOT
# symmetric in its arguments, so the original swapped call printed a
# wrong score.
print("r2 score: ", sklearn.metrics.r2_score(y_test, y_pred))
print(reg.sprint_statistics())
print(reg.show_models())

0 commit comments

Comments
 (0)