Skip to content

Commit dbc7170

Browse files
bitsbuffermfeurershubhmay-potdar-aera
authored
Issue#1100 Provide argument in AutoSklearnClassifier and AutoSklearnRegressor to pass callback function (#1140)
* Fix ASKL2 link in the docs
* implementation for including callback function after each trial
* Added Typing information to get_trials_callback in AutoSklearnEstimator init
* added get_trials_callback in docstring, used tempfile in test cases
* removed delete_output_folder_after_terminate while creating AutoSklearnClassifier instance from the test case
* added get_trials_callback in test_check_askl2_same_arguments_as_askl, changed code formatting
* removed unwanted imports

Co-authored-by: Matthias Feurer <[email protected]>
Co-authored-by: Shubhmay Potdar <[email protected]>
1 parent b2c5c3c commit dbc7170

File tree

6 files changed

+91
-5
lines changed

6 files changed

+91
-5
lines changed

autosklearn/automl.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ def __init__(self,
139139
smac_scenario_args=None,
140140
logging_config=None,
141141
metric=None,
142-
scoring_functions=None
142+
scoring_functions=None,
143+
get_trials_callback=None
143144
):
144145
super(AutoML, self).__init__()
145146
self.configuration_space = None
@@ -181,6 +182,7 @@ def __init__(self,
181182
"'disable_evaluator_output' must be one "
182183
"of " + str(allowed_elements))
183184
self._get_smac_object_callback = get_smac_object_callback
185+
self._get_trials_callback = get_trials_callback
184186
self._smac_scenario_args = smac_scenario_args
185187
self.logging_config = logging_config
186188

@@ -758,6 +760,7 @@ def fit(
758760
port=self._logger_port,
759761
pynisher_context=self._multiprocessing_context,
760762
ensemble_callback=proc_ensemble,
763+
trials_callback=self._get_trials_callback
761764
)
762765

763766
try:

autosklearn/estimators.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(
5050
metric=None,
5151
scoring_functions: Optional[List[Scorer]] = None,
5252
load_models: bool = True,
53+
get_trials_callback=None
5354
):
5455
"""
5556
Parameters
@@ -223,6 +224,12 @@ def __init__(
223224
224225
load_models : bool, optional (True)
225226
Whether to load the models after fitting Auto-sklearn.
227+
228+
get_trials_callback : callable, optional (None)
229+
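Callback object that is registered with SMAC and called after each trial. It must be an instance of a callback class (e.g. ``IncorporateRunResultCallback``) defined in module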
230+
`smac.callbacks <https://automl.github.io/SMAC3/master/apidoc/smac.callbacks.html>`_.
231+
This is an advanced feature. Use only if you are familiar with
232+
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
226233
227234
Attributes
228235
----------
@@ -264,6 +271,7 @@ def __init__(
264271
self.metric = metric
265272
self.scoring_functions = scoring_functions
266273
self.load_models = load_models
274+
self.get_trials_callback = get_trials_callback
267275

268276
self.automl_ = None # type: Optional[AutoML]
269277

@@ -314,7 +322,8 @@ def build_automl(self):
314322
logging_config=self.logging_config,
315323
metadata_directory=self.metadata_directory,
316324
metric=self.metric,
317-
scoring_functions=self.scoring_functions
325+
scoring_functions=self.scoring_functions,
326+
get_trials_callback=self.get_trials_callback
318327
)
319328

320329
return automl

autosklearn/smbo.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from smac.scenario.scenario import Scenario
1919
from smac.tae.serial_runner import SerialRunner
2020
from smac.tae.dask_runner import DaskParallelRunner
21+
from smac.callbacks import IncorporateRunResultCallback
2122

2223

2324
import autosklearn.metalearning
@@ -241,6 +242,7 @@ def __init__(self, config_space, dataset_name,
241242
scoring_functions=None,
242243
pynisher_context='spawn',
243244
ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
245+
trials_callback: typing.Optional[IncorporateRunResultCallback] = None
244246
):
245247
super(AutoMLSMBO, self).__init__()
246248
# data related
@@ -288,6 +290,7 @@ def __init__(self, config_space, dataset_name,
288290
self.pynisher_context = pynisher_context
289291

290292
self.ensemble_callback = ensemble_callback
293+
self.trials_callback = trials_callback
291294

292295
dataset_name_ = "" if dataset_name is None else dataset_name
293296
logger_name = '%s(%d):%s' % (self.__class__.__name__, self.seed, ":" + dataset_name_)
@@ -499,6 +502,8 @@ def run_smbo(self):
499502

500503
if self.ensemble_callback is not None:
501504
smac.register_callback(self.ensemble_callback)
505+
if self.trials_callback is not None:
506+
smac.register_callback(self.trials_callback)
502507

503508
smac.optimize()
504509

doc/index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ reference to the following paper:
9797
If you are using Auto-sklearn 2.0, please also cite
9898

9999

100-
Auto-Sklearn 2.0: The Next Generation, Feurer *et al.*, to appear (2020).
100+
`Auto-Sklearn 2.0: The Next Generation <https://arxiv.org/abs/2007.04074>`_, Feurer *et al.*, (arXiv, 2020).
101101

102102
Bibtex entry::
103103

@@ -107,7 +107,7 @@ If you are using Auto-sklearn 2.0, please also cite
107107
Falkner, Stefan and Lindauer, Marius and Hutter, Frank},
108108
booktitle = {Advances in Neural Information Processing Systems 28},
109109
year = {2020},
110-
journal = {arXiv:2006.???? [cs.LG]},
110+
journal = {arXiv:2007.04074 [cs.LG]},
111111
}
112112

113113
Contributing

test/test_automl/test_estimators.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -700,7 +700,8 @@ def test_check_askl2_same_arguments_as_askl():
700700
'get_smac_object_callback',
701701
'initial_configurations_via_metalearning',
702702
'resampling_strategy',
703-
'metadata_directory']
703+
'metadata_directory',
704+
'get_trials_callback']
704705
unexpected_args = set(extra_arguments) - set(expected_extra_args)
705706
assert len(unexpected_args) == 0, unexpected_args
706707

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import os
2+
import tempfile
3+
import unittest
4+
5+
import pandas as pd
6+
from smac.callbacks import IncorporateRunResultCallback
7+
from smac.optimizer.smbo import SMBO
8+
from smac.runhistory.runhistory import RunInfo, RunValue
9+
from smac.tae.base import StatusType
10+
11+
import autosklearn.pipeline.util as putil
12+
from autosklearn.classification import AutoSklearnClassifier
13+
14+
15+
class AutoMLTrialsCallBack(IncorporateRunResultCallback):
    """SMAC run-result callback that logs every reported trial to a text file.

    The file is created (or truncated) with a header line when the callback
    is constructed; each later invocation appends one comma-separated line
    describing the trial that was just reported.
    """

    def __init__(self, fname):
        """Create/truncate ``fname`` and write the column header.

        Parameters
        ----------
        fname : str
            Path of the summary file this callback writes to.
        """
        self.trials_num = 1
        self.fname = fname
        column_names = [
            "TrialNo",
            "StartTime",
            "EndTime",
            "Status",
            "TrainLoss",
            "ValidLoss",
            "TestLoss",
            "Classifier",
        ]
        with open(fname, "w") as summary:
            summary.write(", ".join(column_names))

    def __call__(
        self, smbo: 'SMBO',
        run_info: RunInfo,
        result: RunValue,
        time_left: float,
    ) -> None:
        """Append one summary line for the trial described by ``result``.

        Losses are only recorded for trials whose status is ``SUCCESS``;
        for every other status the three loss fields are written as ``None``.
        """
        started = result.starttime
        finished = result.endtime
        status_name = result.status.name

        if status_name == StatusType.SUCCESS.name:
            extra = result.additional_info
            losses = (extra.get('train_loss'), result.cost, extra.get('test_loss'))
        else:
            losses = (None, None, None)

        chosen_classifier = run_info.config.get_dictionary()['classifier:__choice__']
        fields = (self.trials_num, started, finished, status_name,
                  *losses, chosen_classifier)

        with open(self.fname, "a+") as summary:
            # format(value) applies the same empty format spec an f-string
            # placeholder would, so the emitted text is unchanged.
            summary.write("\n " + ", ".join(format(value) for value in fields))
        self.trials_num += 1
49+
50+
51+
class VerifyTrialsCallBack(unittest.TestCase):
    """End-to-end check that ``get_trials_callback`` is invoked during ``fit``."""

    def test_trials_callback_execution(self):
        """Fit a small classifier and verify the callback recorded trials.

        The callback writes one line per trial to a summary file; the test
        passes when that file contains at least one trial row.
        """
        X_train, Y_train, X_test, Y_test = putil.get_dataset('breast_cancer')

        # Use a private temporary directory instead of a fixed file name in
        # the shared system temp dir: this avoids clashes with stale files or
        # concurrently running tests and removes the file afterwards.
        with tempfile.TemporaryDirectory() as summary_dir:
            trials_summary_fname = os.path.join(summary_dir, "trials.csv")
            automl = AutoSklearnClassifier(
                time_left_for_this_task=30,
                initial_configurations_via_metalearning=0,
                per_run_time_limit=10,
                memory_limit=1024,
                delete_tmp_folder_after_terminate=False,
                n_jobs=1,
                include_estimators=["sgd"],
                include_preprocessors=["no_preprocessing"],
                get_trials_callback=AutoMLTrialsCallBack(trials_summary_fname),
            )
            automl.fit(X_train, Y_train, X_test, Y_test)
            # Read the summary before the temporary directory is removed.
            trials = pd.read_csv(trials_summary_fname)

        # read_csv consumes the header line, so every remaining row is one
        # trial; no "- 1" correction is needed when reporting the count.
        n_trials = trials.shape[0]
        self.assertGreater(
            n_trials, 0, f"Auto-Sklearn explored {n_trials} trials"
        )

0 commit comments

Comments
 (0)