Commit fd0fe3f

Merge pull request #579 from automl/development
Create Release 0.4.1
2 parents 202918e + 8aae9d6 commit fd0fe3f

32 files changed: +962 additions, -209 deletions

.travis.yml

Lines changed: 22 additions & 5 deletions
@@ -16,9 +16,14 @@ matrix:
   - os: linux
     env: DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
   - os: linux
-    env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+    env: DISTRIB="conda" PYTHON_VERSION="3.6" DOCPUSH="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
   - os: linux
     env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+  - os: linux
+    env: DISTRIB="conda" PYTHON_VERSION="3.7" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+  - os: linux
+    env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
 
   # Temporarily disabling OSX builds because thy take too long
   # Set language to generic to not break travis-ci

@@ -58,17 +63,29 @@ before_install:
 install:
   # Install general requirements the way setup.py suggests
   - pip install pep8 codecov
+  # Temporarily pin the numpy version for travis-ci
+  - pip install "numpy<=1.14.5"
   - cat requirements.txt | xargs -n 1 -L 1 pip install
   # Install openml dependency for metadata generation unittest
-  - pip install xmltodict requests
-  - pip install git+https://github.com/renatopp/liac-arff
+  - pip install xmltodict requests liac-arff
   - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps
   - mkdir ~/.openml
   - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config
+  - pip install flake8
   # Debug output to know all exact package versions!
   - pip freeze
   - python setup.py install
-
+
 script: bash ci_scripts/test.sh
-after_success: source ci_scripts/success.sh
+after_success: source ci_scripts/success.sh && source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result"
 
+deploy:
+  provider: pages
+  skip-cleanup: true
+  github-token: $GITHUB_TOKEN # set in the settings page of my repository
+  keep-hisotry: true
+  commiter-from-gh: true
+  on:
+    all_branches: true
+    condition: $doc_result = "success"
+  local_dir: doc/$TRAVIS_BRANCH

README.md

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ auto-sklearn is an automated machine learning toolkit and a drop-in replacement
 
 Find the documentation [here](http://automl.github.io/auto-sklearn/)
 
-Status for master branch:
+Status for master branch
 
 [![Build Status](https://travis-ci.org/automl/auto-sklearn.svg?branch=master)](https://travis-ci.org/automl/auto-sklearn)
 [![Code Health](https://landscape.io/github/automl/auto-sklearn/master/landscape.png)](https://landscape.io/github/automl/auto-sklearn/master)

autosklearn/__version__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 """Version information."""
 
 # The following line *must* be the last in the module, exactly as formatted:
-__version__ = "0.4.0"
+__version__ = "0.4.1"

autosklearn/automl.py

Lines changed: 6 additions & 1 deletion
@@ -82,6 +82,7 @@ def __init__(self,
                  disable_evaluator_output=False,
                  get_smac_object_callback=None,
                  smac_scenario_args=None,
+                 logging_config=None,
                  ):
         super(AutoML, self).__init__()
         self._backend = backend
@@ -110,6 +111,7 @@ def __init__(self,
         self._disable_evaluator_output = disable_evaluator_output
         self._get_smac_object_callback = get_smac_object_callback
         self._smac_scenario_args = smac_scenario_args
+        self.logging_config = logging_config
 
         self._datamanager = None
         self._dataset_name = None
@@ -235,7 +237,10 @@ def fit_on_datamanager(self, datamanager, metric):
 
     def _get_logger(self, name):
         logger_name = 'AutoML(%d):%s' % (self._seed, name)
-        setup_logger(os.path.join(self._backend.temporary_directory, '%s.log' % str(logger_name)))
+        setup_logger(os.path.join(self._backend.temporary_directory,
+                                  '%s.log' % str(logger_name)),
+                     self.logging_config,
+                     )
         return get_logger(logger_name)
 
     @staticmethod

autosklearn/ensemble_builder.py

Lines changed: 12 additions & 11 deletions
@@ -171,11 +171,8 @@ def main(self):
         while True:
 
             #maximal number of iterations
-            if (
-                self.max_iterations is not None
-                and self.max_iterations > 0
-                and iteration >= self.max_iterations
-            ):
+            if (self.max_iterations is not None
+                    and 0 < self.max_iterations <= iteration):
                 self.logger.info("Terminate ensemble building because of max iterations: %d of %d",
                                  self.max_iterations,
                                  iteration)
@@ -300,7 +297,7 @@ def read_ensemble_preds(self):
                     Y_TEST: None,
                     # Lazy keys so far:
                     # 0 - not loaded
-                    # 1 - loaded and ind memory
+                    # 1 - loaded and in memory
                     # 2 - loaded but dropped again
                     "loaded": 0
                 }
@@ -372,14 +369,18 @@ def get_n_best_preds(self):
             ],
             key=lambda x: x[1],
         )))
-        # remove all that are at most as good as random, cannot assume a
-        # minimum number here because all kinds of metric can be used
-        sorted_keys = filter(lambda x: x[1] > 0.001, sorted_keys)
+        # remove all that are at most as good as random
+        # note: dummy model must have run_id=1 (there is not run_id=0)
+        dummy_score = list(filter(lambda x: x[2] == 1, sorted_keys))[0]
+        self.logger.debug("Use %f as dummy score" %
+                          dummy_score[1])
+        sorted_keys = filter(lambda x: x[1] > dummy_score[1], sorted_keys)
         # remove Dummy Classifier
         sorted_keys = list(filter(lambda x: x[2] > 1, sorted_keys))
         if not sorted_keys:
-            # no model left; try to use dummy classifier (num_run==0)
-            self.logger.warning("No models better than random - using Dummy Classifier!")
+            # no model left; try to use dummy score (num_run==0)
+            self.logger.warning("No models better than random - "
+                                "using Dummy Score!")
             sorted_keys = [
                 (k, v["ens_score"], v["num_run"]) for k, v in self.read_preds.items()
                 if v["seed"] == self.seed and v["num_run"] == 1

autosklearn/estimators.py

Lines changed: 36 additions & 2 deletions
@@ -3,6 +3,7 @@
 
 from autosklearn.automl import AutoMLClassifier, AutoMLRegressor
 from autosklearn.util.backend import create
+from sklearn.utils.multiclass import type_of_target
 
 
 class AutoSklearnEstimator(BaseEstimator):
@@ -28,7 +29,9 @@ def __init__(self,
                  shared_mode=False,
                  disable_evaluator_output=False,
                  get_smac_object_callback=None,
-                 smac_scenario_args=None):
+                 smac_scenario_args=None,
+                 logging_config=None,
+                 ):
         """
         Parameters
         ----------
@@ -168,6 +171,11 @@ def __init__(self,
             This is an advanced feature. Use only if you are familiar with
             `SMAC <https://automl.github.io/SMAC3/stable/index.html>`_.
 
+        logging_config : dict, optional (None)
+            dictionary object specifying the logger configuration. If None,
+            the default logging.yaml file is used, which can be found in
+            the directory ``util/logging.yaml`` relative to the installation.
+
         Attributes
         ----------
 
@@ -199,6 +207,7 @@ def __init__(self,
         self.disable_evaluator_output = disable_evaluator_output
         self.get_smac_object_callback = get_smac_object_callback
         self.smac_scenario_args = smac_scenario_args
+        self.logging_config = logging_config
 
         self._automl = None
         super().__init__()
@@ -238,7 +247,8 @@ def build_automl(self):
             shared_mode=self.shared_mode,
             get_smac_object_callback=self.get_smac_object_callback,
             disable_evaluator_output=self.disable_evaluator_output,
-            smac_scenario_args=self.smac_scenario_args
+            smac_scenario_args=self.smac_scenario_args,
+            logging_config=self.logging_config,
         )
 
         return automl
@@ -456,6 +466,18 @@ def fit(self, X, y,
         self
 
         """
+        # Before running anything else, first check that the
+        # type of data is compatible with auto-sklearn. Legal target
+        # types are: binary, multiclass, multilabel-indicator.
+        target_type = type_of_target(y)
+        if target_type in ['multiclass-multioutput',
+                           'continuous',
+                           'continuous-multioutput',
+                           'unknown',
+                           ]:
+            raise ValueError("classification with data of type %s is"
+                             " not supported" % target_type)
+
         super().fit(
             X=X,
             y=y,
@@ -559,6 +581,18 @@ def fit(self, X, y,
         self
 
         """
+        # Before running anything else, first check that the
+        # type of data is compatible with auto-sklearn. Legal target
+        # types are: continuous, binary, multiclass.
+        target_type = type_of_target(y)
+        if target_type in ['multiclass-multioutput',
+                           'multilabel-indicator',
+                           'continuous-multioutput',
+                           'unknown',
+                           ]:
+            raise ValueError("regression with data of type %s is not"
+                             " supported" % target_type)
+
         # Fit is supposed to be idempotent!
         # But not if we use share_mode.
         super().fit(
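
The new fit-time checks above delegate to sklearn.utils.multiclass.type_of_target. A small sketch of which target arrays the classifier and regressor would now accept, restating only the rejection lists from the diff (the example arrays are illustrative):

import numpy as np
from sklearn.utils.multiclass import type_of_target

# Target types rejected by the classifier / regressor fit methods above.
CLF_REJECTED = ['multiclass-multioutput', 'continuous',
                'continuous-multioutput', 'unknown']
REG_REJECTED = ['multiclass-multioutput', 'multilabel-indicator',
                'continuous-multioutput', 'unknown']

examples = {
    'binary': np.array([0, 1, 1, 0]),
    'multiclass': np.array([0, 1, 2, 1]),
    'multilabel-indicator': np.array([[0, 1], [1, 1]]),
    'continuous': np.array([0.1, 2.3, 4.5]),
}

for name, y in examples.items():
    t = type_of_target(y)
    print('%-22s classifier ok: %-5s regressor ok: %s'
          % (t, t not in CLF_REJECTED, t not in REG_REJECTED))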

autosklearn/metalearning/metalearning/clustering/gmeans.py

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ def fit(self, X):
                 break
 
             # Refinement
-            KMeans = sklearn.cluster.KMeans(n_clusters=1, n_init=1,
+            KMeans = sklearn.cluster.KMeans(n_clusters=len(cluster_centers), n_init=1,
                                             init=np.array(cluster_centers),
                                             random_state=self.random_state)
             KMeans.fit(X)
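
For context on the one-line gmeans.py fix: scikit-learn's KMeans requires n_clusters to match the number of rows in an explicit init array, so passing n_clusters=1 with several refinement centers fails. A minimal sketch with made-up data (exact error wording varies by scikit-learn version):

import numpy as np
from sklearn.cluster import KMeans

X = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 5])
cluster_centers = [[0.0, 0.0], [5.0, 5.0]]

# n_clusters must equal the number of rows in the explicit init array;
# with n_clusters=1 (the old code) scikit-learn raises a ValueError because
# the shape of the initial centers does not match the number of clusters.
km = KMeans(n_clusters=len(cluster_centers), n_init=1,
            init=np.array(cluster_centers), random_state=0)
km.fit(X)
print(km.cluster_centers_)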

autosklearn/pipeline/create_searchspace_util.py

Lines changed: 9 additions & 11 deletions
@@ -117,8 +117,8 @@ def find_active_choices(matches, node, node_idx, dataset_properties, \
 
     choices = []
     for c_idx, component in enumerate(available_components):
-        slices = [slice(None) if idx != node_idx else slice(c_idx, c_idx+1)
-                  for idx in range(len(matches.shape))]
+        slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx+1)
+                       for idx in range(len(matches.shape)))
 
         if np.sum(matches[slices]) > 0:
             choices.append(component)
@@ -200,10 +200,10 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties,
     for product in itertools.product(*num_node_choices):
         for node_idx, choice_idx in enumerate(product):
             node_idx += start_idx
-            slices_ = [
+            slices_ = tuple(
                 slice(None) if idx != node_idx else
                 slice(choice_idx, choice_idx + 1) for idx in
-                range(len(matches.shape))]
+                range(len(matches.shape)))
 
             if np.sum(matches[slices_]) == 0:
                 skip_array[product] = 1
@@ -212,13 +212,11 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties,
             if skip_array[product]:
                 continue
 
-            slices = []
-            for idx in range(len(matches.shape)):
-                if idx not in indices:
-                    slices.append(slice(None))
-                else:
-                    slices.append(slice(product[idx - start_idx],
-                                        product[idx - start_idx] + 1))
+            slices = tuple(
+                slice(None) if idx not in indices else
+                slice(product[idx - start_idx],
+                      product[idx - start_idx] + 1) for idx in
+                range(len(matches.shape)))
 
             # This prints the affected nodes
             # print [node_choice_names[i][product[i]]
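
These hunks switch list-built slices to tuples, which sidesteps NumPy's deprecation of indexing an array with a non-tuple sequence of slices (a FutureWarning since NumPy 1.15). A short illustrative sketch with a toy matches array:

import numpy as np

matches = np.arange(24).reshape(2, 3, 4)
node_idx, c_idx = 1, 2

# Build one slice per axis: full slice everywhere except the chosen node axis.
slices = tuple(
    slice(None) if idx != node_idx else slice(c_idx, c_idx + 1)
    for idx in range(matches.ndim)
)

# A tuple of slices is plain multidimensional indexing; a *list* of slices
# triggers the deprecation warning and eventually an error.
print(matches[slices].shape)        # (2, 1, 4)
print(np.sum(matches[slices]) > 0)  # True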

autosklearn/util/backend.py

Lines changed: 13 additions & 6 deletions
@@ -71,11 +71,17 @@ def _prepare_directories(self, temporary_directory, output_directory):
 
         self.__temporary_directory = temporary_directory \
             if temporary_directory \
-            else '/tmp/autosklearn_tmp_%d_%d' % (pid, random_number)
+            else os.path.join(
+                tempfile.gettempdir(),
+                'autosklearn_tmp_%d_%d' % (pid, random_number)
+            )
 
         self.__output_directory = output_directory \
             if output_directory \
-            else '/tmp/autosklearn_output_%d_%d' % (pid, random_number)
+            else os.path.join(
+                tempfile.gettempdir(),
+                'autosklearn_output_%d_%d' % (pid, random_number)
+            )
 
     def create_directories(self):
         if self.shared_mode:
@@ -401,9 +407,10 @@ def save_ensemble(self, ensemble, idx, seed):
         except Exception:
             pass
 
-        filepath = os.path.join(self.get_ensemble_dir(),
-                                '%s.%s.ensemble' % (str(seed),
-                                                    str(idx)))
+        filepath = os.path.join(
+            self.get_ensemble_dir(),
+            '%s.%s.ensemble' % (str(seed), str(idx).zfill(10))
+        )
         with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname(
                 filepath), delete=False) as fh:
             pickle.dump(ensemble, fh)
@@ -460,4 +467,4 @@ def write_txt_file(self, filepath, data, name):
             self.logger.debug('Created %s file %s' % (name, filepath))
         else:
             self.logger.debug('%s file already present %s' %
-                             (name, filepath))
+                              (name, filepath))
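
Two small behaviours change in backend.py: default directories are built from tempfile.gettempdir() instead of a hard-coded /tmp, and the ensemble index is zero-padded. A brief illustrative sketch (pid and random_number values are placeholders):

import os
import tempfile

pid, random_number = os.getpid(), 42  # random_number is normally drawn at runtime

# Portable default directories: tempfile.gettempdir() resolves to /tmp on most
# Linux systems but also works on Windows and respects TMPDIR.
temporary_directory = os.path.join(
    tempfile.gettempdir(), 'autosklearn_tmp_%d_%d' % (pid, random_number))
output_directory = os.path.join(
    tempfile.gettempdir(), 'autosklearn_output_%d_%d' % (pid, random_number))
print(temporary_directory, output_directory)

# Zero-padding the ensemble index keeps lexicographic and numeric order in sync,
# e.g. '...0000000002.ensemble' sorts before '...0000000010.ensemble'.
seed, idx = 1, 2
print('%s.%s.ensemble' % (str(seed), str(idx).zfill(10)))  # 1.0000000002.ensemble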

autosklearn/util/logging_.py

Lines changed: 14 additions & 9 deletions
@@ -7,18 +7,23 @@
 import yaml
 
 
-def setup_logger(output_file=None):
-    with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'),
-              'r') as fh:
-        config = yaml.load(fh)
-    if output_file is not None:
-        config['handlers']['file_handler']['filename'] = output_file
-    logging.config.dictConfig(config)
+def setup_logger(output_file=None, logging_config=None):
+    # logging_config must be a dictionary object specifying the configuration
+    # for the loggers to be used in auto-sklearn.
+    if logging_config is not None:
+        if output_file is not None:
+            logging_config['handlers']['file_handler']['filename'] = output_file
+        logging.config.dictConfig(logging_config)
+    else:
+        with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'),
+                  'r') as fh:
+            logging_config = yaml.safe_load(fh)
+        if output_file is not None:
+            logging_config['handlers']['file_handler']['filename'] = output_file
+        logging.config.dictConfig(logging_config)
 
 
 def _create_logger(name):
-    logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
-                        'message)s', datefmt='%H:%M:%S')
     return logging.getLogger(name)
 
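The reworked setup_logger accepts a user-supplied dictConfig dictionary and falls back to the bundled logging.yaml (now read with yaml.safe_load). A minimal sketch of driving it with a dictionary; the handler layout below is an assumption modelled on the 'file_handler' key the code rewrites, not the shipped logging.yaml:

import logging
import logging.config

# Assumed minimal dictConfig layout; only the 'file_handler' entry is relied
# upon by setup_logger, which overwrites its 'filename' with the output path.
logging_config = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'simple': {'format': '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s'},
    },
    'handlers': {
        'file_handler': {
            'class': 'logging.FileHandler',
            'formatter': 'simple',
            'filename': 'autosklearn.log',  # replaced when output_file is given
        },
    },
    'root': {'level': 'DEBUG', 'handlers': ['file_handler']},
}

# Equivalent of setup_logger(output_file, logging_config) when a dict is passed:
output_file = 'AutoML(1):example.log'
logging_config['handlers']['file_handler']['filename'] = output_file
logging.config.dictConfig(logging_config)
logging.getLogger('AutoML(1):example').info('logger configured from a dict')

The same dictionary could also be handed to the estimators via the new logging_config argument introduced in this release.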