Skip to content

Commit 5d1931a

Browse files
authored
Merge pull request #197 from automl/development
MAINT prepare release of version 0.1.1
2 parents bc873f6 + bf9593d commit 5d1931a

27 files changed

+913
-1008
lines changed

.travis.yml

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,49 +2,63 @@ language: python
22

33
sudo: false
44

5-
os:
6-
- linux
7-
- osx
8-
95
matrix:
106
allow_failures:
117
- os: osx
8+
9+
include:
10+
- os: linux
11+
env: DISTRIB="conda" PYTHON_VERSION="3.4" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
12+
- os: linux
13+
env: DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
14+
15+
# Set language to generic to not break travis-ci
16+
# https://github.com/travis-ci/travis-ci/issues/2312#issuecomment-195620855
17+
# so far, this issue is still open and there is no good solution
18+
# python will then be installed by anaconda
19+
- os: osx
20+
sudo: required
21+
language: generic
22+
env: DISTRIB="conda" PYTHON_VERSION="3.4" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
23+
- os: osx
24+
sudo: required
25+
language: generic
26+
env: DISTRIB="conda" PYTHON_VERSION="3.5" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
1227

1328
cache:
14-
apt: true
1529
# We use three different cache directories
1630
# to work around a Travis bug with multi-platform cache
1731
directories:
1832
- $HOME/.cache/pip
1933
- $HOME/download
2034
pip: true
2135

22-
# command to install dependencies
23-
addons:
24-
apt:
25-
sources:
26-
- ubuntu-toolchain-r-test
27-
packages:
28-
- gcc-4.8
29-
- g++-4.8
30-
- libatlas-dev
31-
- liblapack-dev
32-
- libatlas-base-dev
33-
- gfortran
36+
git:
37+
depth: 5
3438

3539
env:
3640
global:
3741
# Directory where tests are run from
3842
- TEST_DIR=/tmp/test_dir/
3943
- MODULE=autosklearn
40-
matrix:
41-
- DISTRIB="conda" PYTHON_VERSION="3.4"
42-
- DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true"
44+
45+
before_install:
46+
- wget $MINICONDA_URL -O miniconda.sh
47+
- bash miniconda.sh -b -p $HOME/miniconda
48+
- export PATH="$HOME/miniconda/bin:$PATH"
49+
- if [[ `which conda` ]]; then echo 'Conda installation successful'; else exit 1; fi
50+
- conda update --yes conda
51+
- conda create -n testenv --yes python=$PYTHON_VERSION pip wheel nose
52+
- source activate testenv
53+
- conda install --yes gcc
54+
- echo "Using GCC at "`which gcc`
55+
- export CC=`which gcc`
4356

4457
install:
45-
# Necessary for random forest
46-
- export CXX="g++-4.8" CC="gcc-4.8"
47-
- source ci_scripts/install.sh
58+
- pip install coverage pep8 python-coveralls
59+
- cat requirements.txt | xargs -n 1 -L 1 pip install
60+
- python setup.py install
61+
4862
script: bash ci_scripts/test.sh
4963
after_success: source ci_scripts/success.sh
5064

autosklearn/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,14 @@
11
# -*- encoding: utf-8 -*-
2-
__version__ = '0.1.0'
2+
from autosklearn.util import dependencies
3+
4+
__version__ = '0.1.1'
5+
6+
__MANDATORY_PACKAGES__ = '''
7+
scikit-learn==0.17.1
8+
smac==0.2.1
9+
lockfile>=0.10
10+
ConfigSpace>=0.2.1
11+
pyrfr==0.2.0
12+
'''
13+
14+
dependencies.verify_packages(__MANDATORY_PACKAGES__)

autosklearn/automl.py

Lines changed: 68 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,13 @@
1919
from autosklearn.data.data_manager_factory import get_data_manager
2020
from autosklearn.data.competition_data_manager import CompetitionDataManager
2121
from autosklearn.data.xy_data_manager import XYDataManager
22-
from autosklearn.evaluation import resampling, eval_with_limits
22+
from autosklearn.evaluation import resampling, ExecuteTaFuncWithQueue
2323
from autosklearn.evaluation import calculate_score
2424
from autosklearn.util import StopWatch, get_logger, setup_logger, \
2525
pipeline
2626
from autosklearn.ensemble_builder import EnsembleBuilder
2727
from autosklearn.smbo import AutoMLSMBO
28+
from autosklearn.util.hash import hash_numpy_array
2829

2930

3031
class AutoML(BaseEstimator):
@@ -71,7 +72,8 @@ def __init__(self,
7172
self._include_estimators = include_estimators
7273
self._include_preprocessors = include_preprocessors
7374
self._resampling_strategy = resampling_strategy
74-
self._resampling_strategy_arguments = resampling_strategy_arguments
75+
self._resampling_strategy_arguments = resampling_strategy_arguments \
76+
if resampling_strategy_arguments is not None else {}
7577
self._max_iter_smac = max_iter_smac
7678
#self.delete_tmp_folder_after_terminate = \
7779
# delete_tmp_folder_after_terminate
@@ -147,9 +149,7 @@ def fit(self, X, y,
147149
self._backend.context.create_directories()
148150

149151
if dataset_name is None:
150-
m = hashlib.md5()
151-
m.update(X.data)
152-
dataset_name = m.hexdigest()
152+
dataset_name = hash_numpy_array(X)
153153

154154
self._backend.save_start_time(self._seed)
155155
self._stopwatch = StopWatch()
@@ -232,37 +232,32 @@ def _print_load_time(basename, time_left_for_this_task,
232232
def _do_dummy_prediction(self, datamanager, num_run):
233233

234234
self._logger.info("Starting to create dummy predictions.")
235-
time_limit = int(self._time_for_task / 6.)
235+
# time_limit = int(self._time_for_task / 6.)
236236
memory_limit = int(self._ml_memory_limit)
237-
238-
_info = eval_with_limits(datamanager, self._backend, 1,
239-
self._seed, num_run,
240-
self._resampling_strategy,
241-
self._resampling_strategy_arguments,
242-
memory_limit, time_limit,
243-
logger=self._logger)
244-
if _info[4] == StatusType.SUCCESS:
245-
self._logger.info("Finished creating dummy prediction 1/2.")
246-
else:
247-
self._logger.error('Error creating dummy prediction 1/2:%s ',
248-
_info[3])
249-
250-
num_run += 1
251-
252-
_info = eval_with_limits(datamanager, self._backend, 2,
253-
self._seed, num_run,
254-
self._resampling_strategy,
255-
self._resampling_strategy_arguments,
256-
memory_limit, time_limit,
257-
logger=self._logger)
258-
if _info[4] == StatusType.SUCCESS:
259-
self._logger.info("Finished creating dummy prediction 2/2.")
237+
ta = ExecuteTaFuncWithQueue(backend=self._backend,
238+
autosklearn_seed=self._seed,
239+
resampling_strategy=self._resampling_strategy,
240+
initial_num_run=num_run,
241+
logger=self._logger,
242+
**self._resampling_strategy_arguments)
243+
244+
status, cost, runtime, additional_info = \
245+
ta.run(1, cutoff=self._time_for_task, memory_limit=memory_limit)
246+
if status == StatusType.SUCCESS:
247+
self._logger.info("Finished creating dummy predictions.")
260248
else:
261-
self._logger.error('Error creating dummy prediction 2/2 %s',
262-
_info[3])
249+
self._logger.error('Error creating dummy predictions:%s ',
250+
additional_info)
263251

264-
num_run += 1
265-
return num_run
252+
#status, cost, runtime, additional_info = \
253+
# ta.run(2, cutoff=time_limit, memory_limit=memory_limit)
254+
#if status == StatusType.SUCCESS:
255+
# self._logger.info("Finished creating dummy prediction 2/2.")
256+
#else:
257+
# self._logger.error('Error creating dummy prediction 2/2 %s',
258+
# additional_info)
259+
260+
return ta.num_run
266261

267262
def _fit(self, datamanager):
268263
# Reset learnt stuff
@@ -374,7 +369,7 @@ def _fit(self, datamanager):
374369
if time_left_for_smac <= 0:
375370
self._logger.warning("Not starting SMAC because there is no time "
376371
"left.")
377-
self._proc_smac = None
372+
_proc_smac = None
378373
else:
379374
if self._per_run_time_limit is None or \
380375
self._per_run_time_limit > time_left_for_smac:
@@ -385,25 +380,25 @@ def _fit(self, datamanager):
385380
else:
386381
per_run_time_limit = self._per_run_time_limit
387382

388-
self._proc_smac = AutoMLSMBO(config_space=self.configuration_space,
389-
dataset_name=self._dataset_name,
390-
backend=self._backend,
391-
total_walltime_limit=time_left_for_smac,
392-
func_eval_time_limit=per_run_time_limit,
393-
memory_limit=self._ml_memory_limit,
394-
data_memory_limit=self._data_memory_limit,
395-
watcher=self._stopwatch,
396-
start_num_run=num_run,
397-
num_metalearning_cfgs=self._initial_configurations_via_metalearning,
398-
config_file=configspace_path,
399-
smac_iters=self._max_iter_smac,
400-
seed=self._seed,
401-
metadata_directory=self._metadata_directory,
402-
resampling_strategy=self._resampling_strategy,
403-
resampling_strategy_args=self._resampling_strategy_arguments,
404-
acquisition_function=self.acquisition_function,
405-
shared_mode=self._shared_mode)
406-
self._proc_smac.run_smbo()
383+
_proc_smac = AutoMLSMBO(config_space=self.configuration_space,
384+
dataset_name=self._dataset_name,
385+
backend=self._backend,
386+
total_walltime_limit=time_left_for_smac,
387+
func_eval_time_limit=per_run_time_limit,
388+
memory_limit=self._ml_memory_limit,
389+
data_memory_limit=self._data_memory_limit,
390+
watcher=self._stopwatch,
391+
start_num_run=num_run,
392+
num_metalearning_cfgs=self._initial_configurations_via_metalearning,
393+
config_file=configspace_path,
394+
smac_iters=self._max_iter_smac,
395+
seed=self._seed,
396+
metadata_directory=self._metadata_directory,
397+
resampling_strategy=self._resampling_strategy,
398+
resampling_strategy_args=self._resampling_strategy_arguments,
399+
acquisition_function=self.acquisition_function,
400+
shared_mode=self._shared_mode)
401+
self.runhistory_ = _proc_smac.run_smbo()
407402

408403
self._proc_ensemble = None
409404
self._load_models()
@@ -418,12 +413,25 @@ def refit(self, X, y):
418413
self.ensemble_ is None:
419414
self._load_models()
420415

416+
random_state = np.random.RandomState(self._seed)
421417
for identifier in self.models_:
422418
if identifier in self.ensemble_.get_model_identifiers():
423419
model = self.models_[identifier]
424420
# this updates the model inplace, it can then later be used in
425421
# predict method
426-
model.fit(X.copy(), y.copy())
422+
423+
# try to fit the model. If it fails, shuffle the data. This
424+
# could alleviate the problem in algorithms that depend on
425+
# the ordering of the data.
426+
for i in range(10):
427+
try:
428+
model.fit(X.copy(), y.copy())
429+
break
430+
except ValueError:
431+
indices = list(range(X.shape[0]))
432+
random_state.shuffle(indices)
433+
X = X[indices]
434+
y = y[indices]
427435

428436
self._can_predict = True
429437
return self
@@ -561,8 +569,8 @@ def grid_scores_(self):
561569
scores_per_config = defaultdict(list)
562570
config_list = list()
563571

564-
for run_key in self._proc_smac.runhistory.data:
565-
run_value = self._proc_smac.runhistory.data[run_key]
572+
for run_key in self.runhistory_.data:
573+
run_value = self.runhistory_.data[run_key]
566574

567575
config_id = run_key.config_id
568576
cost = run_value.cost
@@ -575,7 +583,7 @@ def grid_scores_(self):
575583
for config_id in config_list:
576584
scores = [1 - score for score in scores_per_config[config_id]]
577585
mean_score = np.mean(scores)
578-
config = self._proc_smac.runhistory.ids_config[config_id]
586+
config = self.runhistory_.ids_config[config_id]
579587

580588
grid_score = _CVScoreTuple(config.get_dictionary(), mean_score,
581589
scores)
@@ -616,10 +624,10 @@ def cv_results_(self):
616624
mean_fit_time = []
617625
params = []
618626
status = []
619-
for run_key in self._proc_smac.runhistory.data:
620-
run_value = self._proc_smac.runhistory.data[run_key]
627+
for run_key in self.runhistory_.data:
628+
run_value = self.runhistory_.data[run_key]
621629
config_id = run_key.config_id
622-
config = self._proc_smac.runhistory.ids_config[config_id]
630+
config = self.runhistory_.ids_config[config_id]
623631

624632
param_dict = config.get_dictionary()
625633
params.append(param_dict)

autosklearn/ensemble_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def main(self):
129129
if dir_ensemble_file.endswith("/"):
130130
dir_ensemble_file = dir_ensemble_file[:-1]
131131
if not dir_ensemble_file.endswith(".npy"):
132-
self.logger.warning('Error loading file (not .npy): %s', dir_ensemble_file)
132+
self.logger.info('Error loading file (not .npy): %s', dir_ensemble_file)
133133
continue
134134

135135
dir_ensemble_model_files.append(dir_ensemble_file)

0 commit comments

Comments
 (0)