Skip to content

Commit f5c13cc

Browse files
ahn1340 and mfeurer
authored and committed
Fix #317 (#484)
* . * . * AutoSklearnClassifier/Regressor's fit, refit, fit_ensemble now return self. * Initial commit. Work in Progress. * Fix minor printing error in sprint_statistics. * Revert "Fix#460" * first commit. work in progress. * . * Autosklearn raises exception if it fails to create tmp, output dirs. * Safeguarding against Auto-sklearn's file creation and deletion. * . * . * / * . * Debugging. * . * . * . * . * Fix Tests & Add shared_mode checking * . * . * Minor changes for passing Codacy check. * Added suggested changes * .
1 parent a019073 commit f5c13cc

File tree

7 files changed

+160
-94
lines changed

7 files changed

+160
-94
lines changed

autosklearn/automl.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,7 @@ def fit(
145145
dataset_name=None,
146146
only_return_configuration_space=False,
147147
):
148-
if not self._shared_mode:
149-
self._backend.context.delete_directories()
150-
else:
148+
if self._shared_mode:
151149
# If this fails, it's likely that this is the first call to get
152150
# the data manager
153151
try:
@@ -156,8 +154,6 @@ def fit(
156154
except IOError:
157155
pass
158156

159-
self._backend.context.create_directories()
160-
161157
if dataset_name is None:
162158
dataset_name = hash_array_or_matrix(X)
163159

autosklearn/estimators.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,8 @@ def build_automl(self):
217217
backend = create(temporary_directory=self.tmp_folder,
218218
output_directory=self.output_folder,
219219
delete_tmp_folder_after_terminate=self.delete_tmp_folder_after_terminate,
220-
delete_output_folder_after_terminate=self.delete_output_folder_after_terminate)
220+
delete_output_folder_after_terminate=self.delete_output_folder_after_terminate,
221+
shared_mode = self.shared_mode)
221222
automl = self._get_automl_class()(
222223
backend=backend,
223224
time_left_for_this_task=self.time_left_for_this_task,

autosklearn/util/backend.py

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
def create(temporary_directory,
1919
output_directory,
2020
delete_tmp_folder_after_terminate=True,
21-
delete_output_folder_after_terminate=True):
21+
delete_output_folder_after_terminate=True,
22+
shared_mode=False):
2223
context = BackendContext(temporary_directory, output_directory,
2324
delete_tmp_folder_after_terminate,
24-
delete_output_folder_after_terminate)
25+
delete_output_folder_after_terminate,
26+
shared_mode)
2527
backend = Backend(context)
2628

2729
return backend
@@ -33,20 +35,35 @@ def __init__(self,
3335
temporary_directory,
3436
output_directory,
3537
delete_tmp_folder_after_terminate,
36-
delete_output_folder_after_terminate):
37-
self._prepare_directories(temporary_directory, output_directory)
38+
delete_output_folder_after_terminate,
39+
shared_mode=False):
40+
41+
# Check that the names of tmp_dir and output_dir are not the same.
42+
if temporary_directory == output_directory \
43+
and temporary_directory is not None:
44+
raise ValueError("The temporary and the output directory "
45+
"must be different.")
46+
3847
self.delete_tmp_folder_after_terminate = delete_tmp_folder_after_terminate
3948
self.delete_output_folder_after_terminate = delete_output_folder_after_terminate
49+
self.shared_mode = shared_mode
50+
# attributes to check that directories were created by autosklearn.
51+
self._tmp_dir_created = False
52+
self._output_dir_created = False
53+
54+
self._prepare_directories(temporary_directory, output_directory)
4055
self._logger = logging.get_logger(__name__)
4156
self.create_directories()
4257

4358
@property
4459
def output_directory(self):
45-
return self.__output_directory
60+
# make sure that tilde does not appear on the path.
61+
return os.path.expanduser(os.path.expandvars(self.__output_directory))
4662

4763
@property
4864
def temporary_directory(self):
49-
return self.__temporary_directory
65+
# make sure that tilde does not appear on the path.
66+
return os.path.expanduser(os.path.expandvars(self.__temporary_directory))
5067

5168
def _prepare_directories(self, temporary_directory, output_directory):
5269
random_number = random.randint(0, 10000)
@@ -61,20 +78,39 @@ def _prepare_directories(self, temporary_directory, output_directory):
6178
else '/tmp/autosklearn_output_%d_%d' % (pid, random_number)
6279

6380
def create_directories(self):
64-
try:
81+
if self.shared_mode:
82+
# If shared_mode == True, the tmp and output dir will be shared
83+
# by different instances of auto-sklearn.
84+
try:
85+
os.makedirs(self.temporary_directory)
86+
except OSError:
87+
pass
88+
try:
89+
os.makedirs(self.output_directory)
90+
except OSError:
91+
pass
92+
93+
else:
94+
# Exception is raised if self.temporary_directory already exists.
6595
os.makedirs(self.temporary_directory)
66-
except OSError:
67-
pass
68-
try:
96+
self._tmp_dir_created = True
97+
98+
# Exception is raised if self.output_directory already exists.
6999
os.makedirs(self.output_directory)
70-
except OSError:
71-
pass
100+
self._output_dir_created = True
101+
72102

73103
def __del__(self):
74104
self.delete_directories(force=False)
75105

76106
def delete_directories(self, force=True):
77107
if self.delete_output_folder_after_terminate or force:
108+
if self._output_dir_created is False and self.shared_mode is False:
109+
raise ValueError("Failed to delete output dir: %s "
110+
"because auto-sklearn did not create it. "
111+
"Please make sure that the specified output "
112+
"dir does not exist when instantiating "
113+
"auto-sklearn." % self.output_directory)
78114
try:
79115
shutil.rmtree(self.output_directory)
80116
except Exception:
@@ -86,6 +122,12 @@ def delete_directories(self, force=True):
86122
self.output_directory)
87123

88124
if self.delete_tmp_folder_after_terminate or force:
125+
if self._tmp_dir_created is False and self.shared_mode is False:
126+
raise ValueError("Failed to delete tmp dir: % s "
127+
"because auto-sklearn did not create it. "
128+
"Please make sure that the specified tmp "
129+
"dir does not exist when instantiating "
130+
"auto-sklearn." % self.temporary_directory)
89131
try:
90132
shutil.rmtree(self.temporary_directory)
91133
except Exception:
@@ -418,4 +460,4 @@ def write_txt_file(self, filepath, data, name):
418460
self.logger.debug('Created %s file %s' % (name, filepath))
419461
else:
420462
self.logger.debug('%s file already present %s' %
421-
(name, filepath))
463+
(name, filepath))

test/test_automl/base.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import shutil
44
import time
55
import unittest
6+
from autosklearn.util.backend import create
67

78

89
class Base(unittest.TestCase):
@@ -19,25 +20,33 @@ def setUp(self):
1920
except Exception:
2021
self.travis = False
2122

22-
def _setUp(self, output):
23-
if os.path.exists(output):
23+
def _setUp(self, dir):
24+
if os.path.exists(dir):
2425
for i in range(10):
2526
try:
26-
shutil.rmtree(output)
27+
shutil.rmtree(dir)
2728
break
2829
except OSError:
2930
time.sleep(1)
30-
try:
31-
os.makedirs(output)
32-
except OSError:
33-
pass
3431

32+
def _create_backend(self, test_name):
33+
tmp = os.path.join(self.test_dir, '..', '.tmp._%s' % test_name)
34+
output = os.path.join(self.test_dir, '..', '.output._%s' % test_name)
35+
# Make sure the folders we want to create do not already exist.
36+
self._setUp(tmp)
37+
self._setUp(output)
38+
backend = create(tmp, output)
39+
return backend
3540

36-
def _tearDown(self, output):
37-
if os.path.exists(output):
41+
def _tearDown(self, dir):
42+
"""
43+
Delete the temporary and the output directories manually
44+
in case they are not deleted.
45+
"""
46+
if os.path.exists(dir):
3847
for i in range(10):
3948
try:
40-
shutil.rmtree(output)
49+
shutil.rmtree(dir)
4150
break
4251
except OSError:
4352
time.sleep(1)

test/test_automl/test_automl.py

Lines changed: 32 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,12 @@ def setUp(self):
4343
self.automl._delete_output_directories = lambda: 0
4444

4545
def test_refit_shuffle_on_fail(self):
46-
output = os.path.join(self.test_dir, '..', '.tmp_refit_shuffle_on_fail')
47-
context = BackendContext(output, output, False, False)
48-
backend = Backend(context)
46+
backend_api = self._create_backend('test_refit_shuffle_on_fail')
4947

5048
failing_model = unittest.mock.Mock()
5149
failing_model.fit.side_effect = [ValueError(), ValueError(), None]
5250

53-
auto = AutoML(backend, 20, 5)
51+
auto = AutoML(backend_api, 20, 5)
5452
ensemble_mock = unittest.mock.Mock()
5553
auto.ensemble_ = ensemble_mock
5654
ensemble_mock.get_selected_model_identifiers.return_value = [1]
@@ -63,6 +61,10 @@ def test_refit_shuffle_on_fail(self):
6361

6462
self.assertEqual(failing_model.fit.call_count, 3)
6563

64+
del auto
65+
self._tearDown(backend_api.temporary_directory)
66+
self._tearDown(backend_api.output_directory)
67+
6668
def test_only_loads_ensemble_models(self):
6769
identifiers = [(1, 2), (3, 4)]
6870

@@ -96,11 +98,9 @@ def test_raises_if_no_models(self):
9698
self.automl._load_models()
9799

98100
def test_fit(self):
99-
output = os.path.join(self.test_dir, '..', '.tmp_test_fit')
100-
self._setUp(output)
101+
backend_api = self._create_backend('test_fit')
101102

102103
X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
103-
backend_api = backend.create(output, output)
104104
automl = autosklearn.automl.AutoML(backend_api, 20, 5)
105105
automl.fit(
106106
X_train, Y_train, metric=accuracy, task=MULTICLASS_CLASSIFICATION,
@@ -110,7 +110,8 @@ def test_fit(self):
110110
self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)
111111

112112
del automl
113-
self._tearDown(output)
113+
self._tearDown(backend_api.temporary_directory)
114+
self._tearDown(backend_api.output_directory)
114115

115116
def test_fit_roar(self):
116117
def get_roar_object_callback(
@@ -129,11 +130,9 @@ def get_roar_object_callback(
129130
tae_runner=ta,
130131
)
131132

132-
output = os.path.join(self.test_dir, '..', '.tmp_test_fit_roar')
133-
self._setUp(output)
133+
backend_api = self._create_backend('test_fit_roar')
134134

135135
X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
136-
backend_api = backend.create(output, output)
137136
automl = autosklearn.automl.AutoML(
138137
backend=backend_api,
139138
time_left_for_this_task=20,
@@ -149,16 +148,15 @@ def get_roar_object_callback(
149148
self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)
150149

151150
del automl
152-
self._tearDown(output)
151+
self._tearDown(backend_api.temporary_directory)
152+
self._tearDown(backend_api.output_directory)
153153

154154
def test_binary_score_and_include(self):
155155
"""
156156
Test fix for binary classification prediction
157157
taking the index 1 of second dimension in prediction matrix
158158
"""
159-
160-
output = os.path.join(self.test_dir, '..', '.tmp_test_binary_score')
161-
self._setUp(output)
159+
backend_api = self._create_backend('test_binary_score_and_include')
162160

163161
data = sklearn.datasets.make_classification(
164162
n_samples=400, n_features=10, n_redundant=1, n_informative=3,
@@ -168,7 +166,6 @@ def test_binary_score_and_include(self):
168166
X_test = data[0][200:]
169167
Y_test = data[1][200:]
170168

171-
backend_api = backend.create(output, output)
172169
automl = autosklearn.automl.AutoML(backend_api, 20, 5,
173170
include_estimators=['sgd'],
174171
include_preprocessors=['no_preprocessing'])
@@ -182,18 +179,17 @@ def test_binary_score_and_include(self):
182179
self.assertGreaterEqual(score, 0.4)
183180

184181
del automl
185-
self._tearDown(output)
182+
self._tearDown(backend_api.temporary_directory)
183+
self._tearDown(backend_api.output_directory)
186184

187185
def test_automl_outputs(self):
188-
output = os.path.join(self.test_dir, '..',
189-
'.tmp_test_automl_outputs')
190-
self._setUp(output)
186+
backend_api = self._create_backend('test_automl_outputs')
187+
191188
name = '31_bac'
192189
dataset = os.path.join(self.test_dir, '..', '.data', name)
193-
data_manager_file = os.path.join(output, '.auto-sklearn',
190+
data_manager_file = os.path.join(backend_api.temporary_directory, '.auto-sklearn',
194191
'datamanager.pkl')
195192

196-
backend_api = backend.create(output, output)
197193
auto = autosklearn.automl.AutoML(
198194
backend_api, 20, 5,
199195
initial_configurations_via_metalearning=0,
@@ -212,43 +208,41 @@ def test_automl_outputs(self):
212208
'start_time_100', 'datamanager.pkl',
213209
'predictions_ensemble',
214210
'ensembles', 'predictions_test', 'models']
215-
self.assertEqual(sorted(os.listdir(os.path.join(output,
211+
self.assertEqual(sorted(os.listdir(os.path.join(backend_api.temporary_directory,
216212
'.auto-sklearn'))),
217213
sorted(fixture))
218214

219215
# At least one ensemble, one validation, one test prediction and one
220216
# model and one ensemble
221-
fixture = os.listdir(os.path.join(output, '.auto-sklearn',
217+
fixture = os.listdir(os.path.join(backend_api.temporary_directory, '.auto-sklearn',
222218
'predictions_ensemble'))
223219
self.assertIn('predictions_ensemble_100_1.npy', fixture)
224220

225-
fixture = os.listdir(os.path.join(output, '.auto-sklearn',
221+
fixture = os.listdir(os.path.join(backend_api.temporary_directory, '.auto-sklearn',
226222
'models'))
227223
self.assertIn('100.1.model', fixture)
228224

229-
fixture = os.listdir(os.path.join(output, '.auto-sklearn',
225+
fixture = os.listdir(os.path.join(backend_api.temporary_directory, '.auto-sklearn',
230226
'ensembles'))
231227
self.assertIn('100.0.ensemble', fixture)
232228

233229
# Start time
234-
start_time_file_path = os.path.join(output, '.auto-sklearn',
230+
start_time_file_path = os.path.join(backend_api.temporary_directory, '.auto-sklearn',
235231
"start_time_100")
236232
with open(start_time_file_path, 'r') as fh:
237233
start_time = float(fh.read())
238234
self.assertGreaterEqual(time.time() - start_time, 10)
239235

240236
del auto
241-
self._tearDown(output)
237+
self._tearDown(backend_api.temporary_directory)
238+
self._tearDown(backend_api.output_directory)
242239

243240
def test_do_dummy_prediction(self):
244241
for name in ['401_bac', '31_bac', 'adult', 'cadata']:
245-
output = os.path.join(self.test_dir, '..',
246-
'.tmp_test_do_dummy_prediction')
247-
self._setUp(output)
242+
backend_api = self._create_backend('test_do_dummy_prediction')
248243

249244
dataset = os.path.join(self.test_dir, '..', '.data', name)
250245

251-
backend_api = backend.create(output, output)
252246
auto = autosklearn.automl.AutoML(
253247
backend_api, 20, 5,
254248
initial_configurations_via_metalearning=25)
@@ -260,14 +254,17 @@ def test_do_dummy_prediction(self):
260254
auto._do_dummy_prediction(D, 1)
261255

262256
# Ensure that the dummy predictions are not in the current working
263-
# directory, but in the output directory (under output)
257+
# directory, but in the temporary directory.
264258
self.assertFalse(os.path.exists(os.path.join(os.getcwd(),
265259
'.auto-sklearn')))
266260
self.assertTrue(os.path.exists(os.path.join(
267-
output, '.auto-sklearn', 'predictions_ensemble',
261+
backend_api.temporary_directory, '.auto-sklearn', 'predictions_ensemble',
268262
'predictions_ensemble_1_1.npy')))
269263

270264
del auto
271-
self._tearDown(output)
265+
self._tearDown(backend_api.temporary_directory)
266+
self._tearDown(backend_api.output_directory)
272267

273268

269+
if __name__=="__main__":
270+
unittest.main()

0 commit comments

Comments
 (0)