Skip to content

Commit ccb419c

Browse files
Remove the output directory argument (#1159)
* Remove file output
* output folder
* don't remove everything
* Move to ubuntu latest
* fix binary
* revert latest
1 parent 3cbdcc8 commit ccb419c

20 files changed

+30
-187
lines changed

autosklearn/automl.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ def __init__(self,
145145
self.configuration_space = None
146146
self._backend = backend
147147
# self._tmp_dir = tmp_dir
148-
# self._output_dir = output_dir
149148
self._time_for_task = time_left_for_this_task
150149
self._per_run_time_limit = per_run_time_limit
151150
self._initial_configurations_via_metalearning = \
@@ -254,9 +253,6 @@ def __init__(self,
254253
# By default try to use the TCP logging port or get a new port
255254
self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT
256255

257-
# After assigning and checking variables...
258-
# self._backend = Backend(self._output_dir, self._tmp_dir)
259-
260256
# Num_run tells us how many runs have been launched
261257
# It can be seen as an identifier for each configuration
262258
# saved to disk
@@ -573,7 +569,6 @@ def fit(
573569
raise ValueError('Unable to read requirement: %s' % requirement)
574570
self._logger.debug('Done printing environment information')
575571
self._logger.debug('Starting to print arguments to auto-sklearn')
576-
self._logger.debug(' output_folder: %s', self._backend.context._output_directory)
577572
self._logger.debug(' tmp_folder: %s', self._backend.context._temporary_directory)
578573
self._logger.debug(' time_left_for_this_task: %f', self._time_for_task)
579574
self._logger.debug(' per_run_time_limit: %f', self._per_run_time_limit)

autosklearn/ensemble_builder.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,14 +1329,6 @@ def predict(self, set_: str,
13291329
y = ensemble.predict(predictions)
13301330
if self.task_type == BINARY_CLASSIFICATION:
13311331
y = y[:, 1]
1332-
if self.SAVE2DISC:
1333-
self.backend.save_predictions_as_txt(
1334-
predictions=y,
1335-
subset=set_,
1336-
idx=index_run,
1337-
prefix=self.dataset_name,
1338-
precision=8,
1339-
)
13401332
return y
13411333
else:
13421334
self.logger.info(

autosklearn/estimators.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,7 @@ def __init__(
3939
resampling_strategy='holdout',
4040
resampling_strategy_arguments=None,
4141
tmp_folder=None,
42-
output_folder=None,
4342
delete_tmp_folder_after_terminate=True,
44-
delete_output_folder_after_terminate=True,
4543
n_jobs: Optional[int] = None,
4644
dask_client: Optional[dask.distributed.Client] = None,
4745
disable_evaluator_output=False,
@@ -161,18 +159,10 @@ def __init__(
161159
folder to store configuration output and log files, if ``None``
162160
automatically use ``/tmp/autosklearn_tmp_$pid_$random_number``
163161
164-
output_folder : string, optional (None)
165-
folder to store predictions for optional test set, if ``None``
166-
no output will be generated
167-
168162
delete_tmp_folder_after_terminate: bool, optional (True)
169163
remove tmp_folder, when finished. If tmp_folder is None
170164
tmp_dir will always be deleted
171165
172-
delete_output_folder_after_terminate: bool, optional (True)
173-
remove output_folder, when finished. If output_folder is None
174-
output_dir will always be deleted
175-
176166
n_jobs : int, optional, experimental
177167
The number of jobs to run in parallel for ``fit()``. ``-1`` means
178168
using all processors. By default, Auto-sklearn uses a single core
@@ -263,9 +253,7 @@ def __init__(
263253
self.resampling_strategy = resampling_strategy
264254
self.resampling_strategy_arguments = resampling_strategy_arguments
265255
self.tmp_folder = tmp_folder
266-
self.output_folder = output_folder
267256
self.delete_tmp_folder_after_terminate = delete_tmp_folder_after_terminate
268-
self.delete_output_folder_after_terminate = delete_output_folder_after_terminate
269257
self.n_jobs = n_jobs
270258
self.dask_client = dask_client
271259
self.disable_evaluator_output = disable_evaluator_output
@@ -299,9 +287,7 @@ def build_automl(self):
299287

300288
backend = create(
301289
temporary_directory=self.tmp_folder,
302-
output_directory=self.output_folder,
303290
delete_tmp_folder_after_terminate=self.delete_tmp_folder_after_terminate,
304-
delete_output_folder_after_terminate=self.delete_output_folder_after_terminate,
305291
)
306292

307293
automl = self._get_automl_class()(

autosklearn/experimental/askl2.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,7 @@ def __init__(
170170
seed: int = 1,
171171
memory_limit: int = 3072,
172172
tmp_folder: Optional[str] = None,
173-
output_folder: Optional[str] = None,
174173
delete_tmp_folder_after_terminate: bool = True,
175-
delete_output_folder_after_terminate: bool = True,
176174
n_jobs: Optional[int] = None,
177175
dask_client: Optional[dask.distributed.Client] = None,
178176
disable_evaluator_output: bool = False,
@@ -230,18 +228,10 @@ def __init__(
230228
folder to store configuration output and log files, if ``None``
231229
automatically use ``/tmp/autosklearn_tmp_$pid_$random_number``
232230
233-
output_folder : string, optional (None)
234-
folder to store predictions for optional test set, if ``None``
235-
no output will be generated
236-
237231
delete_tmp_folder_after_terminate: bool, optional (True)
238232
remove tmp_folder, when finished. If tmp_folder is None
239233
tmp_dir will always be deleted
240234
241-
delete_output_folder_after_terminate: bool, optional (True)
242-
remove output_folder, when finished. If output_folder is None
243-
output_dir will always be deleted
244-
245235
n_jobs : int, optional, experimental
246236
The number of jobs to run in parallel for ``fit()``. ``-1`` means
247237
using all processors. By default, Auto-sklearn uses a single core
@@ -324,9 +314,7 @@ def __init__(
324314
resampling_strategy=None,
325315
resampling_strategy_arguments=None,
326316
tmp_folder=tmp_folder,
327-
output_folder=output_folder,
328317
delete_tmp_folder_after_terminate=delete_tmp_folder_after_terminate,
329-
delete_output_folder_after_terminate=delete_output_folder_after_terminate,
330318
n_jobs=n_jobs,
331319
dask_client=dask_client,
332320
disable_evaluator_output=disable_evaluator_output,

autosklearn/util/backend.py

Lines changed: 1 addition & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,10 @@
2424

2525
def create(
2626
temporary_directory: str,
27-
output_directory: Optional[str],
2827
delete_tmp_folder_after_terminate: bool = True,
29-
delete_output_folder_after_terminate: bool = True,
3028
) -> 'Backend':
31-
context = BackendContext(temporary_directory, output_directory,
29+
context = BackendContext(temporary_directory,
3230
delete_tmp_folder_after_terminate,
33-
delete_output_folder_after_terminate,
3431
)
3532
backend = Backend(context)
3633

@@ -58,28 +55,18 @@ class BackendContext(object):
5855

5956
def __init__(self,
6057
temporary_directory: str,
61-
output_directory: Optional[str],
6258
delete_tmp_folder_after_terminate: bool,
63-
delete_output_folder_after_terminate: bool,
6459
):
6560

66-
# Check that the names of tmp_dir and output_dir is not the same.
67-
if temporary_directory == output_directory and temporary_directory is not None:
68-
raise ValueError("The temporary and the output directory "
69-
"must be different.")
70-
7161
self.delete_tmp_folder_after_terminate = delete_tmp_folder_after_terminate
72-
self.delete_output_folder_after_terminate = delete_output_folder_after_terminate
7362
# attributes to check that directories were created by autosklearn.
7463
self._tmp_dir_created = False
75-
self._output_dir_created = False
7664

7765
self._temporary_directory = (
7866
get_randomized_directory_name(
7967
temporary_directory=temporary_directory,
8068
)
8169
)
82-
self._output_directory = output_directory
8370
# Auto-Sklearn logs through the use of a PicklableClientLogger
8471
# For this reason we need a port to communicate with the server
8572
# When the backend is created, this port is not available
@@ -94,14 +81,6 @@ def setup_logger(self, port: int) -> None:
9481
port=port,
9582
)
9683

97-
@property
98-
def output_directory(self) -> Optional[str]:
99-
if self._output_directory is not None:
100-
# make sure that tilde does not appear on the path.
101-
return os.path.expanduser(os.path.expandvars(self._output_directory))
102-
else:
103-
return None
104-
10584
@property
10685
def temporary_directory(self) -> str:
10786
# make sure that tilde does not appear on the path.
@@ -112,29 +91,7 @@ def create_directories(self) -> None:
11291
os.makedirs(self.temporary_directory)
11392
self._tmp_dir_created = True
11493

115-
# Exception is raised if self.output_directory already exists.
116-
if self.output_directory is not None:
117-
os.makedirs(self.output_directory)
118-
self._output_dir_created = True
119-
12094
def delete_directories(self, force: bool = True) -> None:
121-
if self.output_directory and (self.delete_output_folder_after_terminate or force):
122-
if self._output_dir_created is False:
123-
raise ValueError("Failed to delete output dir: %s because auto-sklearn did not "
124-
"create it. Please make sure that the specified output dir does "
125-
"not exist when instantiating auto-sklearn."
126-
% self.output_directory)
127-
try:
128-
shutil.rmtree(self.output_directory)
129-
except Exception:
130-
try:
131-
if self._logger is not None:
132-
self._logger.warning("Could not delete output dir: %s" %
133-
self.output_directory)
134-
else:
135-
print("Could not delete output dir: %s" % self.output_directory)
136-
except Exception:
137-
print("Could not delete output dir: %s" % self.output_directory)
13895

13996
if self.delete_tmp_folder_after_terminate or force:
14097
if self._tmp_dir_created is False:
@@ -175,10 +132,6 @@ def __init__(self, context: BackendContext):
175132
os.makedirs(self.temporary_directory)
176133
except Exception:
177134
pass
178-
# This does not have to exist or be specified
179-
if self.output_directory is not None:
180-
if not os.path.exists(self.output_directory):
181-
raise ValueError("Output directory %s does not exist." % self.output_directory)
182135

183136
self.internals_directory = os.path.join(self.temporary_directory, ".auto-sklearn")
184137
self._make_internals_directory()
@@ -190,10 +143,6 @@ def setup_logger(self, port: int) -> None:
190143
)
191144
self.context.setup_logger(port)
192145

193-
@property
194-
def output_directory(self) -> Optional[str]:
195-
return self.context.output_directory
196-
197146
@property
198147
def temporary_directory(self) -> str:
199148
return self.context.temporary_directory
@@ -466,31 +415,6 @@ def get_prediction_filename(self, subset: str,
466415
) -> str:
467416
return 'predictions_%s_%s_%s_%s.npy' % (subset, automl_seed, idx, budget)
468417

469-
def save_predictions_as_txt(self,
470-
predictions: np.ndarray,
471-
subset: str,
472-
idx: int, precision: int,
473-
prefix: Optional[str] = None) -> None:
474-
if not self.output_directory:
475-
return
476-
# Write prediction scores in prescribed format
477-
filepath = os.path.join(
478-
self.output_directory,
479-
('%s_' % prefix if prefix else '') + '%s_%s.predict' % (subset, str(idx)),
480-
)
481-
482-
format_string = '{:.%dg} ' % precision
483-
with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(
484-
filepath), delete=False) as output_file:
485-
for row in predictions:
486-
if not isinstance(row, np.ndarray) and not isinstance(row, list):
487-
row = [row]
488-
for val in row:
489-
output_file.write(format_string.format(float(val)))
490-
output_file.write('\n')
491-
tempname = output_file.name
492-
os.rename(tempname, filepath)
493-
494418
def write_txt_file(self, filepath: str, data: str, name: str) -> None:
495419
with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(
496420
filepath), delete=False) as fh:

examples/20_basic/example_classification.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
time_left_for_this_task=120,
3030
per_run_time_limit=30,
3131
tmp_folder='/tmp/autosklearn_classification_example_tmp',
32-
output_folder='/tmp/autosklearn_classification_example_out',
3332
)
3433
automl.fit(X_train, y_train, dataset_name='breast_cancer')
3534

examples/20_basic/example_multioutput_regression.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
time_left_for_this_task=120,
3333
per_run_time_limit=30,
3434
tmp_folder='/tmp/autosklearn_multioutput_regression_example_tmp',
35-
output_folder='/tmp/autosklearn_multioutput_regression_example_out',
3635
)
3736
automl.fit(X_train, y_train, dataset_name='synthetic')
3837

examples/20_basic/example_regression.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
time_left_for_this_task=120,
3131
per_run_time_limit=30,
3232
tmp_folder='/tmp/autosklearn_regression_example_tmp',
33-
output_folder='/tmp/autosklearn_regression_example_out',
3433
)
3534
automl.fit(X_train, y_train, dataset_name='diabetes')
3635

examples/40_advanced/example_resampling.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
time_left_for_this_task=120,
3434
per_run_time_limit=30,
3535
tmp_folder='/tmp/autosklearn_resampling_example_tmp',
36-
output_folder='/tmp/autosklearn_resampling_example_out',
3736
disable_evaluator_output=False,
3837
# 'holdout' with 'train_size'=0.67 is the default argument setting
3938
# for AutoSklearnClassifier. It is explicitly specified in this example
@@ -59,7 +58,6 @@
5958
time_left_for_this_task=120,
6059
per_run_time_limit=30,
6160
tmp_folder='/tmp/autosklearn_resampling_example_tmp',
62-
output_folder='/tmp/autosklearn_resampling_example_out',
6361
disable_evaluator_output=False,
6462
resampling_strategy='cv',
6563
resampling_strategy_arguments={'folds': 5},
@@ -107,7 +105,6 @@
107105
time_left_for_this_task=120,
108106
per_run_time_limit=30,
109107
tmp_folder='/tmp/autosklearn_resampling_example_tmp',
110-
output_folder='/tmp/autosklearn_resampling_example_out',
111108
disable_evaluator_output=False,
112109
resampling_strategy=resampling_strategy,
113110
resampling_strategy_arguments=resampling_strategy_arguments,

examples/60_search/example_parallel_manual_spawning_cli.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@
6363
from autosklearn.constants import MULTICLASS_CLASSIFICATION
6464

6565
tmp_folder = '/tmp/autosklearn_parallel_3_example_tmp'
66-
output_folder = '/tmp/autosklearn_parallel_3_example_out'
6766

6867
worker_processes = []
6968

@@ -180,7 +179,6 @@ def cli_start_worker(scheduler_file_name):
180179
per_run_time_limit=10,
181180
memory_limit=1024,
182181
tmp_folder=tmp_folder,
183-
output_folder=output_folder,
184182
seed=777,
185183
# n_jobs is ignored internally as we pass a dask client.
186184
n_jobs=1,

0 commit comments

Comments
 (0)