Skip to content

Commit 9952f67

Browse files
authored
Improve data handling (#994)
* Improve managing disk space
* fixes a bug in the ensemble builder that would cause the ensemble building to break when giving a limit on the disk space to use
* allow more fine-grained control over what files to save on disk
* make the output directory optional and only create it if it is actively passed in by the user
* fix bug in logging function
* Improve cv models directories (#993)
* restructure directories for models and predictions
* pep8 and mypy
* fix tests, include offline feedback
* update unittests after rebase
* add forgotten files
* add forgotten file
* fix tests
* fix merge issues
* fix unit tests
* minor improvements
* remove print statement
1 parent 37c879f commit 9952f67

35 files changed

+332
-511
lines changed

autosklearn/automl.py

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
from autosklearn.evaluation.abstract_evaluator import _fit_and_suppress_warnings
3838
from autosklearn.evaluation.train_evaluator import _fit_with_budget
3939
from autosklearn.metrics import calculate_score
40+
from autosklearn.util.backend import Backend
4041
from autosklearn.util.stopwatch import StopWatch
4142
from autosklearn.util.logging_ import get_logger, setup_logger
4243
from autosklearn.util import pipeline, RE_PATTERN
@@ -95,7 +96,7 @@ def send_warnings_to_log(
9596
class AutoML(BaseEstimator):
9697

9798
def __init__(self,
98-
backend,
99+
backend: Backend,
99100
time_left_for_this_task,
100101
per_run_time_limit,
101102
initial_configurations_via_metalearning=25,
@@ -179,7 +180,7 @@ def __init__(self,
179180
raise ValueError('disable_evaluator_output must be of type bool '
180181
'or list.')
181182
if isinstance(self._disable_evaluator_output, list):
182-
allowed_elements = ['model', 'y_optimization']
183+
allowed_elements = ['model', 'cv_model', 'y_optimization', 'y_test', 'y_valid']
183184
for element in self._disable_evaluator_output:
184185
if element not in allowed_elements:
185186
raise ValueError("List member '%s' for argument "
@@ -313,7 +314,7 @@ def _do_dummy_prediction(self, datamanager, num_run):
313314
cost_for_crash=get_cost_of_crash(self._metric),
314315
**self._resampling_strategy_arguments)
315316

316-
status, cost, runtime, additional_info = ta.run(1, cutoff=self._time_for_task)
317+
status, cost, runtime, additional_info = ta.run(num_run, cutoff=self._time_for_task)
317318
if status == StatusType.SUCCESS:
318319
self._logger.info("Finished creating dummy predictions.")
319320
else:
@@ -511,14 +512,6 @@ def fit(
511512
)
512513

513514
self._backend._make_internals_directory()
514-
try:
515-
os.makedirs(self._backend.get_model_dir())
516-
except (OSError, FileExistsError):
517-
raise
518-
try:
519-
os.makedirs(self._backend.get_cv_model_dir())
520-
except (OSError, FileExistsError):
521-
raise
522515

523516
self._task = datamanager.info['task']
524517
self._label_num = datamanager.info['label_num']
@@ -942,9 +935,9 @@ def _load_best_individual_model(self):
942935
# SingleBest contains the best model found by AutoML
943936
ensemble = SingleBest(
944937
metric=self._metric,
945-
random_state=self._seed,
938+
seed=self._seed,
946939
run_history=self.runhistory_,
947-
model_dir=self._backend.get_model_dir(),
940+
backend=self._backend,
948941
)
949942
self._logger.warning(
950943
"No valid ensemble was created. Please check the log"

0 commit comments

Comments
 (0)