Skip to content

Commit 8edb58b

Browse files
committed
replace fork+mock by spawn+hardcoded patch in main code
1 parent 3c0ab4b commit 8edb58b

File tree

2 files changed

+36
-22
lines changed

2 files changed

+36
-22
lines changed

autosklearn/ensemble_builder.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ def build_ensemble(
156156
self,
157157
dask_client: dask.distributed.Client,
158158
pynisher_context: str = 'spawn',
159+
unit_test: bool = False
159160
) -> None:
160161

161162
# The second criterion is elapsed time
@@ -227,6 +228,7 @@ def build_ensemble(
227228
priority=100,
228229
pynisher_context=pynisher_context,
229230
logger_port=self.logger_port,
231+
unit_test=unit_test,
230232
))
231233

232234
logger.info(
@@ -265,6 +267,7 @@ def fit_and_return_ensemble(
265267
return_predictions: bool,
266268
pynisher_context: str,
267269
logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
270+
unit_test: bool = False,
268271
) -> Tuple[
269272
List[Tuple[int, float, float, float]],
270273
int,
@@ -321,6 +324,11 @@ def fit_and_return_ensemble(
321324
Context to use for multiprocessing, can be either fork, spawn or forkserver.
322325
logger_port: int
323326
The port on which the logging server is listening.
327+
unit_test: bool
328+
Turn on unit testing mode. This currently makes fit_ensemble raise a MemoryError.
329+
Having this is very bad coding style, but I did not find a way to make
330+
unittest.mock work through the pynisher with all multiprocessing contexts. If you know a
331+
better solution, please let us know by opening an issue.
324332
325333
Returns
326334
-------
@@ -343,6 +351,7 @@ def fit_and_return_ensemble(
343351
read_at_most=read_at_most,
344352
random_state=random_state,
345353
logger_port=logger_port,
354+
unit_test=unit_test,
346355
).run(
347356
end_at=end_at,
348357
iteration=iteration,
@@ -354,21 +363,22 @@ def fit_and_return_ensemble(
354363

355364
class EnsembleBuilder(object):
356365
def __init__(
357-
self,
358-
backend: Backend,
359-
dataset_name: str,
360-
task_type: int,
361-
metric: Scorer,
362-
ensemble_size: int = 10,
363-
ensemble_nbest: int = 100,
364-
max_models_on_disc: int = 100,
365-
performance_range_threshold: float = 0,
366-
seed: int = 1,
367-
precision: int = 32,
368-
memory_limit: Optional[int] = 1024,
369-
read_at_most: int = 5,
370-
random_state: Optional[Union[int, np.random.RandomState]] = None,
371-
logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
366+
self,
367+
backend: Backend,
368+
dataset_name: str,
369+
task_type: int,
370+
metric: Scorer,
371+
ensemble_size: int = 10,
372+
ensemble_nbest: int = 100,
373+
max_models_on_disc: int = 100,
374+
performance_range_threshold: float = 0,
375+
seed: int = 1,
376+
precision: int = 32,
377+
memory_limit: Optional[int] = 1024,
378+
read_at_most: int = 5,
379+
random_state: Optional[Union[int, np.random.RandomState]] = None,
380+
logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT,
381+
unit_test: bool = False,
372382
):
373383
"""
374384
Constructor
@@ -416,6 +426,11 @@ def __init__(
416426
read at most n new prediction files in each iteration
417427
logger_port: int
418428
port that receives logging records
429+
unit_test: bool
430+
Turn on unit testing mode. This currently makes fit_ensemble raise a MemoryError.
431+
Having this is very bad coding style, but I did not find a way to make
432+
unittest.mock work through the pynisher with all multiprocessing contexts. If you know a
433+
better solution, please let us know by opening an issue.
419434
"""
420435

421436
super(EnsembleBuilder, self).__init__()
@@ -454,6 +469,7 @@ def __init__(
454469
self.memory_limit = memory_limit
455470
self.read_at_most = read_at_most
456471
self.random_state = check_random_state(random_state)
472+
self.unit_test = unit_test
457473

458474
# Setup the logger
459475
self.logger_port = logger_port
@@ -1196,6 +1212,9 @@ def fit_ensemble(self, selected_keys: list):
11961212
trained Ensemble
11971213
"""
11981214

1215+
if self.unit_test:
1216+
raise MemoryError()
1217+
11991218
predictions_train = [self.read_preds[k][Y_ENSEMBLE] for k in selected_keys]
12001219
include_num_runs = [
12011220
(

test/test_ensemble_builder/test_ensemble.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -764,9 +764,7 @@ def test_ensemble_builder_process_realrun(dask_client_single_worker, ensemble_ba
764764
assert history[0]['ensemble_test_score'] == 0.9
765765

766766

767-
@unittest.mock.patch('autosklearn.ensemble_builder.EnsembleBuilder.fit_ensemble')
768767
def test_ensemble_builder_nbest_remembered(
769-
fit_ensemble,
770768
ensemble_backend,
771769
dask_client_single_worker,
772770
):
@@ -775,8 +773,6 @@ def test_ensemble_builder_nbest_remembered(
775773
This way, we can remember it and not waste more time trying big ensemble sizes
776774
"""
777775

778-
fit_ensemble.side_effect = MemoryError
779-
780776
manager = EnsembleBuilderManager(
781777
start_time=time.time(),
782778
time_left_for_ensembles=1000,
@@ -795,15 +791,14 @@ def test_ensemble_builder_nbest_remembered(
795791
max_iterations=None,
796792
)
797793

798-
# Use fork context in the next line to allow for the mock to work
799-
manager.build_ensemble(dask_client_single_worker, 'fork')
794+
manager.build_ensemble(dask_client_single_worker, unit_test=True)
800795
future = manager.futures[0]
801796
dask.distributed.wait([future]) # wait for the ensemble process to finish
802797
assert future.result() == ([], 5, None, None, None)
803798
file_path = os.path.join(ensemble_backend.internals_directory, 'ensemble_read_preds.pkl')
804799
assert not os.path.exists(file_path)
805800

806-
manager.build_ensemble(dask_client_single_worker, 'fork')
801+
manager.build_ensemble(dask_client_single_worker, unit_test=True)
807802

808803
future = manager.futures[0]
809804
dask.distributed.wait([future]) # wait for the ensemble process to finish

0 commit comments

Comments
 (0)