@@ -156,6 +156,7 @@ def build_ensemble(
156156 self ,
157157 dask_client : dask .distributed .Client ,
158158 pynisher_context : str = 'spawn' ,
159+ unit_test : bool = False
159160 ) -> None :
160161
161162 # The second criteria is elapsed time
@@ -227,6 +228,7 @@ def build_ensemble(
227228 priority = 100 ,
228229 pynisher_context = pynisher_context ,
229230 logger_port = self .logger_port ,
231+ unit_test = unit_test ,
230232 ))
231233
232234 logger .info (
@@ -265,6 +267,7 @@ def fit_and_return_ensemble(
265267 return_predictions : bool ,
266268 pynisher_context : str ,
267269 logger_port : int = logging .handlers .DEFAULT_TCP_LOGGING_PORT ,
270+ unit_test : bool = False ,
268271) -> Tuple [
269272 List [Tuple [int , float , float , float ]],
270273 int ,
@@ -321,6 +324,11 @@ def fit_and_return_ensemble(
321324 Context to use for multiprocessing, can be either fork, spawn or forkserver.
322325 logger_port: int
323326 The port where the logging server is listening to.
327+ unit_test: bool
328+ Enable unit-testing mode. When True, fit_ensemble raises a MemoryError immediately.
329+ This flag is a workaround rather than good design: unittest.mock could not be made
330+ to work through pynisher with every multiprocessing context (fork, spawn, forkserver).
331+ If you know a better solution, please let us know by opening an issue.
324332
325333 Returns
326334 -------
@@ -343,6 +351,7 @@ def fit_and_return_ensemble(
343351 read_at_most = read_at_most ,
344352 random_state = random_state ,
345353 logger_port = logger_port ,
354+ unit_test = unit_test ,
346355 ).run (
347356 end_at = end_at ,
348357 iteration = iteration ,
@@ -354,21 +363,22 @@ def fit_and_return_ensemble(
354363
355364class EnsembleBuilder (object ):
356365 def __init__ (
357- self ,
358- backend : Backend ,
359- dataset_name : str ,
360- task_type : int ,
361- metric : Scorer ,
362- ensemble_size : int = 10 ,
363- ensemble_nbest : int = 100 ,
364- max_models_on_disc : int = 100 ,
365- performance_range_threshold : float = 0 ,
366- seed : int = 1 ,
367- precision : int = 32 ,
368- memory_limit : Optional [int ] = 1024 ,
369- read_at_most : int = 5 ,
370- random_state : Optional [Union [int , np .random .RandomState ]] = None ,
371- logger_port : int = logging .handlers .DEFAULT_TCP_LOGGING_PORT ,
366+ self ,
367+ backend : Backend ,
368+ dataset_name : str ,
369+ task_type : int ,
370+ metric : Scorer ,
371+ ensemble_size : int = 10 ,
372+ ensemble_nbest : int = 100 ,
373+ max_models_on_disc : int = 100 ,
374+ performance_range_threshold : float = 0 ,
375+ seed : int = 1 ,
376+ precision : int = 32 ,
377+ memory_limit : Optional [int ] = 1024 ,
378+ read_at_most : int = 5 ,
379+ random_state : Optional [Union [int , np .random .RandomState ]] = None ,
380+ logger_port : int = logging .handlers .DEFAULT_TCP_LOGGING_PORT ,
381+ unit_test : bool = False ,
372382 ):
373383 """
374384 Constructor
@@ -416,6 +426,11 @@ def __init__(
416426 read at most n new prediction files in each iteration
417427 logger_port: int
418428 port that receives logging records
429+ unit_test: bool
430+ Enable unit-testing mode. When True, fit_ensemble raises a MemoryError immediately.
431+ This flag is a workaround rather than good design: unittest.mock could not be made
432+ to work through pynisher with every multiprocessing context (fork, spawn, forkserver).
433+ If you know a better solution, please let us know by opening an issue.
419434 """
420435
421436 super (EnsembleBuilder , self ).__init__ ()
@@ -454,6 +469,7 @@ def __init__(
454469 self .memory_limit = memory_limit
455470 self .read_at_most = read_at_most
456471 self .random_state = check_random_state (random_state )
472+ self .unit_test = unit_test
457473
458474 # Setup the logger
459475 self .logger_port = logger_port
@@ -1196,6 +1212,9 @@ def fit_ensemble(self, selected_keys: list):
11961212 trained Ensemble
11971213 """
11981214
1215+ if self .unit_test :
1216+ raise MemoryError ()
1217+
11991218 predictions_train = [self .read_preds [k ][Y_ENSEMBLE ] for k in selected_keys ]
12001219 include_num_runs = [
12011220 (
0 commit comments