1919import numpy as np
2020import pandas as pd
2121import pynisher
22- from sklearn .utils .validation import check_random_state
2322from smac .callbacks import IncorporateRunResultCallback
2423from smac .optimizer .smbo import SMBO
2524from smac .runhistory .runhistory import RunInfo , RunValue
@@ -57,7 +56,7 @@ def __init__(
5756 max_iterations : Optional [int ],
5857 read_at_most : int ,
5958 ensemble_memory_limit : Optional [int ],
60- random_state : int ,
59+ random_state : Union [ int , np . random . RandomState ] ,
6160 logger_port : int = logging .handlers .DEFAULT_TCP_LOGGING_PORT ,
6261 pynisher_context : str = 'fork' ,
6362 ):
@@ -228,7 +227,7 @@ def build_ensemble(
228227 precision = self .precision ,
229228 memory_limit = self .ensemble_memory_limit ,
230229 read_at_most = self .read_at_most ,
231- random_state = self .seed ,
230+ random_state = self .random_state ,
232231 end_at = self .start_time + self .time_left_for_ensembles ,
233232 iteration = self .iteration ,
234233 return_predictions = False ,
@@ -266,15 +265,15 @@ def fit_and_return_ensemble(
266265 max_models_on_disc : Union [float , int ],
267266 seed : int ,
268267 precision : int ,
269- memory_limit : Optional [int ],
270268 read_at_most : int ,
271- random_state : int ,
272269 end_at : float ,
273270 iteration : int ,
274271 return_predictions : bool ,
275272 pynisher_context : str ,
276273 logger_port : int = logging .handlers .DEFAULT_TCP_LOGGING_PORT ,
277274 unit_test : bool = False ,
275+ memory_limit : Optional [int ] = None ,
276+ random_state : Optional [Union [int , np .random .RandomState ]] = None ,
278277) -> Tuple [
279278 List [Tuple [int , float , float , float ]],
280279 int ,
@@ -318,8 +317,6 @@ def fit_and_return_ensemble(
318317 random seed
319318 precision: [16,32,64,128]
320319 precision of floats to read the predictions
321- memory_limit: Optional[int]
322- memory limit in mb. If ``None``, no memory limit is enforced.
323320 read_at_most: int
324321 read at most n new prediction files in each iteration
325322 end_at: float
@@ -329,13 +326,17 @@ def fit_and_return_ensemble(
329326 The current iteration
330327 pynisher_context: str
331328 Context to use for multiprocessing, can be either fork, spawn or forkserver.
332- logger_port: int
329+ logger_port: int = DEFAULT_TCP_LOGGING_PORT
333330 The port where the logging server is listening to.
334- unit_test: bool
331+ unit_test: bool = False
335332 Turn on unit testing mode. This currently makes fit_ensemble raise a MemoryError.
336333 Having this is very bad coding style, but I did not find a way to make
337334 unittest.mock work through the pynisher with all spawn contexts. If you know a
338335 better solution, please let us know by opening an issue.
336+ memory_limit: Optional[int] = None
337+ memory limit in mb. If ``None``, no memory limit is enforced.
338+ random_state: Optional[int | RandomState] = None
339+ A random state used for the ensemble selection process.
339340
340341 Returns
341342 -------
@@ -376,15 +377,15 @@ def __init__(
376377 task_type : int ,
377378 metric : Scorer ,
378379 ensemble_size : int = 10 ,
379- ensemble_nbest : int = 100 ,
380+ ensemble_nbest : Union [ int , float ] = 100 ,
380381 max_models_on_disc : int = 100 ,
381382 performance_range_threshold : float = 0 ,
382383 seed : int = 1 ,
383384 precision : int = 32 ,
384385 memory_limit : Optional [int ] = 1024 ,
385386 read_at_most : int = 5 ,
386- random_state : Optional [Union [int , np .random .RandomState ]] = None ,
387387 logger_port : int = logging .handlers .DEFAULT_TCP_LOGGING_PORT ,
388+ random_state : Optional [Union [int , np .random .RandomState ]] = None ,
388389 unit_test : bool = False ,
389390 ):
390391 """
@@ -400,14 +401,14 @@ def __init__(
400401 type of ML task
401402 metric: Scorer
402403 metric used to compute the loss of the given predictions
403- ensemble_size: int
404+ ensemble_size: int = 10
404405 maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection)
405- ensemble_nbest: int/ float
406+ ensemble_nbest: int | float = 100
406407 if int: consider only the n best prediction
407408 if float: consider only this fraction of the best models
408- Both wrt to validation predictions
409+ Both with respect to the validation predictions
409410 If performance_range_threshold > 0, might return fewer models
410- max_models_on_disc: int
411+ max_models_on_disc: int = 100
411412 Defines the maximum number of models that are kept on disc.
412413 If int, it must be greater than or equal to 1, and dictates the max number of
413414 models to keep.
@@ -417,23 +418,25 @@ def __init__(
417418 Models and predictions of the worst-performing models will be deleted then.
418419 If None, the feature is disabled.
419420 It defines an upper bound on the models that can be used in the ensemble.
420- performance_range_threshold: float
421+ performance_range_threshold: float = 0
421422 Keep only models that are better than:
422423 dummy + (best - dummy)*performance_range_threshold
423424 E.g. dummy=2, best=4, thresh=0.5 --> only consider models with loss > 3
424425 Will return at most the minimum of ensemble_nbest
425426 and max_models_on_disc models. Might return fewer.
426- seed: int
427- random seed
428- precision: [16,32,64,128]
427+ seed: int = 1
428+ random seed that is used as part of the filename
429+ precision: int in [16,32,64,128] = 32
429430 precision of floats to read the predictions
430- memory_limit: Optional[int]
431+ memory_limit: Optional[int] = 1024
431432 memory limit in mb. If ``None``, no memory limit is enforced.
432- read_at_most: int
433+ read_at_most: int = 5
433434 read at most n new prediction files in each iteration
434- logger_port: int
435+ logger_port: int = DEFAULT_TCP_LOGGING_PORT
435436 port that receives logging records
436- unit_test: bool
437+ random_state: Optional[int | RandomState] = None
438+ An int or RandomState object used for generating the ensemble.
439+ unit_test: bool = False
437440 Turn on unit testing mode. This currently makes fit_ensemble raise a MemoryError.
438441 Having this is very bad coding style, but I did not find a way to make
439442 unittest.mock work through the pynisher with all spawn contexts. If you know a
@@ -475,7 +478,7 @@ def __init__(
475478 self .precision = precision
476479 self .memory_limit = memory_limit
477480 self .read_at_most = read_at_most
478- self .random_state = check_random_state ( random_state )
481+ self .random_state = random_state
479482 self .unit_test = unit_test
480483
481484 # Setup the logger
0 commit comments