Skip to content

Commit d4edb63

Browse files
committed
Rebase merge
1 parent c11febb commit d4edb63

File tree

3 files changed

+388
-418
lines changed

3 files changed

+388
-418
lines changed

autosklearn/automl.py

Lines changed: 63 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import (
4-
Any,
5-
Callable,
6-
Dict,
7-
Iterable,
8-
Mapping,
9-
Optional,
10-
Sequence,
11-
Tuple,
12-
Type,
13-
)
3+
from typing import Any, Callable, Iterable, Mapping, Sequence
144

155
import copy
166
import io
@@ -130,8 +120,8 @@ def _model_predict(
130120
model: Any,
131121
X: SUPPORTED_FEAT_TYPES,
132122
task: int,
133-
batch_size: Optional[int] = None,
134-
logger: Optional[PicklableClientLogger] = None,
123+
batch_size: int | None = None,
124+
logger: PicklableClientLogger | None = None,
135125
) -> np.ndarray:
136126
"""Generates the predictions from a model.
137127
@@ -213,29 +203,29 @@ def __init__(
213203
self,
214204
time_left_for_this_task: int,
215205
per_run_time_limit: int | None = None,
216-
temporary_directory: Optional[str] = None,
206+
temporary_directory: str | None = None,
217207
delete_tmp_folder_after_terminate: bool = True,
218208
initial_configurations_via_metalearning: int = 25,
219-
ensemble_class: Type[AbstractEnsemble] | None = EnsembleSelection,
220-
ensemble_kwargs: Dict[str, Any] | None = None,
209+
ensemble_class: type[AbstractEnsemble] | None = EnsembleSelection,
210+
ensemble_kwargs: dict[str, Any] | None = None,
221211
ensemble_nbest: int = 1,
222212
max_models_on_disc: int = 1,
223213
seed: int = 1,
224214
memory_limit: int | None = 3072,
225-
metadata_directory: Optional[str] = None,
226-
include: Optional[dict[str, list[str]]] = None,
227-
exclude: Optional[dict[str, list[str]]] = None,
215+
metadata_directory: str | None = None,
216+
include: dict[str, list[str]] | None = None,
217+
exclude: dict[str, list[str]] | None = None,
228218
resampling_strategy: str | Any = "holdout-iterative-fit",
229219
resampling_strategy_arguments: Mapping[str, Any] = None,
230-
n_jobs: Optional[int] = None,
231-
dask_client: Optional[Client] = None,
220+
n_jobs: int | None = None,
221+
dask_client: Client | None = None,
232222
precision: Literal[16, 32, 64] = 32,
233223
disable_evaluator_output: bool | Iterable[str] = False,
234224
get_smac_object_callback: Callable | None = None,
235225
smac_scenario_args: Mapping[str, Any] | None = None,
236-
logging_config: Optional[Mapping] = None,
226+
logging_config: Mapping | None = None,
237227
metrics: Sequence[Scorer] | None = None,
238-
scoring_functions: Optional[Sequence[Scorer]] = None,
228+
scoring_functions: Sequence[Scorer] | None = None,
239229
get_trials_callback: SMACCallback | None = None,
240230
dataset_compression: bool | Mapping[str, Any] = True,
241231
allow_string_features: bool = True,
@@ -244,7 +234,7 @@ def __init__(
244234

245235
if isinstance(disable_evaluator_output, Iterable):
246236
disable_evaluator_output = list(disable_evaluator_output)  # In case iterator
247-
allowed = set(["model", "cv_model", "y_optimization", "y_test"])
237+
allowed = {"model", "cv_model", "y_optimization", "y_test", "y_valid"}
248238
unknown = allowed - set(disable_evaluator_output)
249239
if any(unknown):
250240
raise ValueError(
@@ -253,7 +243,7 @@ def __init__(
253243
)
254244

255245
# Validate dataset_compression and set its values
256-
self._dataset_compression: Optional[DatasetCompressionSpec]
246+
self._dataset_compression: DatasetCompressionSpec | None
257247
if isinstance(dataset_compression, bool):
258248
if dataset_compression is True:
259249
self._dataset_compression = default_dataset_compression_arg
@@ -327,17 +317,17 @@ def __init__(
327317
self._datamanager = None
328318
self._dataset_name = None
329319
self._feat_type = None
330-
self._logger: Optional[PicklableClientLogger] = None
320+
self._logger: PicklableClientLogger | None = None
331321
self._task = None
332322
self._label_num = None
333323
self._parser = None
334324
self._can_predict = False
335325
self._read_at_most = None
336326
self._max_ensemble_build_iterations = None
337-
self.models_: Optional[dict] = None
338-
self.cv_models_: Optional[dict] = None
327+
self.models_: dict | None = None
328+
self.cv_models_: dict | None = None
339329
self.ensemble_ = None
340-
self.InputValidator: Optional[InputValidator] = None
330+
self.InputValidator: InputValidator | None = None
341331
self.configuration_space = None
342332

343333
# The ensemble performance history through time
@@ -502,11 +492,11 @@ def fit(
502492
self,
503493
X: SUPPORTED_FEAT_TYPES,
504494
y: SUPPORTED_TARGET_TYPES,
505-
task: Optional[int] = None,
506-
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
507-
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
508-
feat_type: Optional[list[str]] = None,
509-
dataset_name: Optional[str] = None,
495+
task: int | None = None,
496+
X_test: SUPPORTED_FEAT_TYPES | None = None,
497+
y_test: SUPPORTED_TARGET_TYPES | None = None,
498+
feat_type: list[str] | None = None,
499+
dataset_name: str | None = None,
510500
only_return_configuration_space: bool = False,
511501
load_models: bool = True,
512502
is_classification: bool = False,
@@ -1224,13 +1214,13 @@ def fit_pipeline(
12241214
y: SUPPORTED_TARGET_TYPES | spmatrix,
12251215
is_classification: bool,
12261216
config: Configuration | dict[str, str | float | int],
1227-
task: Optional[int] = None,
1228-
dataset_name: Optional[str] = None,
1229-
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
1230-
y_test: Optional[SUPPORTED_TARGET_TYPES | spmatrix] = None,
1231-
feat_type: Optional[list[str]] = None,
1217+
task: int | None = None,
1218+
dataset_name: str | None = None,
1219+
X_test: SUPPORTED_FEAT_TYPES | None = None,
1220+
y_test: SUPPORTED_TARGET_TYPES | spmatrix | None = None,
1221+
feat_type: list[str] | None = None,
12321222
**kwargs: dict,
1233-
) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]:
1223+
) -> tuple[BasePipeline | None, RunInfo, RunValue]:
12341224
"""Fits and individual pipeline configuration and returns
12351225
the result to the user.
12361226
@@ -1495,12 +1485,12 @@ def predict(self, X, batch_size=None, n_jobs=1):
14951485
def fit_ensemble(
14961486
self,
14971487
y: SUPPORTED_TARGET_TYPES,
1498-
task: Optional[int] = None,
1488+
task: int | None = None,
14991489
precision: Literal[16, 32, 64] = 32,
1500-
dataset_name: Optional[str] = None,
1501-
ensemble_nbest: Optional[int] = None,
1502-
ensemble_class: Optional[AbstractEnsemble] = EnsembleSelection,
1503-
ensemble_kwargs: Optional[Dict[str, Any]] = None,
1490+
dataset_name: str | None = None,
1491+
ensemble_nbest: int | None = None,
1492+
ensemble_class: type[AbstractEnsemble] | None = EnsembleSelection,
1493+
ensemble_kwargs: dict[str, Any] | None = None,
15041494
metrics: Scorer | Sequence[Scorer] | None = None,
15051495
):
15061496
check_is_fitted(self)
@@ -1966,7 +1956,7 @@ def cv_results_(self):
19661956
metric_dict[metric.name].append(metric_value)
19671957
metric_mask[metric.name].append(mask_value)
19681958

1969-
optimization_metric_names = set(m.name for m in self._metrics)
1959+
optimization_metric_names = {m.name for m in self._metrics}
19701960
for metric in self._scoring_functions:
19711961
if metric.name in optimization_metric_names:
19721962
continue
@@ -2046,27 +2036,25 @@ def sprint_statistics(self) -> str:
20462036
num_runs = len(cv_results["status"])
20472037
sio.write(" Number of target algorithm runs: %d\n" % num_runs)
20482038
num_success = sum(
2049-
[
2050-
s in ["Success", "Success (but do not advance to higher budget)"]
2051-
for s in cv_results["status"]
2052-
]
2039+
s in ["Success", "Success (but do not advance to higher budget)"]
2040+
for s in cv_results["status"]
20532041
)
20542042
sio.write(" Number of successful target algorithm runs: %d\n" % num_success)
2055-
num_crash = sum([s == "Crash" for s in cv_results["status"]])
2043+
num_crash = sum(s == "Crash" for s in cv_results["status"])
20562044
sio.write(" Number of crashed target algorithm runs: %d\n" % num_crash)
2057-
num_timeout = sum([s == "Timeout" for s in cv_results["status"]])
2045+
num_timeout = sum(s == "Timeout" for s in cv_results["status"])
20582046
sio.write(
20592047
" Number of target algorithms that exceeded the time "
20602048
"limit: %d\n" % num_timeout
20612049
)
2062-
num_memout = sum([s == "Memout" for s in cv_results["status"]])
2050+
num_memout = sum(s == "Memout" for s in cv_results["status"])
20632051
sio.write(
20642052
" Number of target algorithms that exceeded the memory "
20652053
"limit: %d\n" % num_memout
20662054
)
20672055
return sio.getvalue()
20682056

2069-
def get_models_with_weights(self) -> list[Tuple[float, BasePipeline]]:
2057+
def get_models_with_weights(self) -> list[tuple[float, BasePipeline]]:
20702058
check_is_fitted(self)
20712059
if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None:
20722060
self._load_models()
@@ -2246,9 +2234,9 @@ def _create_search_space(
22462234
tmp_dir: str,
22472235
backend: Backend,
22482236
datamanager: XYDataManager,
2249-
include: Optional[Mapping[str, list[str]]] = None,
2250-
exclude: Optional[Mapping[str, list[str]]] = None,
2251-
) -> Tuple[ConfigurationSpace, str]:
2237+
include: Mapping[str, list[str]] | None = None,
2238+
exclude: Mapping[str, list[str]] | None = None,
2239+
) -> tuple[ConfigurationSpace, str]:
22522240
configspace_path = os.path.join(tmp_dir, "space.json")
22532241
configuration_space = pipeline.get_configuration_space(
22542242
datamanager.info,
@@ -2297,8 +2285,8 @@ def fit(
22972285
y: SUPPORTED_TARGET_TYPES,
22982286
X_test: SUPPORTED_FEAT_TYPES | None = None,
22992287
y_test: SUPPORTED_TARGET_TYPES | None = None,
2300-
feat_type: Optional[list[str]] = None,
2301-
dataset_name: Optional[str] = None,
2288+
feat_type: list[str] | None = None,
2289+
dataset_name: str | None = None,
23022290
only_return_configuration_space: bool = False,
23032291
load_models: bool = True,
23042292
) -> AutoMLClassifier:
@@ -2319,12 +2307,12 @@ def fit_pipeline(
23192307
X: SUPPORTED_FEAT_TYPES,
23202308
y: SUPPORTED_TARGET_TYPES | spmatrix,
23212309
config: Configuration | dict[str, str | float | int],
2322-
dataset_name: Optional[str] = None,
2323-
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
2324-
y_test: Optional[SUPPORTED_TARGET_TYPES | spmatrix] = None,
2325-
feat_type: Optional[list[str]] = None,
2310+
dataset_name: str | None = None,
2311+
X_test: SUPPORTED_FEAT_TYPES | None = None,
2312+
y_test: SUPPORTED_TARGET_TYPES | spmatrix | None = None,
2313+
feat_type: list[str] | None = None,
23262314
**kwargs,
2327-
) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]:
2315+
) -> tuple[BasePipeline | None, RunInfo, RunValue]:
23282316
return super().fit_pipeline(
23292317
X=X,
23302318
y=y,
@@ -2340,7 +2328,7 @@ def fit_pipeline(
23402328
def predict(
23412329
self,
23422330
X: SUPPORTED_FEAT_TYPES,
2343-
batch_size: Optional[int] = None,
2331+
batch_size: int | None = None,
23442332
n_jobs: int = 1,
23452333
) -> np.ndarray:
23462334
check_is_fitted(self)
@@ -2359,7 +2347,7 @@ def predict(
23592347
def predict_proba(
23602348
self,
23612349
X: SUPPORTED_FEAT_TYPES,
2362-
batch_size: Optional[int] = None,
2350+
batch_size: int | None = None,
23632351
n_jobs: int = 1,
23642352
) -> np.ndarray:
23652353
return super().predict(X, batch_size=batch_size, n_jobs=n_jobs)
@@ -2385,10 +2373,10 @@ def fit(
23852373
self,
23862374
X: SUPPORTED_FEAT_TYPES,
23872375
y: SUPPORTED_TARGET_TYPES | spmatrix,
2388-
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
2389-
y_test: Optional[SUPPORTED_TARGET_TYPES | spmatrix] = None,
2390-
feat_type: Optional[list[str]] = None,
2391-
dataset_name: Optional[str] = None,
2376+
X_test: SUPPORTED_FEAT_TYPES | None = None,
2377+
y_test: SUPPORTED_TARGET_TYPES | spmatrix | None = None,
2378+
feat_type: list[str] | None = None,
2379+
dataset_name: str | None = None,
23922380
only_return_configuration_space: bool = False,
23932381
load_models: bool = True,
23942382
) -> AutoMLRegressor:
@@ -2409,12 +2397,12 @@ def fit_pipeline(
24092397
X: SUPPORTED_FEAT_TYPES,
24102398
y: SUPPORTED_TARGET_TYPES | spmatrix,
24112399
config: Configuration | dict[str, str | float | int],
2412-
dataset_name: Optional[str] = None,
2413-
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
2414-
y_test: Optional[SUPPORTED_TARGET_TYPES | spmatrix] = None,
2415-
feat_type: Optional[list[str]] = None,
2400+
dataset_name: str | None = None,
2401+
X_test: SUPPORTED_FEAT_TYPES | None = None,
2402+
y_test: SUPPORTED_TARGET_TYPES | spmatrix | None = None,
2403+
feat_type: list[str] | None = None,
24162404
**kwargs: dict,
2417-
) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]:
2405+
) -> tuple[BasePipeline | None, RunInfo, RunValue]:
24182406
return super().fit_pipeline(
24192407
X=X,
24202408
y=y,

0 commit comments

Comments
 (0)