Skip to content

Commit 99d1f43

Browse files
committed
add 'FeatureScalingPipelineFunction' class
1 parent 30d34ef commit 99d1f43

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""Feature Scaling Pipeline test function."""
2+
3+
from typing import Any, Callable, Dict, List, Optional, Union
4+
5+
from sklearn.ensemble import GradientBoostingClassifier
6+
from sklearn.model_selection import cross_val_score
7+
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler
8+
from sklearn.svm import SVC
9+
10+
from surfaces.modifiers import BaseModifier
11+
from surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.datasets import (
12+
DATASETS,
13+
)
14+
15+
from .._base_tabular_feature_engineering import BaseTabularFeatureEngineering
16+
17+
18+
class FeatureScalingPipelineFunction(BaseTabularFeatureEngineering):
    """Feature Scaling Pipeline test function.

    Optimizes the combination of scaling method and classification model.
    Different models benefit from different scaling strategies, and this
    function helps identify the best combination.

    The scaler and the classifier are chained in a scikit-learn pipeline, so
    the scaler is re-fitted on the training part of every cross-validation
    fold. The held-out samples of a fold therefore never contribute to the
    scaling statistics used to train on that fold.

    Parameters
    ----------
    dataset : str, default="wine"
        Dataset to use for evaluation. One of: "digits", "iris", "wine", "breast_cancer".
    cv : int, default=5
        Number of cross-validation folds. One of: 2, 3, 5, 10.
    objective : str, default="maximize"
        Either "minimize" or "maximize".
    modifiers : list of BaseModifier, optional
        List of modifiers to apply to function evaluations.
    memory : bool, default=False
        Forwarded unchanged to the base class constructor.
    collect_data : bool, default=True
        Forwarded unchanged to the base class constructor.
    callbacks : callable or list of callables, optional
        Forwarded unchanged to the base class constructor.
    catch_errors : dict mapping exception type to float, optional
        Forwarded unchanged to the base class constructor.
    use_surrogate : bool, default=False
        Forwarded unchanged to the base class constructor.

    Raises
    ------
    ValueError
        If ``dataset`` is not in ``available_datasets`` or ``cv`` is not in
        ``available_cv``.

    Examples
    --------
    >>> from surfaces.test_functions import FeatureScalingPipelineFunction
    >>> func = FeatureScalingPipelineFunction(dataset="wine", cv=5)
    >>> func.search_space
    {'scaler': ['standard', 'minmax', 'robust', 'none'], 'model_type': ['svm', 'gb']}
    >>> result = func({"scaler": "standard", "model_type": "svm"})
    """

    name = "Feature Scaling Pipeline"
    _name_ = "feature_scaling_pipeline"
    __name__ = "FeatureScalingPipelineFunction"

    # Values accepted by the constructor; anything else raises ValueError.
    available_datasets = ["digits", "iris", "wine", "breast_cancer"]
    available_cv = [2, 3, 5, 10]

    # Search-space dimensions and their candidate values.
    para_names = ["scaler", "model_type"]
    scaler_default = ["standard", "minmax", "robust", "none"]
    model_type_default = ["svm", "gb"]

    def __init__(
        self,
        dataset: str = "wine",
        cv: int = 5,
        objective: str = "maximize",
        modifiers: Optional[List[BaseModifier]] = None,
        memory: bool = False,
        collect_data: bool = True,
        callbacks: Optional[Union[Callable, List[Callable]]] = None,
        catch_errors: Optional[Dict[type, float]] = None,
        use_surrogate: bool = False,
    ):
        # Validate before touching DATASETS so the caller gets a descriptive
        # ValueError instead of a bare KeyError.
        if dataset not in self.available_datasets:
            raise ValueError(f"Unknown dataset '{dataset}'. Available: {self.available_datasets}")

        if cv not in self.available_cv:
            raise ValueError(f"Invalid cv={cv}. Available: {self.available_cv}")

        # These attributes are assigned before the base constructor runs.
        # NOTE(review): presumably the base class calls
        # _create_objective_function() during __init__, which reads them --
        # confirm before reordering.
        self.dataset = dataset
        self.cv = cv
        self._dataset_loader = DATASETS[dataset]

        super().__init__(
            objective=objective,
            modifiers=modifiers,
            memory=memory,
            collect_data=collect_data,
            callbacks=callbacks,
            catch_errors=catch_errors,
            use_surrogate=use_surrogate,
        )

    @property
    def search_space(self) -> Dict[str, Any]:
        """Search space for feature scaling pipeline."""
        return {
            "scaler": self.scaler_default,
            "model_type": self.model_type_default,
        }

    def _create_objective_function(self) -> None:
        """Create objective function for feature scaling pipeline.

        Loads the dataset once, then stores a closure on
        ``self.pure_objective_function`` that maps a parameter dict with the
        keys ``"scaler"`` and ``"model_type"`` to the mean cross-validated
        accuracy of the corresponding scaler + classifier combination.
        """
        # Function-scope import: keeps this class self-contained without
        # touching the module's import block.
        from sklearn.pipeline import make_pipeline

        X, y = self._dataset_loader()
        cv = self.cv

        # Factories rather than instances: every evaluation gets fresh,
        # unfitted estimators. ``None`` means "no scaling step".
        scaler_factories = {
            "standard": StandardScaler,
            "minmax": MinMaxScaler,
            "robust": RobustScaler,
            "none": None,
        }
        model_factories = {
            "svm": lambda: SVC(kernel="rbf", random_state=42),
            "gb": lambda: GradientBoostingClassifier(n_estimators=50, random_state=42),
        }

        def objective_function(params: Dict[str, Any]) -> float:
            """Return the mean CV accuracy for the requested combination."""
            # Resolve the scaler first so an invalid scaler is reported
            # before the model type is even looked at.
            scaler_type = params["scaler"]
            try:
                scaler_factory = scaler_factories[scaler_type]
            except (KeyError, TypeError):
                raise ValueError(f"Unknown scaler: {scaler_type}") from None

            model_type = params["model_type"]
            try:
                model_factory = model_factories[model_type]
            except (KeyError, TypeError):
                raise ValueError(f"Unknown model_type: {model_type}") from None

            model = model_factory()
            if scaler_factory is None:
                estimator = model
            else:
                # Scaling happens inside the pipeline so it is fitted on each
                # fold's training split only; fitting it on all of X up front
                # would leak held-out statistics into training.
                estimator = make_pipeline(scaler_factory(), model)

            # Evaluate
            scores = cross_val_score(estimator, X, y, cv=cv, scoring="accuracy")
            # Convert the NumPy scalar to a builtin float to match the
            # declared return type.
            return float(scores.mean())

        self.pure_objective_function = objective_function

0 commit comments

Comments
 (0)