Skip to content

Commit b9f0e0a

Browse files
Merge pull request #62 from ThomasMeissnerDS/update_unit_tests
Add Xgboost unit tests
2 parents 096b5a8 + de80ed3 commit b9f0e0a

File tree

3 files changed

+303
-3
lines changed

3 files changed

+303
-3
lines changed

bluecast/blueprints/custom_model_recipes.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,17 @@ def autotune(
3838
"penalty": ["l2"],
3939
"C": np.logspace(0.1, 1, 5),
4040
"class_weight": ["balanced", None],
41-
"solver": ["newton-cg", "newton-cholesky", "sag", "saga"],
41+
# solvers that support l2
42+
"solver": ["lbfgs", "newton-cg", "newton-cholesky", "sag", "saga"],
4243
},
4344
{
4445
"penalty": ["elasticnet"],
4546
"C": np.logspace(0.1, 1, 5),
4647
"class_weight": ["balanced", None],
47-
"solver": ["newton-cg", "newton-cholesky", "sag", "saga"],
48-
"l1_ratio": np.arange(0, 1, 3),
48+
# elasticnet is only supported by 'saga'
49+
"solver": ["saga"],
50+
# include endpoints 0.0, 0.5, 1.0
51+
"l1_ratio": np.linspace(0.0, 1.0, 3),
4952
},
5053
]
5154

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
from typing import Optional, Tuple
2+
3+
import numpy as np
4+
import pandas as pd
5+
6+
from bluecast.blueprints.cast import BlueCast
7+
from bluecast.config.training_config import (
8+
TrainingConfig,
9+
XgboostFinalParamConfig,
10+
XgboostTuneParamsConfig,
11+
)
12+
from bluecast.ml_modelling.xgboost import XgboostModel
13+
from bluecast.preprocessing.custom import CustomPreprocessing
14+
15+
16+
def test_bluecast_with_custom_xgboost_no_tuning():
    """Smoke test: BlueCast with a user-supplied XgboostModel, tuning disabled.

    Expects a successful fit/predict round trip and an empty experiment
    tracker, since no tuning rounds are executed.
    """
    conf_training = TrainingConfig()
    conf_training.hyperparameter_tuning_rounds = 5
    conf_training.hypertuning_cv_folds = 2
    conf_training.autotune_model = False

    conf_xgb_tune = XgboostTuneParamsConfig()
    conf_xgb_tune.steps_min = 2
    conf_xgb_tune.steps_max = 100
    conf_xgb_tune.max_depth_max = 3

    # Ensure final params are valid for binary classification and fast.
    # NOTE(review): 'multi:softprob' with num_class=2 is a multiclass objective
    # applied to a binary problem — presumably deliberate; confirm BlueCast's
    # binary prediction path accepts the resulting (n, 2) probability output.
    conf_final = XgboostFinalParamConfig()
    conf_final.params["objective"] = "multi:softprob"
    conf_final.params["eval_metric"] = "mlogloss"
    conf_final.params["num_class"] = 2
    conf_final.params["steps"] = 50

    class MyCustomLastMilePreprocessing(CustomPreprocessing):
        """Appends a constant column as a last-mile transformation."""

        def custom_function(self, df: pd.DataFrame) -> pd.DataFrame:
            df["custom_col"] = 5
            return df

        def fit_transform(
            self, df: pd.DataFrame, target: pd.Series
        ) -> Tuple[pd.DataFrame, pd.Series]:
            return self.custom_function(df), target

        def transform(
            self,
            df: pd.DataFrame,
            target: Optional[pd.Series] = None,
            predicton_mode: bool = False,  # (sic) keyword kept to match caller
        ) -> Tuple[pd.DataFrame, Optional[pd.Series]]:
            return self.custom_function(df), target

    automl = BlueCast(
        class_problem="binary",
        ml_model=XgboostModel(
            class_problem="binary",
            conf_training=conf_training,
            conf_xgboost=conf_xgb_tune,
            conf_params_xgboost=conf_final,
        ),
        conf_xgboost=conf_xgb_tune,
        conf_training=conf_training,
        custom_last_mile_computation=MyCustomLastMilePreprocessing(),
    )

    def make_features(n_rows: int) -> pd.DataFrame:
        # Six identical, monotonically increasing feature columns.
        return pd.DataFrame(
            {f"feature{idx}": list(range(n_rows)) for idx in range(1, 7)}
        )

    x_train = make_features(20)
    x_test = make_features(10)
    x_train["target"] = pd.Series([0, 1] * 10)

    automl.fit(x_train, "target")

    predicted_probas, predicted_classes = automl.predict(x_test)
    _ = automl.predict_proba(x_test)

    assert isinstance(predicted_probas, np.ndarray)
    assert isinstance(predicted_classes, np.ndarray)
    # No tuning was run, so nothing should be logged in the tracker.
    assert len(automl.experiment_tracker.experiment_id) == 0
99+
100+
101+
def test_bluecast_with_custom_xgboost_with_tuning():
    """Smoke test: BlueCast with a user-supplied XgboostModel, tuning enabled.

    Expects a successful fit/predict round trip and exactly one experiment
    tracker entry per hyperparameter tuning round.
    """
    conf_training = TrainingConfig()
    conf_training.hyperparameter_tuning_rounds = 5
    conf_training.hypertuning_cv_folds = 2
    conf_training.autotune_model = True
    conf_training.plot_hyperparameter_tuning_overview = False  # keep the test headless

    conf_xgb_tune = XgboostTuneParamsConfig()
    conf_xgb_tune.steps_min = 2
    conf_xgb_tune.steps_max = 100
    conf_xgb_tune.max_depth_max = 3

    automl = BlueCast(
        class_problem="binary",
        ml_model=XgboostModel(
            class_problem="binary",
            conf_training=conf_training,
            conf_xgboost=conf_xgb_tune,
        ),
        conf_xgboost=conf_xgb_tune,
        conf_training=conf_training,
    )

    def make_features(n_rows: int) -> pd.DataFrame:
        # Six identical, monotonically increasing feature columns.
        return pd.DataFrame(
            {f"feature{idx}": list(range(n_rows)) for idx in range(1, 7)}
        )

    x_train = make_features(20)
    x_test = make_features(10)
    x_train["target"] = pd.Series([0, 1] * 10)

    automl.fit(x_train, "target")

    predicted_probas, predicted_classes = automl.predict(x_test)
    _ = automl.predict_proba(x_test)

    assert isinstance(predicted_probas, np.ndarray)
    assert isinstance(predicted_classes, np.ndarray)
    # One tracker entry per tuning round (5 rounds configured above).
    assert len(automl.experiment_tracker.experiment_id) == 5
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
from typing import Optional, Tuple
2+
3+
import numpy as np
4+
import pandas as pd
5+
6+
from bluecast.blueprints.cast_regression import BlueCastRegression
7+
from bluecast.config.training_config import (
8+
TrainingConfig,
9+
XgboostTuneParamsRegressionConfig,
10+
)
11+
from bluecast.ml_modelling.xgboost_regression import XgboostModelRegression
12+
from bluecast.preprocessing.custom import CustomPreprocessing
13+
14+
15+
def test_bluecast_regression_with_custom_xgboost_no_tuning():
    """Smoke test: BlueCastRegression with a custom XgboostModelRegression, no tuning.

    Expects a successful fit/predict round trip and an empty experiment
    tracker, since no tuning rounds are executed.
    """
    conf_training = TrainingConfig()
    conf_training.hyperparameter_tuning_rounds = 5
    conf_training.hypertuning_cv_folds = 2
    conf_training.autotune_model = False

    conf_xgb_tune = XgboostTuneParamsRegressionConfig()
    conf_xgb_tune.steps_min = 2
    conf_xgb_tune.steps_max = 100
    conf_xgb_tune.max_depth_max = 3

    class MyCustomLastMilePreprocessing(CustomPreprocessing):
        """Appends a constant column as a last-mile transformation."""

        def custom_function(self, df: pd.DataFrame) -> pd.DataFrame:
            df["custom_col"] = 5
            return df

        def fit_transform(
            self, df: pd.DataFrame, target: pd.Series
        ) -> Tuple[pd.DataFrame, pd.Series]:
            return self.custom_function(df), target

        def transform(
            self,
            df: pd.DataFrame,
            target: Optional[pd.Series] = None,
            predicton_mode: bool = False,  # (sic) keyword kept to match caller
        ) -> Tuple[pd.DataFrame, Optional[pd.Series]]:
            return self.custom_function(df), target

    automl = BlueCastRegression(
        class_problem="regression",
        ml_model=XgboostModelRegression(
            class_problem="regression",
            conf_training=conf_training,
            conf_xgboost=conf_xgb_tune,
        ),
        conf_xgboost=conf_xgb_tune,
        conf_training=conf_training,
        custom_last_mile_computation=MyCustomLastMilePreprocessing(),
    )

    def make_features(n_rows: int) -> pd.DataFrame:
        # Six identical, monotonically increasing feature columns.
        return pd.DataFrame(
            {f"feature{idx}": list(range(n_rows)) for idx in range(1, 7)}
        )

    x_train = make_features(20)
    x_test = make_features(10)
    # Alternating 0/1 target; a coarse but valid regression target for a smoke test.
    x_train["target"] = pd.Series([0, 1] * 10)

    automl.fit(x_train, "target")

    predicted_values = automl.predict(x_test)

    assert isinstance(predicted_values, np.ndarray)
    # No tuning was run, so nothing should be logged in the tracker.
    assert len(automl.experiment_tracker.experiment_id) == 0
88+
89+
90+
def test_bluecast_regression_with_custom_xgboost_with_tuning():
    """Smoke test: BlueCastRegression with a custom XgboostModelRegression, tuning on.

    Expects a successful fit/predict round trip and exactly one experiment
    tracker entry per hyperparameter tuning round.
    """
    conf_training = TrainingConfig()
    conf_training.hyperparameter_tuning_rounds = 5
    conf_training.hypertuning_cv_folds = 2
    conf_training.autotune_model = True
    conf_training.plot_hyperparameter_tuning_overview = False  # keep the test headless

    conf_xgb_tune = XgboostTuneParamsRegressionConfig()
    conf_xgb_tune.steps_min = 2
    conf_xgb_tune.steps_max = 100
    conf_xgb_tune.max_depth_max = 3

    automl = BlueCastRegression(
        class_problem="regression",
        ml_model=XgboostModelRegression(
            class_problem="regression",
            conf_training=conf_training,
            conf_xgboost=conf_xgb_tune,
        ),
        conf_xgboost=conf_xgb_tune,
        conf_training=conf_training,
    )

    def make_features(n_rows: int) -> pd.DataFrame:
        # Six identical, monotonically increasing feature columns.
        return pd.DataFrame(
            {f"feature{idx}": list(range(n_rows)) for idx in range(1, 7)}
        )

    x_train = make_features(20)
    x_test = make_features(10)
    # Alternating 0/1 target; a coarse but valid regression target for a smoke test.
    x_train["target"] = pd.Series([0, 1] * 10)

    automl.fit(x_train, "target")

    predicted_values = automl.predict(x_test)

    assert isinstance(predicted_values, np.ndarray)
    # One tracker entry per tuning round (5 rounds configured above).
    assert len(automl.experiment_tracker.experiment_id) == 5

0 commit comments

Comments
 (0)