Skip to content

Commit b53423c

Browse files
Fix tests
1 parent 8827c66 commit b53423c

File tree

9 files changed

+53
-54
lines changed

9 files changed

+53
-54
lines changed

bluecast/ml_modelling/base_classes.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -284,13 +284,15 @@ def concat_prepare_full_train_datasets(
284284

285285
return x_train, y_train
286286

287-
def get_early_stopping_callback(self) -> Optional[List[xgb.callback.EarlyStopping]]:
287+
def get_early_stopping_callback(
288+
self, data_name: str = "test"
289+
) -> Optional[List[xgb.callback.EarlyStopping]]:
288290
"""Create early stopping callback if configured."""
289291
if self.conf_training.early_stopping_rounds:
290292
early_stop = xgb.callback.EarlyStopping(
291293
rounds=self.conf_training.early_stopping_rounds,
292294
metric_name=self.conf_xgboost.xgboost_eval_metric,
293-
data_name="test",
295+
data_name=data_name,
294296
save_best=self.conf_params_xgboost.params["booster"] != "gblinear",
295297
)
296298
callbacks = [early_stop]

bluecast/ml_modelling/xgboost.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,10 @@ def fit(
101101
d_train, d_test = self._create_d_matrices(x_train, y_train, x_test, y_test)
102102
if x_test.empty:
103103
eval_set = [(d_train, "train")]
104+
eval_name = "train"
104105
else:
105106
eval_set = [(d_test, "test")]
107+
eval_name = "test"
106108

107109
steps = self.conf_params_xgboost.params.pop("steps", 300)
108110

@@ -113,7 +115,7 @@ def fit(
113115
num_boost_round=steps,
114116
evals=eval_set,
115117
verbose_eval=self.conf_xgboost.verbosity_during_final_model_training,
116-
callbacks=self.get_early_stopping_callback(),
118+
callbacks=self.get_early_stopping_callback(data_name=eval_name),
117119
)
118120
elif self.conf_xgboost:
119121
self.model = xgb.train(
@@ -123,7 +125,7 @@ def fit(
123125
early_stopping_rounds=self.conf_training.early_stopping_rounds,
124126
evals=eval_set,
125127
verbose_eval=self.conf_xgboost.verbosity_during_final_model_training,
126-
callbacks=self.get_early_stopping_callback(),
128+
callbacks=self.get_early_stopping_callback(data_name=eval_name),
127129
)
128130
logging.info("Finished training")
129131
return self.model

bluecast/ml_modelling/xgboost_regression.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,10 @@ def fit(
112112
d_train, d_test = self._create_d_matrices(x_train, y_train, x_test, y_test)
113113
if x_test.empty:
114114
eval_set = [(d_train, "train")]
115+
eval_name = "train"
115116
else:
116117
eval_set = [(d_test, "test")]
118+
eval_name = "test"
117119

118120
steps = self.conf_params_xgboost.params.pop("steps", 300)
119121

@@ -125,7 +127,7 @@ def fit(
125127
early_stopping_rounds=self.conf_training.early_stopping_rounds,
126128
evals=eval_set,
127129
verbose_eval=self.conf_xgboost.verbosity_during_final_model_training,
128-
callbacks=self.get_early_stopping_callback(),
130+
callbacks=self.get_early_stopping_callback(data_name=eval_name),
129131
)
130132
elif self.conf_xgboost:
131133
self.model = xgb.train(
@@ -135,7 +137,7 @@ def fit(
135137
early_stopping_rounds=self.conf_training.early_stopping_rounds,
136138
evals=eval_set,
137139
verbose_eval=self.conf_xgboost.verbosity_during_final_model_training,
138-
callbacks=self.get_early_stopping_callback(),
140+
callbacks=self.get_early_stopping_callback(data_name=eval_name),
139141
)
140142
logging.info("Finished training")
141143
return self.model

bluecast/tests/conftest.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,7 @@
99

1010
from bluecast.blueprints.cast import BlueCast
1111
from bluecast.blueprints.cast_regression import BlueCastRegression
12-
from bluecast.config.training_config import (
13-
CatboostTuneParamsConfig,
14-
CatboostTuneParamsRegressionConfig,
15-
TrainingConfig,
16-
)
12+
from bluecast.config.training_config import TrainingConfig
1713
from bluecast.tests.make_data.create_data import (
1814
create_synthetic_dataframe,
1915
create_synthetic_dataframe_regression,

bluecast/tests/test_conformal_prediction_evaluation.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
def test_prediction_set_coverage():
1818
X, y = make_classification(
19-
n_samples=1000, n_features=5, random_state=42, n_classes=2
19+
n_samples=3000, n_features=5, random_state=42, n_classes=2
2020
)
2121
X_train, X_calibrate, y_train, y_calibrate = train_test_split(
2222
X, y, test_size=0.2, random_state=42
@@ -36,10 +36,8 @@ def test_prediction_set_coverage():
3636

3737
# Create a custom training config and adjust general training parameters
3838
train_config = TrainingConfig()
39-
train_config.hyperparameter_tuning_rounds = 10
40-
train_config.autotune_model = (
41-
False # we want to run just normal training, no hyperparameter tuning
42-
)
39+
train_config.hyperparameter_tuning_rounds = 2
40+
train_config.autotune_model = False
4341

4442
automl = BlueCast(
4543
class_problem="binary",

bluecast/tests/test_error_paths.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,7 @@
99
import pytest
1010

1111
from bluecast.blueprints.cast import BlueCast
12-
from bluecast.blueprints.cast_cv import BlueCastCV
1312
from bluecast.blueprints.cast_regression import BlueCastRegression
14-
from bluecast.config.training_config import TrainingConfig
1513

1614

1715
class TestPredictBeforeFit:

bluecast/tests/test_preprocessing_recipes.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,9 @@ def test_initialization(preprocessing_instance):
4545
def test_fit_transform(sample_data, preprocessing_instance, monkeypatch):
4646
df, target = sample_data
4747

48-
# Mock the remove_correlated_columns function
48+
# Mock the remove_correlated_columns function at the usage site
4949
monkeypatch.setattr(
50-
"bluecast.preprocessing.remove_collinearity.remove_correlated_columns",
50+
"bluecast.blueprints.preprocessing_recipes.remove_correlated_columns",
5151
mock_remove_correlated_columns,
5252
)
5353

@@ -71,7 +71,7 @@ def test_transform(sample_data, preprocessing_instance, monkeypatch):
7171

7272
# Fit-transform first to simulate the normal flow
7373
monkeypatch.setattr(
74-
"bluecast.preprocessing.remove_collinearity.remove_correlated_columns",
74+
"bluecast.blueprints.preprocessing_recipes.remove_correlated_columns",
7575
mock_remove_correlated_columns,
7676
)
7777
preprocessing_instance.fit_transform(df, target)

bluecast/tests/test_remove_collinearity.py

Lines changed: 34 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -3,71 +3,73 @@
33
from bluecast.preprocessing.remove_collinearity import remove_correlated_columns
44

55

6-
def test_remove_correlated_columns_high_correlation():
7-
# Create a DataFrame with high correlation between columns
6+
def test_remove_correlated_columns_positive_correlation():
87
data = {
98
"A": [1, 2, 3, 4, 5],
10-
"B": [2, 4, 6, 8, 10], # B is perfectly correlated with A
11-
"C": [5, 4, 3, 2, 1], # C is not correlated with A or B
9+
"B": [2, 4, 6, 8, 10], # B is perfectly positively correlated with A
10+
"C": [1, 3, 2, 5, 4], # C correlates with A at Pearson r = 0.8, just below the 0.9 threshold
1211
}
1312
df = pd.DataFrame(data)
1413

1514
result_df = remove_correlated_columns(df, threshold=0.9)
1615

17-
# B should be removed because it's highly correlated with A
18-
expected_df = pd.DataFrame({"A": [1, 2, 3, 4, 5], "C": [5, 4, 3, 2, 1]})
16+
assert "A" in result_df.columns
17+
assert "B" not in result_df.columns
18+
assert "C" in result_df.columns
1919

20-
pd.testing.assert_frame_equal(result_df, expected_df)
2120

22-
23-
def test_remove_correlated_columns_no_removal():
24-
# Create a DataFrame with no high correlations
21+
def test_remove_correlated_columns_negative_correlation():
2522
data = {
2623
"A": [1, 2, 3, 4, 5],
27-
"B": [2, 3, 4, 5, 6], # B is not perfectly correlated with A
28-
"C": [5, 4, 3, 2, 1], # C is not correlated with A or B
24+
"B": [5, 4, 3, 2, 1], # B is perfectly negatively correlated with A
25+
"C": [1, 3, 2, 5, 4], # |Pearson r| = 0.8 with both A and B, just below the 0.9 threshold
2926
}
3027
df = pd.DataFrame(data)
3128

3229
result_df = remove_correlated_columns(df, threshold=0.9)
3330

34-
# No columns should be removed
35-
pd.testing.assert_frame_equal(result_df, df)
31+
assert "A" in result_df.columns
32+
assert "B" not in result_df.columns, "Negative correlation should also be caught"
33+
assert "C" in result_df.columns
3634

3735

38-
def test_remove_correlated_columns_no_correlation():
39-
# Create a DataFrame where no columns are correlated above the threshold
36+
def test_remove_correlated_columns_no_removal():
4037
data = {
4138
"A": [1, 2, 3, 4, 5],
42-
"B": [2, 3, 4, 5, 6],
43-
"C": [5, 4, 3, 2, 1],
44-
"D": [1, 2, 1, 2, 1],
39+
"B": [1, 3, 2, 5, 4], # Pearson r = 0.8 with A, just below the 0.9 threshold
40+
"C": [3, 1, 4, 2, 5], # Low correlation with A and B
4541
}
4642
df = pd.DataFrame(data)
4743

4844
result_df = remove_correlated_columns(df, threshold=0.9)
4945

50-
# Since no columns are correlated above the threshold, the original DataFrame should be returned
51-
pd.testing.assert_frame_equal(result_df, df)
46+
assert list(result_df.columns) == ["A", "B", "C"]
5247

5348

54-
def test_remove_correlated_columns_different_threshold():
55-
# Create a DataFrame with some correlation
49+
def test_remove_correlated_columns_does_not_mutate_input():
5650
data = {
5751
"A": [1, 2, 3, 4, 5],
58-
"B": [2, 4, 6, 8, 10], # B is perfectly correlated with A
59-
"C": [5, 5, 5, 5, 5], # C is constant, should have no correlation
52+
"B": [2, 4, 6, 8, 10],
53+
"C": [1, 3, 2, 5, 4],
6054
}
6155
df = pd.DataFrame(data)
56+
original_cols = list(df.columns)
57+
58+
remove_correlated_columns(df, threshold=0.9)
6259

63-
# Use a higher threshold, so no columns should be removed
64-
result_df = remove_correlated_columns(df, threshold=0.95)
60+
assert list(df.columns) == original_cols, "Original DataFrame should not be mutated"
6561

66-
pd.testing.assert_frame_equal(result_df, df)
6762

68-
# Use a lower threshold, so column B should be removed
69-
result_df = remove_correlated_columns(df, threshold=0.8)
63+
def test_remove_correlated_columns_different_threshold():
64+
data = {
65+
"A": [1, 2, 3, 4, 5],
66+
"B": [2, 4, 6, 8, 10], # Perfectly correlated with A
67+
"C": [1, 3, 2, 5, 4], # Pearson r = 0.8 with A and B, i.e. right at the lower 0.8 threshold used below
68+
}
69+
df = pd.DataFrame(data)
7070

71-
expected_df = pd.DataFrame({"A": [1, 2, 3, 4, 5], "C": [5, 5, 5, 5, 5]})
71+
result_high = remove_correlated_columns(df, threshold=1.01)
72+
assert len(result_high.columns) == 3, "No columns removed at threshold > 1.0"
7273

73-
pd.testing.assert_frame_equal(result_df, expected_df)
74+
result_low = remove_correlated_columns(df, threshold=0.8)
75+
assert "B" not in result_low.columns

bluecast/tests/test_save_load_roundtrip.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import tempfile
55

66
import numpy as np
7-
import pytest
87

98
from bluecast.general_utils.general_utils import load_for_production, save_to_production
109

0 commit comments

Comments
 (0)