Merge pull request #62 from ThomasMeissnerDS/update_unit_tests

ThomasMeissnerDS · web-flow · commit b9f0e0ae746e · 2025-08-20T15:24:28.000+02:00
Add Xgboost unit tests
diff --git a/bluecast/blueprints/custom_model_recipes.py b/bluecast/blueprints/custom_model_recipes.py
@@ -38,14 +38,17 @@ def autotune(
                 "penalty": ["l2"],
                 "C": np.logspace(0.1, 1, 5),
                 "class_weight": ["balanced", None],
-                "solver": ["newton-cg", "newton-cholesky", "sag", "saga"],
+                # solvers that support l2
+                "solver": ["lbfgs", "newton-cg", "newton-cholesky", "sag", "saga"],
             },
             {
                 "penalty": ["elasticnet"],
                 "C": np.logspace(0.1, 1, 5),
                 "class_weight": ["balanced", None],
-                "solver": ["newton-cg", "newton-cholesky", "sag", "saga"],
-                "l1_ratio": np.arange(0, 1, 3),
+                # the elasticnet penalty is only supported by the 'saga' solver
+                "solver": ["saga"],
+                # three evenly spaced ratios, both endpoints included: 0.0, 0.5, 1.0
+                "l1_ratio": np.linspace(0.0, 1.0, 3),
             },
         ]
 
diff --git a/bluecast/tests/test_xgboost_custom.py b/bluecast/tests/test_xgboost_custom.py
@@ -0,0 +1,155 @@
+from typing import Optional, Tuple
+
+import numpy as np
+import pandas as pd
+
+from bluecast.blueprints.cast import BlueCast
+from bluecast.config.training_config import (
+    TrainingConfig,
+    XgboostFinalParamConfig,
+    XgboostTuneParamsConfig,
+)
+from bluecast.ml_modelling.xgboost import XgboostModel
+from bluecast.preprocessing.custom import CustomPreprocessing
+
+
+def test_bluecast_with_custom_xgboost_no_tuning():
+    train_config = TrainingConfig()
+    train_config.hyperparameter_tuning_rounds = 5
+    train_config.hypertuning_cv_folds = 2
+    train_config.autotune_model = False
+
+    xgboost_param_config = XgboostTuneParamsConfig()
+    xgboost_param_config.steps_min = 2
+    xgboost_param_config.steps_max = 100
+    xgboost_param_config.max_depth_max = 3
+
+    # Use a 2-class softprob objective for the binary target; 50 steps keeps it fast
+    xgb_final_params = XgboostFinalParamConfig()
+    xgb_final_params.params["objective"] = "multi:softprob"
+    xgb_final_params.params["eval_metric"] = "mlogloss"
+    xgb_final_params.params["num_class"] = 2
+    xgb_final_params.params["steps"] = 50
+
+    class MyCustomLastMilePreprocessing(CustomPreprocessing):
+        def custom_function(self, df: pd.DataFrame) -> pd.DataFrame:
+            df["custom_col"] = 5
+            return df
+
+        def fit_transform(
+            self, df: pd.DataFrame, target: pd.Series
+        ) -> Tuple[pd.DataFrame, pd.Series]:
+            df = self.custom_function(df)
+            return df, target
+
+        def transform(
+            self,
+            df: pd.DataFrame,
+            target: Optional[pd.Series] = None,
+            predicton_mode: bool = False,
+        ) -> Tuple[pd.DataFrame, Optional[pd.Series]]:
+            df = self.custom_function(df)
+            return df, target
+
+    bluecast = BlueCast(
+        class_problem="binary",
+        ml_model=XgboostModel(
+            class_problem="binary",
+            conf_training=train_config,
+            conf_xgboost=xgboost_param_config,
+            conf_params_xgboost=xgb_final_params,
+        ),
+        conf_xgboost=xgboost_param_config,
+        conf_training=train_config,
+        custom_last_mile_computation=MyCustomLastMilePreprocessing(),
+    )
+
+    x_train = pd.DataFrame(
+        {
+            "feature1": [i for i in range(20)],
+            "feature2": [i for i in range(20)],
+            "feature3": [i for i in range(20)],
+            "feature4": [i for i in range(20)],
+            "feature5": [i for i in range(20)],
+            "feature6": [i for i in range(20)],
+        }
+    )
+    y_train = pd.Series([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    x_test = pd.DataFrame(
+        {
+            "feature1": [i for i in range(10)],
+            "feature2": [i for i in range(10)],
+            "feature3": [i for i in range(10)],
+            "feature4": [i for i in range(10)],
+            "feature5": [i for i in range(10)],
+            "feature6": [i for i in range(10)],
+        }
+    )
+
+    x_train["target"] = y_train
+
+    bluecast.fit(x_train, "target")
+
+    predicted_probas, predicted_classes = bluecast.predict(x_test)
+    _ = bluecast.predict_proba(x_test)
+
+    assert isinstance(predicted_probas, np.ndarray)
+    assert isinstance(predicted_classes, np.ndarray)
+    assert len(bluecast.experiment_tracker.experiment_id) == 0
+
+
+def test_bluecast_with_custom_xgboost_with_tuning():
+    train_config = TrainingConfig()
+    train_config.hyperparameter_tuning_rounds = 5
+    train_config.hypertuning_cv_folds = 2
+    train_config.autotune_model = True
+    train_config.plot_hyperparameter_tuning_overview = False
+
+    xgboost_param_config = XgboostTuneParamsConfig()
+    xgboost_param_config.steps_min = 2
+    xgboost_param_config.steps_max = 100
+    xgboost_param_config.max_depth_max = 3
+
+    bluecast = BlueCast(
+        class_problem="binary",
+        ml_model=XgboostModel(
+            class_problem="binary",
+            conf_training=train_config,
+            conf_xgboost=xgboost_param_config,
+        ),
+        conf_xgboost=xgboost_param_config,
+        conf_training=train_config,
+    )
+
+    x_train = pd.DataFrame(
+        {
+            "feature1": [i for i in range(20)],
+            "feature2": [i for i in range(20)],
+            "feature3": [i for i in range(20)],
+            "feature4": [i for i in range(20)],
+            "feature5": [i for i in range(20)],
+            "feature6": [i for i in range(20)],
+        }
+    )
+    y_train = pd.Series([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    x_test = pd.DataFrame(
+        {
+            "feature1": [i for i in range(10)],
+            "feature2": [i for i in range(10)],
+            "feature3": [i for i in range(10)],
+            "feature4": [i for i in range(10)],
+            "feature5": [i for i in range(10)],
+            "feature6": [i for i in range(10)],
+        }
+    )
+
+    x_train["target"] = y_train
+
+    bluecast.fit(x_train, "target")
+
+    predicted_probas, predicted_classes = bluecast.predict(x_test)
+    _ = bluecast.predict_proba(x_test)
+
+    assert isinstance(predicted_probas, np.ndarray)
+    assert isinstance(predicted_classes, np.ndarray)
+    assert len(bluecast.experiment_tracker.experiment_id) == 5
diff --git a/bluecast/tests/test_xgboost_regression_custom.py b/bluecast/tests/test_xgboost_regression_custom.py
@@ -0,0 +1,142 @@
+from typing import Optional, Tuple
+
+import numpy as np
+import pandas as pd
+
+from bluecast.blueprints.cast_regression import BlueCastRegression
+from bluecast.config.training_config import (
+    TrainingConfig,
+    XgboostTuneParamsRegressionConfig,
+)
+from bluecast.ml_modelling.xgboost_regression import XgboostModelRegression
+from bluecast.preprocessing.custom import CustomPreprocessing
+
+
+def test_bluecast_regression_with_custom_xgboost_no_tuning():
+    train_config = TrainingConfig()
+    train_config.hyperparameter_tuning_rounds = 5
+    train_config.hypertuning_cv_folds = 2
+    train_config.autotune_model = False
+
+    xgboost_param_config = XgboostTuneParamsRegressionConfig()
+    xgboost_param_config.steps_min = 2
+    xgboost_param_config.steps_max = 100
+    xgboost_param_config.max_depth_max = 3
+
+    class MyCustomLastMilePreprocessing(CustomPreprocessing):
+        def custom_function(self, df: pd.DataFrame) -> pd.DataFrame:
+            df["custom_col"] = 5
+            return df
+
+        def fit_transform(
+            self, df: pd.DataFrame, target: pd.Series
+        ) -> Tuple[pd.DataFrame, pd.Series]:
+            df = self.custom_function(df)
+            return df, target
+
+        def transform(
+            self,
+            df: pd.DataFrame,
+            target: Optional[pd.Series] = None,
+            predicton_mode: bool = False,
+        ) -> Tuple[pd.DataFrame, Optional[pd.Series]]:
+            df = self.custom_function(df)
+            return df, target
+
+    bluecast = BlueCastRegression(
+        class_problem="regression",
+        ml_model=XgboostModelRegression(
+            class_problem="regression",
+            conf_training=train_config,
+            conf_xgboost=xgboost_param_config,
+        ),
+        conf_xgboost=xgboost_param_config,
+        conf_training=train_config,
+        custom_last_mile_computation=MyCustomLastMilePreprocessing(),
+    )
+
+    x_train = pd.DataFrame(
+        {
+            "feature1": [i for i in range(20)],
+            "feature2": [i for i in range(20)],
+            "feature3": [i for i in range(20)],
+            "feature4": [i for i in range(20)],
+            "feature5": [i for i in range(20)],
+            "feature6": [i for i in range(20)],
+        }
+    )
+    y_train = pd.Series([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    x_test = pd.DataFrame(
+        {
+            "feature1": [i for i in range(10)],
+            "feature2": [i for i in range(10)],
+            "feature3": [i for i in range(10)],
+            "feature4": [i for i in range(10)],
+            "feature5": [i for i in range(10)],
+            "feature6": [i for i in range(10)],
+        }
+    )
+
+    x_train["target"] = y_train
+
+    bluecast.fit(x_train, "target")
+
+    predicted_values = bluecast.predict(x_test)
+
+    assert isinstance(predicted_values, np.ndarray)
+    assert len(bluecast.experiment_tracker.experiment_id) == 0
+
+
+def test_bluecast_regression_with_custom_xgboost_with_tuning():
+    train_config = TrainingConfig()
+    train_config.hyperparameter_tuning_rounds = 5
+    train_config.hypertuning_cv_folds = 2
+    train_config.autotune_model = True
+    train_config.plot_hyperparameter_tuning_overview = False
+
+    xgboost_param_config = XgboostTuneParamsRegressionConfig()
+    xgboost_param_config.steps_min = 2
+    xgboost_param_config.steps_max = 100
+    xgboost_param_config.max_depth_max = 3
+
+    bluecast = BlueCastRegression(
+        class_problem="regression",
+        ml_model=XgboostModelRegression(
+            class_problem="regression",
+            conf_training=train_config,
+            conf_xgboost=xgboost_param_config,
+        ),
+        conf_xgboost=xgboost_param_config,
+        conf_training=train_config,
+    )
+
+    x_train = pd.DataFrame(
+        {
+            "feature1": [i for i in range(20)],
+            "feature2": [i for i in range(20)],
+            "feature3": [i for i in range(20)],
+            "feature4": [i for i in range(20)],
+            "feature5": [i for i in range(20)],
+            "feature6": [i for i in range(20)],
+        }
+    )
+    y_train = pd.Series([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    x_test = pd.DataFrame(
+        {
+            "feature1": [i for i in range(10)],
+            "feature2": [i for i in range(10)],
+            "feature3": [i for i in range(10)],
+            "feature4": [i for i in range(10)],
+            "feature5": [i for i in range(10)],
+            "feature6": [i for i in range(10)],
+        }
+    )
+
+    x_train["target"] = y_train
+
+    bluecast.fit(x_train, "target")
+
+    predicted_values = bluecast.predict(x_test)
+
+    assert isinstance(predicted_values, np.ndarray)
+    assert len(bluecast.experiment_tracker.experiment_id) == 5