ottenbreit-data-science
diff --git a/‎API_REFERENCE_FOR_CLASSIFICATION.md‎
Lines changed: 4 additions & 1 deletion b/‎API_REFERENCE_FOR_CLASSIFICATION.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎API_REFERENCE_FOR_REGRESSION.md‎
Lines changed: 4 additions & 1 deletion b/‎API_REFERENCE_FOR_REGRESSION.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎aplr/aplr.py‎
Lines changed: 16 additions & 0 deletions b/‎aplr/aplr.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎cpp/APLRClassifier.h‎
Lines changed: 9 additions & 4 deletions b/‎cpp/APLRClassifier.h‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎cpp/APLRRegressor.h‎
Lines changed: 13 additions & 5 deletions b/‎cpp/APLRRegressor.h‎
Lines changed: 13 additions & 5 deletions
@@ -1,6 +1,6 @@
 # APLRClassifier
 
-## class aplr.APLRClassifier(m:int=3000, v:float=0.1, random_state:int=0, n_jobs:int=0, cv_folds:int=5, bins:int=300, verbosity:int=0, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, early_stopping_rounds: int = 500)
+## class aplr.APLRClassifier(m:int=3000, v:float=0.1, random_state:int=0, n_jobs:int=0, cv_folds:int=5, bins:int=300, verbosity:int=0, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, early_stopping_rounds: int = 500, num_first_steps_with_linear_effects_only: int = 0)
 
 ### Constructor parameters
 
@@ -49,6 +49,9 @@ See ***monotonic_constraints*** in the ***fit*** method.
 #### early_stopping_rounds (default = 500)
 If validation loss does not improve during the last ***early_stopping_rounds*** boosting steps then boosting is aborted. The point with this constructor parameter is to speed up the training and make it easier to select a high ***m***.
 
+#### num_first_steps_with_linear_effects_only (default = 0)
+Specifies the number of initial boosting steps that are reserved for linear effects only. A value of 0 means that non-linear effects are allowed from the first boosting step. Reasons for setting this parameter higher than 0 include: 1) building a more interpretable model with more emphasis on linear effects, or 2) building a linear-only model by setting ***num_first_steps_with_linear_effects_only*** to a value no less than ***m***.
+
 
 ## Method: fit(X:npt.ArrayLike, y:List[str], sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], interaction_constraints:List[List[int]]=[])
 
 
@@ -1,6 +1,6 @@
 # APLRRegressor
 
-## class aplr.APLRRegressor(m:int=3000, v:float=0.1, random_state:int=0, loss_function:str="mse", link_function:str="identity", n_jobs:int=0, cv_folds:int=5, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, dispersion_parameter:float=1.5, validation_tuning_metric:str="default", quantile:float=0.5, calculate_custom_validation_error_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_loss_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_negative_gradient_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, group_mse_by_prediction_bins: int = 10, group_mse_cycle_min_obs_in_bin: int = 30, early_stopping_rounds: int = 500)
+## class aplr.APLRRegressor(m:int=3000, v:float=0.1, random_state:int=0, loss_function:str="mse", link_function:str="identity", n_jobs:int=0, cv_folds:int=5, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, dispersion_parameter:float=1.5, validation_tuning_metric:str="default", quantile:float=0.5, calculate_custom_validation_error_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_loss_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_negative_gradient_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, group_mse_by_prediction_bins: int = 10, group_mse_cycle_min_obs_in_bin: int = 30, early_stopping_rounds: int = 500, num_first_steps_with_linear_effects_only: int = 0)
 
 ### Constructor parameters
 
@@ -117,6 +117,9 @@ When ***loss_function*** equals ***group_mse_cycle*** then ***group_mse_cycle_mi
 #### early_stopping_rounds (default = 500)
 If validation loss does not improve during the last ***early_stopping_rounds*** boosting steps then boosting is aborted. The point with this constructor parameter is to speed up the training and make it easier to select a high ***m***.
 
+#### num_first_steps_with_linear_effects_only (default = 0)
+Specifies the number of initial boosting steps that are reserved for linear effects only. A value of 0 means that non-linear effects are allowed from the first boosting step. Reasons for setting this parameter higher than 0 include: 1) building a more interpretable model with more emphasis on linear effects, or 2) building a linear-only model by setting ***num_first_steps_with_linear_effects_only*** to a value no less than ***m***.
+
 
 ## Method: fit(X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], group:npt.ArrayLike = np.empty(0), interaction_constraints:List[List[int]]=[], other_data: npt.ArrayLike = np.empty([0, 0]))
 
 
@@ -65,6 +65,7 @@ def __init__(
         group_mse_by_prediction_bins: int = 10,
         group_mse_cycle_min_obs_in_bin: int = 30,
         early_stopping_rounds: int = 500,
+        num_first_steps_with_linear_effects_only: int = 0,
     ):
         self.m = m
         self.v = v
@@ -105,6 +106,9 @@ def __init__(
         self.group_mse_by_prediction_bins = group_mse_by_prediction_bins
         self.group_mse_cycle_min_obs_in_bin = group_mse_cycle_min_obs_in_bin
         self.early_stopping_rounds = early_stopping_rounds
+        self.num_first_steps_with_linear_effects_only = (
+            num_first_steps_with_linear_effects_only
+        )
 
         # Creating aplr_cpp and setting parameters
         self.APLRRegressor = aplr_cpp.APLRRegressor()
@@ -159,6 +163,9 @@ def __set_params_cpp(self):
             self.group_mse_cycle_min_obs_in_bin
         )
         self.APLRRegressor.early_stopping_rounds = self.early_stopping_rounds
+        self.APLRRegressor.num_first_steps_with_linear_effects_only = (
+            self.num_first_steps_with_linear_effects_only
+        )
 
     def fit(
         self,
@@ -286,6 +293,7 @@ def get_params(self, deep=True):
             "group_mse_by_prediction_bins": self.group_mse_by_prediction_bins,
             "group_mse_cycle_min_obs_in_bin": self.group_mse_cycle_min_obs_in_bin,
             "early_stopping_rounds": self.early_stopping_rounds,
+            "num_first_steps_with_linear_effects_only": self.num_first_steps_with_linear_effects_only,
         }
 
     # For sklearn
@@ -314,6 +322,7 @@ def __init__(
         boosting_steps_before_interactions_are_allowed: int = 0,
         monotonic_constraints_ignore_interactions: bool = False,
         early_stopping_rounds: int = 500,
+        num_first_steps_with_linear_effects_only: int = 0,
     ):
         self.m = m
         self.v = v
@@ -334,6 +343,9 @@ def __init__(
             monotonic_constraints_ignore_interactions
         )
         self.early_stopping_rounds = early_stopping_rounds
+        self.num_first_steps_with_linear_effects_only = (
+            num_first_steps_with_linear_effects_only
+        )
 
         # Creating aplr_cpp and setting parameters
         self.APLRClassifier = aplr_cpp.APLRClassifier()
@@ -362,6 +374,9 @@ def __set_params_cpp(self):
             self.monotonic_constraints_ignore_interactions
         )
         self.APLRClassifier.early_stopping_rounds = self.early_stopping_rounds
+        self.APLRClassifier.num_first_steps_with_linear_effects_only = (
+            self.num_first_steps_with_linear_effects_only
+        )
 
     def fit(
         self,
@@ -434,6 +449,7 @@ def get_params(self, deep=True):
             "boosting_steps_before_interactions_are_allowed": self.boosting_steps_before_interactions_are_allowed,
             "monotonic_constraints_ignore_interactions": self.monotonic_constraints_ignore_interactions,
             "early_stopping_rounds": self.early_stopping_rounds,
+            "num_first_steps_with_linear_effects_only": self.num_first_steps_with_linear_effects_only,
         }
 
     # For sklearn
 
@@ -45,12 +45,13 @@ class APLRClassifier
     size_t boosting_steps_before_interactions_are_allowed;
     bool monotonic_constraints_ignore_interactions;
     size_t early_stopping_rounds;
+    size_t num_first_steps_with_linear_effects_only;
 
     APLRClassifier(size_t m = 3000, double v = 0.1, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), size_t n_jobs = 0,
                    size_t cv_folds = 5, size_t reserved_terms_times_num_x = 100, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1,
                    size_t max_interactions = 100000, size_t min_observations_in_split = 20, size_t ineligible_boosting_steps_added = 10, size_t max_eligible_terms = 5,
                    size_t boosting_steps_before_interactions_are_allowed = 0, bool monotonic_constraints_ignore_interactions = false,
-                   size_t early_stopping_rounds = 500);
+                   size_t early_stopping_rounds = 500, size_t num_first_steps_with_linear_effects_only = 0);
     APLRClassifier(const APLRClassifier &other);
     ~APLRClassifier();
     void fit(const MatrixXd &X, const std::vector<std::string> &y, const VectorXd &sample_weight = VectorXd(0),
@@ -71,13 +72,14 @@ APLRClassifier::APLRClassifier(size_t m, double v, uint_fast32_t random_state, s
                                size_t reserved_terms_times_num_x, size_t bins, size_t verbosity, size_t max_interaction_level, size_t max_interactions,
                                size_t min_observations_in_split, size_t ineligible_boosting_steps_added, size_t max_eligible_terms,
                                size_t boosting_steps_before_interactions_are_allowed, bool monotonic_constraints_ignore_interactions,
-                               size_t early_stopping_rounds)
+                               size_t early_stopping_rounds, size_t num_first_steps_with_linear_effects_only)
     : m{m}, v{v}, random_state{random_state}, n_jobs{n_jobs}, cv_folds{cv_folds},
       reserved_terms_times_num_x{reserved_terms_times_num_x}, bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
       max_interactions{max_interactions}, min_observations_in_split{min_observations_in_split},
       ineligible_boosting_steps_added{ineligible_boosting_steps_added}, max_eligible_terms{max_eligible_terms},
       boosting_steps_before_interactions_are_allowed{boosting_steps_before_interactions_are_allowed},
-      monotonic_constraints_ignore_interactions{monotonic_constraints_ignore_interactions}, early_stopping_rounds{early_stopping_rounds}
+      monotonic_constraints_ignore_interactions{monotonic_constraints_ignore_interactions}, early_stopping_rounds{early_stopping_rounds},
+      num_first_steps_with_linear_effects_only{num_first_steps_with_linear_effects_only}
 {
 }
 
@@ -91,7 +93,8 @@ APLRClassifier::APLRClassifier(const APLRClassifier &other)
       feature_importance{other.feature_importance},
       boosting_steps_before_interactions_are_allowed{other.boosting_steps_before_interactions_are_allowed},
       monotonic_constraints_ignore_interactions{other.monotonic_constraints_ignore_interactions},
-      early_stopping_rounds{other.early_stopping_rounds}
+      early_stopping_rounds{other.early_stopping_rounds},
+      num_first_steps_with_linear_effects_only{other.num_first_steps_with_linear_effects_only}
 {
 }
 
@@ -117,6 +120,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
         logit_models[categories[0]].boosting_steps_before_interactions_are_allowed = boosting_steps_before_interactions_are_allowed;
         logit_models[categories[0]].monotonic_constraints_ignore_interactions = monotonic_constraints_ignore_interactions;
         logit_models[categories[0]].early_stopping_rounds = early_stopping_rounds;
+        logit_models[categories[0]].num_first_steps_with_linear_effects_only = num_first_steps_with_linear_effects_only;
         logit_models[categories[0]].fit(X, response_values[categories[0]], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
                                         monotonic_constraints, VectorXi(0), interaction_constraints);
 
@@ -133,6 +137,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
             logit_models[category].boosting_steps_before_interactions_are_allowed = boosting_steps_before_interactions_are_allowed;
             logit_models[category].monotonic_constraints_ignore_interactions = monotonic_constraints_ignore_interactions;
             logit_models[category].early_stopping_rounds = early_stopping_rounds;
+            logit_models[category].num_first_steps_with_linear_effects_only = num_first_steps_with_linear_effects_only;
             logit_models[category].fit(X, response_values[category], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
                                        monotonic_constraints, VectorXi(0), interaction_constraints);
         }
 
@@ -72,6 +72,7 @@ class APLRRegressor
     VectorXd intercept_steps;
     double best_validation_error_so_far;
     size_t best_m_so_far;
+    bool linear_effects_only_in_this_boosting_step;
 
     void validate_input_to_fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight, const std::vector<std::string> &X_names,
                                const MatrixXi &cv_observations, const std::vector<size_t> &prioritized_predictors_indexes,
@@ -210,6 +211,7 @@ class APLRRegressor
     VectorXi term_main_predictor_indexes;
     VectorXi term_interaction_levels;
     size_t early_stopping_rounds;
+    size_t num_first_steps_with_linear_effects_only;
 
     APLRRegressor(size_t m = 3000, double v = 0.1, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), std::string loss_function = "mse",
                   std::string link_function = "identity", size_t n_jobs = 0, size_t cv_folds = 5,
@@ -222,7 +224,8 @@ class APLRRegressor
                   const std::function<VectorXd(VectorXd)> &calculate_custom_transform_linear_predictor_to_predictions_function = {},
                   const std::function<VectorXd(VectorXd)> &calculate_custom_differentiate_predictions_wrt_linear_predictor_function = {},
                   size_t boosting_steps_before_interactions_are_allowed = 0, bool monotonic_constraints_ignore_interactions = false,
-                  size_t group_mse_by_prediction_bins = 10, size_t group_mse_cycle_min_obs_in_bin = 30, size_t early_stopping_rounds = 500);
+                  size_t group_mse_by_prediction_bins = 10, size_t group_mse_cycle_min_obs_in_bin = 30, size_t early_stopping_rounds = 500,
+                  size_t num_first_steps_with_linear_effects_only = 0);
     APLRRegressor(const APLRRegressor &other);
     ~APLRRegressor();
     void fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight = VectorXd(0), const std::vector<std::string> &X_names = {},
@@ -262,7 +265,8 @@ APLRRegressor::APLRRegressor(size_t m, double v, uint_fast32_t random_state, std
                              const std::function<VectorXd(VectorXd)> &calculate_custom_transform_linear_predictor_to_predictions_function,
                              const std::function<VectorXd(VectorXd)> &calculate_custom_differentiate_predictions_wrt_linear_predictor_function,
                              size_t boosting_steps_before_interactions_are_allowed, bool monotonic_constraints_ignore_interactions,
-                             size_t group_mse_by_prediction_bins, size_t group_mse_cycle_min_obs_in_bin, size_t early_stopping_rounds)
+                             size_t group_mse_by_prediction_bins, size_t group_mse_cycle_min_obs_in_bin, size_t early_stopping_rounds,
+                             size_t num_first_steps_with_linear_effects_only)
     : reserved_terms_times_num_x{reserved_terms_times_num_x}, intercept{NAN_DOUBLE}, m{m}, v{v},
       loss_function{loss_function}, link_function{link_function}, cv_folds{cv_folds}, n_jobs{n_jobs}, random_state{random_state},
       bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
@@ -276,7 +280,8 @@ APLRRegressor::APLRRegressor(size_t m, double v, uint_fast32_t random_state, std
       calculate_custom_differentiate_predictions_wrt_linear_predictor_function{calculate_custom_differentiate_predictions_wrt_linear_predictor_function},
       boosting_steps_before_interactions_are_allowed{boosting_steps_before_interactions_are_allowed},
       monotonic_constraints_ignore_interactions{monotonic_constraints_ignore_interactions}, group_mse_by_prediction_bins{group_mse_by_prediction_bins},
-      group_mse_cycle_min_obs_in_bin{group_mse_cycle_min_obs_in_bin}, cv_error{NAN_DOUBLE}, early_stopping_rounds{early_stopping_rounds}
+      group_mse_cycle_min_obs_in_bin{group_mse_cycle_min_obs_in_bin}, cv_error{NAN_DOUBLE}, early_stopping_rounds{early_stopping_rounds},
+      num_first_steps_with_linear_effects_only{num_first_steps_with_linear_effects_only}
 {
 }
 
@@ -301,7 +306,8 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other)
       monotonic_constraints_ignore_interactions{other.monotonic_constraints_ignore_interactions}, group_mse_by_prediction_bins{other.group_mse_by_prediction_bins},
       group_mse_cycle_min_obs_in_bin{other.group_mse_cycle_min_obs_in_bin}, cv_error{other.cv_error},
       term_main_predictor_indexes{other.term_main_predictor_indexes}, term_interaction_levels{other.term_interaction_levels},
-      early_stopping_rounds{other.early_stopping_rounds}
+      early_stopping_rounds{other.early_stopping_rounds},
+      num_first_steps_with_linear_effects_only{other.num_first_steps_with_linear_effects_only}
 {
 }
 
@@ -1026,6 +1032,7 @@ void APLRRegressor::execute_boosting_steps(Eigen::Index fold_index)
     abort_boosting = false;
     for (size_t boosting_step = 0; boosting_step < m; ++boosting_step)
     {
+        linear_effects_only_in_this_boosting_step = num_first_steps_with_linear_effects_only > boosting_step;
         execute_boosting_step(boosting_step, fold_index);
         if (abort_boosting)
             break;
@@ -1137,7 +1144,8 @@ void APLRRegressor::estimate_split_point_for_each_term(std::vector<Term> &terms,
 #pragma omp parallel for schedule(guided) if (multithreading)
     for (size_t i = 0; i < terms_indexes.size(); ++i)
     {
-        terms[terms_indexes[i]].estimate_split_point(X_train, neg_gradient_current, sample_weight_train, bins, v, min_observations_in_split);
+        terms[terms_indexes[i]].estimate_split_point(X_train, neg_gradient_current, sample_weight_train, bins, v, min_observations_in_split,
+                                                     linear_effects_only_in_this_boosting_step);
     }
 }