Skip to content

Commit 8d8757c

Browse files
10.9.0
1 parent 234b075 commit 8d8757c

14 files changed

+234
-36
lines changed

API_REFERENCE_FOR_CLASSIFICATION.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# APLRClassifier
22

3-
## class aplr.APLRClassifier(m:int = 3000, v:float = 0.5, random_state:int = 0, n_jobs:int = 0, cv_folds:int = 5, bins:int = 300, verbosity:int = 0, max_interaction_level:int = 1, max_interactions:int = 100000, min_observations_in_split:int = 4, ineligible_boosting_steps_added:int = 15, max_eligible_terms:int = 7, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, early_stopping_rounds: int = 500, num_first_steps_with_linear_effects_only: int = 0, penalty_for_non_linearity: float = 0.0, penalty_for_interactions: float = 0.0, max_terms: int = 0)
3+
## class aplr.APLRClassifier(m:int = 3000, v:float = 0.5, random_state:int = 0, n_jobs:int = 0, cv_folds:int = 5, bins:int = 300, verbosity:int = 0, max_interaction_level:int = 1, max_interactions:int = 100000, min_observations_in_split:int = 4, ineligible_boosting_steps_added:int = 15, max_eligible_terms:int = 7, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, early_stopping_rounds: int = 200, num_first_steps_with_linear_effects_only: int = 0, penalty_for_non_linearity: float = 0.0, penalty_for_interactions: float = 0.0, max_terms: int = 0, ridge_penalty: float = 0.0001)
44

55
### Constructor parameters
66

@@ -46,7 +46,7 @@ Specifies how many boosting steps to wait before searching for interactions. If
4646
#### monotonic_constraints_ignore_interactions (default = False)
4747
See ***monotonic_constraints*** in the ***fit*** method.
4848

49-
#### early_stopping_rounds (default = 500)
49+
#### early_stopping_rounds (default = 200)
5050
If validation loss does not improve during the last ***early_stopping_rounds*** boosting steps then boosting is aborted. The purpose of this constructor parameter is to speed up training and make it easier to select a high ***m***.
5151

5252
#### num_first_steps_with_linear_effects_only (default = 0)
@@ -61,6 +61,9 @@ Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value
6161
#### max_terms (default = 0)
6262
Restricts the maximum number of terms in any of the underlying models trained to ***max_terms***. The default value of 0 means no limit. After the limit is reached, the remaining boosting steps are used to further update the coefficients of already included terms. An optional tuning objective could be to find the lowest positive value of ***max_terms*** that does not increase the prediction error significantly. Low positive values can speed up the training process significantly. Setting a limit with ***max_terms*** may require a higher learning rate for best results.
6363

64+
#### ridge_penalty (default = 0.0001)
65+
Specifies the (weighted) ridge penalty applied to the model. Positive values can smooth model effects and help mitigate boundary problems, such as regression coefficients with excessively high magnitudes near the boundaries. To find the optimal value, consider using a grid search or similar. Negative values are treated as zero.
66+
6467

6568
## Method: fit(X:FloatMatrix, y:List[str], sample_weight:FloatVector = np.empty(0), X_names:List[str] = [], cv_observations:IntMatrix = np.empty([0, 0]), prioritized_predictors_indexes:List[int] = [], monotonic_constraints:List[int] = [], interaction_constraints:List[List[int]] = [], predictor_learning_rates:List[float] = [], predictor_penalties_for_non_linearity:List[float] = [], predictor_penalties_for_interactions:List[float] = [], predictor_min_observations_in_split: List[int] = [])
6669

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# APLRRegressor
22

3-
## class aplr.APLRRegressor(m:int = 3000, v:float = 0.5, random_state:int = 0, loss_function:str = "mse", link_function:str = "identity", n_jobs:int = 0, cv_folds:int = 5, bins:int = 300, max_interaction_level:int = 1, max_interactions:int = 100000, min_observations_in_split:int = 4, ineligible_boosting_steps_added:int = 15, max_eligible_terms:int = 7, verbosity:int = 0, dispersion_parameter:float = 1.5, validation_tuning_metric:str = "default", quantile:float = 0.5, calculate_custom_validation_error_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_loss_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_negative_gradient_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatMatrix],FloatVector]] = None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[FloatVector], FloatVector]] = None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[FloatVector], FloatVector]] = None, boosting_steps_before_interactions_are_allowed:int = 0, monotonic_constraints_ignore_interactions:bool = False, group_mse_by_prediction_bins:int = 10, group_mse_cycle_min_obs_in_bin:int = 30, early_stopping_rounds:int = 500, num_first_steps_with_linear_effects_only:int = 0, penalty_for_non_linearity:float = 0.0, penalty_for_interactions:float = 0.0, max_terms:int = 0)
3+
## class aplr.APLRRegressor(m:int = 3000, v:float = 0.5, random_state:int = 0, loss_function:str = "mse", link_function:str = "identity", n_jobs:int = 0, cv_folds:int = 5, bins:int = 300, max_interaction_level:int = 1, max_interactions:int = 100000, min_observations_in_split:int = 4, ineligible_boosting_steps_added:int = 15, max_eligible_terms:int = 7, verbosity:int = 0, dispersion_parameter:float = 1.5, validation_tuning_metric:str = "default", quantile:float = 0.5, calculate_custom_validation_error_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_loss_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_negative_gradient_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatMatrix],FloatVector]] = None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[FloatVector], FloatVector]] = None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[FloatVector], FloatVector]] = None, boosting_steps_before_interactions_are_allowed:int = 0, monotonic_constraints_ignore_interactions:bool = False, group_mse_by_prediction_bins:int = 10, group_mse_cycle_min_obs_in_bin:int = 30, early_stopping_rounds:int = 200, num_first_steps_with_linear_effects_only:int = 0, penalty_for_non_linearity:float = 0.0, penalty_for_interactions:float = 0.0, max_terms:int = 0, ridge_penalty: float = 0.0001)
44

55
### Constructor parameters
66

@@ -114,7 +114,7 @@ Specifies how many groups to bin predictions by when ***validation_tuning_metric
114114
#### group_mse_cycle_min_obs_in_bin (default = 30)
115115
When ***loss_function*** equals ***group_mse_cycle*** then ***group_mse_cycle_min_obs_in_bin*** specifies the minimum number of observations in each group. The loss function ***group_mse_cycle*** groups by the first predictor in ***X*** in the first boosting step, then by the second predictor in ***X*** in the second boosting step, etc. So in each boosting step the predictor to group by is changed. If ***validation_tuning_metric*** is "default" then "group_mse_by_prediction" will be used as ***validation_tuning_metric***.
116116

117-
#### early_stopping_rounds (default = 500)
117+
#### early_stopping_rounds (default = 200)
118118
If validation loss does not improve during the last ***early_stopping_rounds*** boosting steps then boosting is aborted. The purpose of this constructor parameter is to speed up training and make it easier to select a high ***m***.
119119

120120
#### num_first_steps_with_linear_effects_only (default = 0)
@@ -129,6 +129,9 @@ Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value
129129
#### max_terms (default = 0)
130130
Restricts the maximum number of terms in any of the underlying models trained to ***max_terms***. The default value of 0 means no limit. After the limit is reached, the remaining boosting steps are used to further update the coefficients of already included terms. An optional tuning objective could be to find the lowest positive value of ***max_terms*** that does not increase the prediction error significantly. Low positive values can speed up the training process significantly. Setting a limit with ***max_terms*** may require a higher learning rate for best results.
131131

132+
#### ridge_penalty (default = 0.0001)
133+
Specifies the (weighted) ridge penalty applied to the model. Positive values can smooth model effects and help mitigate boundary problems, such as regression coefficients with excessively high magnitudes near the boundaries. To find the optimal value, consider using a grid search or similar. Negative values are treated as zero.
134+
132135

133136
## Method: fit(X:FloatMatrix, y:FloatVector, sample_weight:FloatVector = np.empty(0), X_names:List[str] = [], cv_observations:IntMatrix = np.empty([0, 0]), prioritized_predictors_indexes:List[int] = [], monotonic_constraints:List[int] = [], group:FloatVector = np.empty(0), interaction_constraints:List[List[int]] = [], other_data:FloatMatrix = np.empty([0, 0]), predictor_learning_rates:List[float] = [], predictor_penalties_for_non_linearity:List[float] = [], predictor_penalties_for_interactions:List[float] = [], predictor_min_observations_in_split: List[int] = [])
134137

aplr/aplr.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,12 @@ def __init__(
6969
monotonic_constraints_ignore_interactions: bool = False,
7070
group_mse_by_prediction_bins: int = 10,
7171
group_mse_cycle_min_obs_in_bin: int = 30,
72-
early_stopping_rounds: int = 500,
72+
early_stopping_rounds: int = 200,
7373
num_first_steps_with_linear_effects_only: int = 0,
7474
penalty_for_non_linearity: float = 0.0,
7575
penalty_for_interactions: float = 0.0,
7676
max_terms: int = 0,
77+
ridge_penalty: float = 0.0001,
7778
):
7879
self.m = m
7980
self.v = v
@@ -120,6 +121,7 @@ def __init__(
120121
self.penalty_for_non_linearity = penalty_for_non_linearity
121122
self.penalty_for_interactions = penalty_for_interactions
122123
self.max_terms = max_terms
124+
self.ridge_penalty = ridge_penalty
123125

124126
# Creating aplr_cpp and setting parameters
125127
self.APLRRegressor = aplr_cpp.APLRRegressor()
@@ -180,6 +182,7 @@ def __set_params_cpp(self):
180182
self.APLRRegressor.penalty_for_non_linearity = self.penalty_for_non_linearity
181183
self.APLRRegressor.penalty_for_interactions = self.penalty_for_interactions
182184
self.APLRRegressor.max_terms = self.max_terms
185+
self.APLRRegressor.ridge_penalty = self.ridge_penalty
183186

184187
def fit(
185188
self,
@@ -343,6 +346,7 @@ def get_params(self, deep=True):
343346
"penalty_for_non_linearity": self.penalty_for_non_linearity,
344347
"penalty_for_interactions": self.penalty_for_interactions,
345348
"max_terms": self.max_terms,
349+
"ridge_penalty": self.ridge_penalty,
346350
}
347351

348352
# For sklearn
@@ -370,11 +374,12 @@ def __init__(
370374
max_eligible_terms: int = 7,
371375
boosting_steps_before_interactions_are_allowed: int = 0,
372376
monotonic_constraints_ignore_interactions: bool = False,
373-
early_stopping_rounds: int = 500,
377+
early_stopping_rounds: int = 200,
374378
num_first_steps_with_linear_effects_only: int = 0,
375379
penalty_for_non_linearity: float = 0.0,
376380
penalty_for_interactions: float = 0.0,
377381
max_terms: int = 0,
382+
ridge_penalty: float = 0.0001,
378383
):
379384
self.m = m
380385
self.v = v
@@ -401,6 +406,7 @@ def __init__(
401406
self.penalty_for_non_linearity = penalty_for_non_linearity
402407
self.penalty_for_interactions = penalty_for_interactions
403408
self.max_terms = max_terms
409+
self.ridge_penalty = ridge_penalty
404410

405411
# Creating aplr_cpp and setting parameters
406412
self.APLRClassifier = aplr_cpp.APLRClassifier()
@@ -435,6 +441,7 @@ def __set_params_cpp(self):
435441
self.APLRClassifier.penalty_for_non_linearity = self.penalty_for_non_linearity
436442
self.APLRClassifier.penalty_for_interactions = self.penalty_for_interactions
437443
self.APLRClassifier.max_terms = self.max_terms
444+
self.APLRClassifier.ridge_penalty = self.ridge_penalty
438445

439446
def fit(
440447
self,
@@ -527,6 +534,7 @@ def get_params(self, deep=True):
527534
"penalty_for_non_linearity": self.penalty_for_non_linearity,
528535
"penalty_for_interactions": self.penalty_for_interactions,
529536
"max_terms": self.max_terms,
537+
"ridge_penalty": self.ridge_penalty,
530538
}
531539

532540
# For sklearn

cpp/APLRClassifier.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,14 @@ class APLRClassifier
5252
std::vector<std::string> unique_term_affiliations;
5353
std::map<std::string, size_t> unique_term_affiliation_map;
5454
std::vector<std::vector<size_t>> base_predictors_in_each_unique_term_affiliation;
55+
double ridge_penalty;
5556

5657
APLRClassifier(size_t m = 3000, double v = 0.5, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), size_t n_jobs = 0,
5758
size_t cv_folds = 5, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1,
5859
size_t max_interactions = 100000, size_t min_observations_in_split = 4, size_t ineligible_boosting_steps_added = 15, size_t max_eligible_terms = 7,
5960
size_t boosting_steps_before_interactions_are_allowed = 0, bool monotonic_constraints_ignore_interactions = false,
60-
size_t early_stopping_rounds = 500, size_t num_first_steps_with_linear_effects_only = 0,
61-
double penalty_for_non_linearity = 0.0, double penalty_for_interactions = 0.0, size_t max_terms = 0);
61+
size_t early_stopping_rounds = 200, size_t num_first_steps_with_linear_effects_only = 0,
62+
double penalty_for_non_linearity = 0.0, double penalty_for_interactions = 0.0, size_t max_terms = 0, double ridge_penalty = 0.0001);
6263
APLRClassifier(const APLRClassifier &other);
6364
~APLRClassifier();
6465
void fit(const MatrixXd &X, const std::vector<std::string> &y, const VectorXd &sample_weight = VectorXd(0),
@@ -85,15 +86,15 @@ APLRClassifier::APLRClassifier(size_t m, double v, uint_fast32_t random_state, s
8586
size_t min_observations_in_split, size_t ineligible_boosting_steps_added, size_t max_eligible_terms,
8687
size_t boosting_steps_before_interactions_are_allowed, bool monotonic_constraints_ignore_interactions,
8788
size_t early_stopping_rounds, size_t num_first_steps_with_linear_effects_only,
88-
double penalty_for_non_linearity, double penalty_for_interactions, size_t max_terms)
89+
double penalty_for_non_linearity, double penalty_for_interactions, size_t max_terms, double ridge_penalty)
8990
: m{m}, v{v}, random_state{random_state}, n_jobs{n_jobs}, cv_folds{cv_folds},
9091
bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
9192
max_interactions{max_interactions}, min_observations_in_split{min_observations_in_split},
9293
ineligible_boosting_steps_added{ineligible_boosting_steps_added}, max_eligible_terms{max_eligible_terms},
9394
boosting_steps_before_interactions_are_allowed{boosting_steps_before_interactions_are_allowed},
9495
monotonic_constraints_ignore_interactions{monotonic_constraints_ignore_interactions}, early_stopping_rounds{early_stopping_rounds},
9596
num_first_steps_with_linear_effects_only{num_first_steps_with_linear_effects_only}, penalty_for_non_linearity{penalty_for_non_linearity},
96-
penalty_for_interactions{penalty_for_interactions}, max_terms{max_terms}
97+
penalty_for_interactions{penalty_for_interactions}, max_terms{max_terms}, ridge_penalty{ridge_penalty}
9798
{
9899
}
99100

@@ -112,7 +113,8 @@ APLRClassifier::APLRClassifier(const APLRClassifier &other)
112113
penalty_for_non_linearity{other.penalty_for_non_linearity}, penalty_for_interactions{other.penalty_for_interactions},
113114
max_terms{other.max_terms}, unique_term_affiliations{other.unique_term_affiliations},
114115
unique_term_affiliation_map{other.unique_term_affiliation_map},
115-
base_predictors_in_each_unique_term_affiliation{other.base_predictors_in_each_unique_term_affiliation}
116+
base_predictors_in_each_unique_term_affiliation{other.base_predictors_in_each_unique_term_affiliation},
117+
ridge_penalty{other.ridge_penalty}
116118
{
117119
}
118120

@@ -145,6 +147,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
145147
logit_models[categories[0]].penalty_for_non_linearity = penalty_for_non_linearity;
146148
logit_models[categories[0]].penalty_for_interactions = penalty_for_interactions;
147149
logit_models[categories[0]].max_terms = max_terms;
150+
logit_models[categories[0]].ridge_penalty = ridge_penalty;
148151
logit_models[categories[0]].fit(X, response_values[categories[0]], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
149152
monotonic_constraints, VectorXi(0), interaction_constraints, MatrixXd(0, 0), predictor_learning_rates,
150153
predictor_penalties_for_non_linearity, predictor_penalties_for_interactions,
@@ -167,6 +170,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
167170
logit_models[category].penalty_for_non_linearity = penalty_for_non_linearity;
168171
logit_models[category].penalty_for_interactions = penalty_for_interactions;
169172
logit_models[category].max_terms = max_terms;
173+
logit_models[category].ridge_penalty = ridge_penalty;
170174
logit_models[category].fit(X, response_values[category], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
171175
monotonic_constraints, VectorXi(0), interaction_constraints, MatrixXd(0, 0), predictor_learning_rates,
172176
predictor_penalties_for_non_linearity, predictor_penalties_for_interactions,

0 commit comments

Comments
 (0)