
Commit cc6af13

10.15.0
1 parent 9cc4963 commit cc6af13

File tree: 10 files changed, +216 -20 lines


API_REFERENCE_FOR_REGRESSION.md

Lines changed: 7 additions & 4 deletions
@@ -1,6 +1,6 @@
 # APLRRegressor
 
-## class aplr.APLRRegressor(m:int = 3000, v:float = 0.5, random_state:int = 0, loss_function:str = "mse", link_function:str = "identity", n_jobs:int = 0, cv_folds:int = 5, bins:int = 300, max_interaction_level:int = 1, max_interactions:int = 100000, min_observations_in_split:int = 4, ineligible_boosting_steps_added:int = 15, max_eligible_terms:int = 7, verbosity:int = 0, dispersion_parameter:float = 1.5, validation_tuning_metric:str = "default", quantile:float = 0.5, calculate_custom_validation_error_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_loss_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_negative_gradient_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatMatrix],FloatVector]] = None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[FloatVector], FloatVector]] = None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[FloatVector], FloatVector]] = None, boosting_steps_before_interactions_are_allowed:int = 0, monotonic_constraints_ignore_interactions:bool = False, group_mse_by_prediction_bins:int = 10, group_mse_cycle_min_obs_in_bin:int = 30, early_stopping_rounds:int = 200, num_first_steps_with_linear_effects_only:int = 0, penalty_for_non_linearity:float = 0.0, penalty_for_interactions:float = 0.0, max_terms:int = 0, ridge_penalty: float = 0.0001)
+## class aplr.APLRRegressor(m:int = 3000, v:float = 0.5, random_state:int = 0, loss_function:str = "mse", link_function:str = "identity", n_jobs:int = 0, cv_folds:int = 5, bins:int = 300, max_interaction_level:int = 1, max_interactions:int = 100000, min_observations_in_split:int = 4, ineligible_boosting_steps_added:int = 15, max_eligible_terms:int = 7, verbosity:int = 0, dispersion_parameter:float = 1.5, validation_tuning_metric:str = "default", quantile:float = 0.5, calculate_custom_validation_error_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_loss_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatVector, FloatMatrix], float]] = None, calculate_custom_negative_gradient_function:Optional[Callable[[FloatVector, FloatVector, FloatVector, FloatMatrix],FloatVector]] = None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[FloatVector], FloatVector]] = None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[FloatVector], FloatVector]] = None, boosting_steps_before_interactions_are_allowed:int = 0, monotonic_constraints_ignore_interactions:bool = False, group_mse_by_prediction_bins:int = 10, group_mse_cycle_min_obs_in_bin:int = 30, early_stopping_rounds:int = 200, num_first_steps_with_linear_effects_only:int = 0, penalty_for_non_linearity:float = 0.0, penalty_for_interactions:float = 0.0, max_terms:int = 0, ridge_penalty: float = 0.0001, mean_bias_correction:bool = False)
 
 ### Constructor parameters
 
@@ -14,7 +14,7 @@ The learning rate. Must be greater than zero and not more than one. The higher t
 Used to randomly split training observations into cv_folds if ***cv_observations*** is not specified when fitting.
 
 #### loss_function (default = "mse")
-Determines the loss function used. Allowed values are "mse", "binomial", "poisson", "gamma", "tweedie", "group_mse", "group_mse_cycle","mae", "quantile", "negative_binomial", "cauchy", "weibull" and "custom_function". This is used together with ***link_function***. When ***loss_function*** is "group_mse" then the "group" argument in the ***fit*** method must be provided. In the latter case APLR will try to minimize group MSE when training the model. When using "group_mse_cycle", ***group_mse_cycle_min_obs_in_bin*** controls the minimum amount of observations in each group. For a description of "group_mse_cycle" see ***group_mse_cycle_min_obs_in_bin***. The ***loss_function*** "quantile" is used together with the ***quantile*** constructor parameter. When ***loss_function*** is "custom_function" then the constructor parameters ***calculate_custom_loss_function*** and ***calculate_custom_negative_gradient_function***, both described below, must be provided.
+Determines the loss function used. Allowed values are "mse", "binomial", "poisson", "gamma", "tweedie", "group_mse", "group_mse_cycle","mae", "quantile", "negative_binomial", "cauchy", "weibull", "huber" and "custom_function". This is used together with ***link_function***. When ***loss_function*** is "group_mse" then the "group" argument in the ***fit*** method must be provided. In the latter case APLR will try to minimize group MSE when training the model. When using "group_mse_cycle", ***group_mse_cycle_min_obs_in_bin*** controls the minimum amount of observations in each group. For a description of "group_mse_cycle" see ***group_mse_cycle_min_obs_in_bin***. The ***loss_function*** "quantile" is used together with the ***quantile*** constructor parameter. When ***loss_function*** is "custom_function" then the constructor parameters ***calculate_custom_loss_function*** and ***calculate_custom_negative_gradient_function***, both described below, must be provided.
 
 #### link_function (default = "identity")
 Determines how the linear predictor is transformed to predictions. Allowed values are "identity", "logit", "log" and "custom_function". For an ordinary regression model use ***loss_function*** "mse" and ***link_function*** "identity". For logistic regression use ***loss_function*** "binomial" and ***link_function*** "logit". For a multiplicative model use the "log" ***link_function***. The "log" ***link_function*** often works best with a "poisson", "gamma", "tweedie", "negative_binomial" or "weibull" ***loss_function***, depending on the data. The ***loss_function*** "poisson", "gamma", "tweedie", "negative_binomial" or "weibull" should only be used with the "log" ***link_function***. Inappropriate combinations of ***loss_function*** and ***link_function*** may result in a warning message when fitting the model and/or a poor model fit. When ***link_function*** is "custom_function" then the constructor parameters ***calculate_custom_transform_linear_predictor_to_predictions_function*** and ***calculate_custom_differentiate_predictions_wrt_linear_predictor_function***, both described below, must be provided.
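
The hunk above adds "huber" to the list of allowed loss functions. A minimal sketch of how the new loss might be used with the default "identity" link, assuming the usual fit/predict workflow of APLRRegressor; the synthetic data and outlier fraction below are illustrative only:

```python
import numpy as np
from aplr import APLRRegressor

# Illustrative data: a linear signal plus a few large outliers in the response.
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 3))
y = 2.0 * X[:, 0] - X[:, 1] + rng.normal(scale=0.5, size=1000)
y[:20] += 25.0  # outliers that a squared-error loss would chase

# Huber loss (new in this release) with the identity link; all other
# constructor arguments are left at their defaults.
model = APLRRegressor(loss_function="huber", link_function="identity")
model.fit(X, y)
predictions = model.predict(X)
```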
@@ -47,10 +47,10 @@ Limits 1) the number of terms already in the model that can be considered as int
 ***0*** does not print progress reports during fitting. ***1*** prints a summary after running the ***fit*** method. ***2*** prints a summary after each boosting step.
 
 #### dispersion_parameter (default = 1.5)
-Specifies the variance power when ***loss_function*** is "tweedie". Specifies a dispersion parameter when ***loss_function*** is "negative_binomial", "cauchy" or "weibull".
+Specifies the variance power when ***loss_function*** is "tweedie". Specifies a dispersion parameter when ***loss_function*** is "negative_binomial", "cauchy" or "weibull". For "huber" it specifies the delta parameter.
 
 #### validation_tuning_metric (default = "default")
-Specifies which metric to use for validating the model and tuning ***m***. The model will try to minimize the validation metric. Available options are "default" (using the same methodology as when calculating the training error), "mse", "mae", "negative_gini" (normalized), "group_mse", "group_mse_by_prediction", "neg_top_quantile_mean_response", "bottom_quantile_mean_response" and "custom_function". The default is often a choice that fits well with respect to the ***loss_function*** chosen. However, if you want to use ***loss_function*** or ***dispersion_parameter*** as tuning parameters then the default is not suitable. "group_mse" requires that the "group" argument in the ***fit*** method is provided. "group_mse_by_prediction" groups predictions by up to ***group_mse_by_prediction_bins*** groups and calculates groupwise mse. "neg_top_quantile_mean_response" calculates the negative of the sample weighted mean response for observations with predictions in the top quantile (as specified by the ***quantile*** parameter). For example, if ***quantile*** is 0.95, this metric will be the negative of the sample weighted mean response for the 5% of observations with the highest predictions. "bottom_quantile_mean_response" calculates the sample weighted mean response for observations with predictions in the bottom quantile (as specified by the ***quantile*** parameter). For example, if ***quantile*** is 0.05, this metric will be the sample weighted mean response for the 5% of observations with the lowest predictions. For "custom_function" see ***calculate_custom_validation_error_function*** below. Please note that for non-default values a significantly higher ***early_stopping_rounds*** than the default of 200 might be needed.
+Specifies which metric to use for validating the model and tuning ***m***. The model will try to minimize the validation metric. Available options are "default" (using the same methodology as when calculating the training error), "mse", "mae", "huber", "negative_gini" (normalized), "group_mse", "group_mse_by_prediction", "neg_top_quantile_mean_response", "bottom_quantile_mean_response" and "custom_function". The default is often a choice that fits well with respect to the ***loss_function*** chosen. However, if you want to use ***loss_function*** or ***dispersion_parameter*** as tuning parameters then the default is not suitable. "group_mse" requires that the "group" argument in the ***fit*** method is provided. "group_mse_by_prediction" groups predictions by up to ***group_mse_by_prediction_bins*** groups and calculates groupwise mse. "neg_top_quantile_mean_response" calculates the negative of the sample weighted mean response for observations with predictions in the top quantile (as specified by the ***quantile*** parameter). For example, if ***quantile*** is 0.95, this metric will be the negative of the sample weighted mean response for the 5% of observations with the highest predictions. "bottom_quantile_mean_response" calculates the sample weighted mean response for observations with predictions in the bottom quantile (as specified by the ***quantile*** parameter). For example, if ***quantile*** is 0.05, this metric will be the sample weighted mean response for the 5% of observations with the lowest predictions. For "custom_function" see ***calculate_custom_validation_error_function*** below. Please note that for non-default values a significantly higher ***early_stopping_rounds*** than the default of 200 might be needed.
 
 #### quantile (default = 0.5)
 Specifies the quantile to use when ***loss_function*** is "quantile" or when ***validation_tuning_metric*** is "neg_top_quantile_mean_response" or "bottom_quantile_mean_response".
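
Because ***dispersion_parameter*** now doubles as the Huber delta, it becomes a natural tuning parameter, and the documentation above points out that the "default" ***validation_tuning_metric*** is not suitable when tuning it. A hedged sketch of a simple holdout search over candidate deltas, scored with MAE; the grid, split and data are illustrative assumptions, not recommendations:

```python
import numpy as np
from aplr import APLRRegressor

# Illustrative data with heavy-tailed noise.
rng = np.random.default_rng(1)
X = rng.normal(size=(2000, 4))
y = X[:, 0] + 0.5 * X[:, 1] ** 2 + rng.standard_t(df=3, size=2000)

# Simple holdout split; cross-validation may be preferable in practice.
X_train, X_test = X[:1500], X[1500:]
y_train, y_test = y[:1500], y[1500:]

# For loss_function="huber", dispersion_parameter is interpreted as delta.
mae_by_delta = {}
for delta in [0.5, 1.0, 1.35, 2.0]:  # illustrative grid
    model = APLRRegressor(loss_function="huber", dispersion_parameter=delta)
    model.fit(X_train, y_train)
    mae_by_delta[delta] = float(np.mean(np.abs(y_test - model.predict(X_test))))

best_delta = min(mae_by_delta, key=mae_by_delta.get)
print(mae_by_delta, best_delta)
```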
@@ -132,6 +132,9 @@ Restricts the maximum number of terms in any of the underlying models trained to
 #### ridge_penalty (default = 0.0001)
 Specifies the (weighted) ridge penalty applied to the model. Positive values can smooth model effects and help mitigate boundary problems, such as regression coefficients with excessively high magnitudes near the boundaries. To find the optimal value, consider using a grid search or similar. Negative values are treated as zero.
 
+#### mean_bias_correction (default = False)
+If true, then a mean bias correction is applied to the model's intercept term. This can be useful for some loss functions, such as "huber", that can otherwise produce biased predictions. The correction is only applied for the "identity" and "log" link functions.
+
 
 ## Method: fit(X:FloatMatrix, y:FloatVector, sample_weight:FloatVector = np.empty(0), X_names:List[str] = [], cv_observations:IntMatrix = np.empty([0, 0]), prioritized_predictors_indexes:List[int] = [], monotonic_constraints:List[int] = [], group:FloatVector = np.empty(0), interaction_constraints:List[List[int]] = [], other_data:FloatMatrix = np.empty([0, 0]), predictor_learning_rates:List[float] = [], predictor_penalties_for_non_linearity:List[float] = [], predictor_penalties_for_interactions:List[float] = [], predictor_min_observations_in_split: List[int] = [])
 
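The new ***mean_bias_correction*** flag can be inspected empirically: with a robust loss such as "huber", the uncorrected model may not reproduce the mean of the training response, while the corrected model adjusts the intercept. A sketch under illustrative assumptions (skewed noise is chosen precisely to make the bias visible; the size of the effect on other data is not guaranteed):

```python
import numpy as np
from aplr import APLRRegressor

# Skewed noise tends to pull robust-loss predictions away from the response mean.
rng = np.random.default_rng(2)
X = rng.normal(size=(1500, 3))
y = X[:, 0] + rng.exponential(scale=2.0, size=1500)

for correct_bias in (False, True):
    model = APLRRegressor(
        loss_function="huber",
        link_function="identity",
        mean_bias_correction=correct_bias,
    )
    model.fit(X, y)
    bias = float(np.mean(model.predict(X)) - np.mean(y))
    print(f"mean_bias_correction={correct_bias}: mean prediction bias {bias:.4f}")
```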

aplr/aplr.py

Lines changed: 4 additions & 0 deletions
@@ -75,6 +75,7 @@ def __init__(
         penalty_for_interactions: float = 0.0,
         max_terms: int = 0,
         ridge_penalty: float = 0.0001,
+        mean_bias_correction: bool = False,
     ):
         self.m = m
         self.v = v
@@ -122,6 +123,7 @@ def __init__(
         self.penalty_for_interactions = penalty_for_interactions
         self.max_terms = max_terms
         self.ridge_penalty = ridge_penalty
+        self.mean_bias_correction = mean_bias_correction
 
         # Creating aplr_cpp and setting parameters
         self.APLRRegressor = aplr_cpp.APLRRegressor()
@@ -183,6 +185,7 @@ def __set_params_cpp(self):
         self.APLRRegressor.penalty_for_interactions = self.penalty_for_interactions
         self.APLRRegressor.max_terms = self.max_terms
         self.APLRRegressor.ridge_penalty = self.ridge_penalty
+        self.APLRRegressor.mean_bias_correction = self.mean_bias_correction
 
     def fit(
         self,
@@ -465,6 +468,7 @@ def get_params(self, deep=True):
             "penalty_for_interactions": self.penalty_for_interactions,
             "max_terms": self.max_terms,
             "ridge_penalty": self.ridge_penalty,
+            "mean_bias_correction": self.mean_bias_correction,
         }
 
     # For sklearn
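
Since the flag is wired into get_params above, it should survive the parameter round-trip that scikit-learn utilities such as clone rely on. A small sanity check, assuming the installed aplr package exposes the updated signature and that scikit-learn is available:

```python
from sklearn.base import clone
from aplr import APLRRegressor

model = APLRRegressor(mean_bias_correction=True)

# get_params now reports the new flag, so a cloned estimator keeps it.
assert model.get_params()["mean_bias_correction"] is True
assert clone(model).get_params()["mean_bias_correction"] is True
```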
