970

mathias-von-ottenbreit · mathias-von-ottenbreit · commit 8df0e3a0a587 · 2024-04-22T18:56:03.000+02:00
diff --git a/API_REFERENCE_FOR_CLASSIFICATION.md b/API_REFERENCE_FOR_CLASSIFICATION.md
@@ -53,10 +53,10 @@ If validation loss does not improve during the last ***early_stopping_rounds***
 Specifies the number of initial boosting steps that are reserved only for linear effects. 0 means that non-linear effects are allowed from the first boosting step. Reasons for setting this parameter to a higher value than 0 could be to 1) build a more interpretable model with more emphasis on linear effects or 2) build a linear only model by setting ***num_first_steps_with_linear_effects_only*** to no less than ***m***.
 
 #### penalty_for_non_linearity (default = 0.0)
-Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness.
+Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
 
 #### penalty_for_interactions (default = 0.0)
-Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness.
+Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
 
 
 ## Method: fit(X:npt.ArrayLike, y:List[str], sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], interaction_constraints:List[List[int]]=[])
diff --git a/API_REFERENCE_FOR_REGRESSION.md b/API_REFERENCE_FOR_REGRESSION.md
@@ -121,10 +121,10 @@ If validation loss does not improve during the last ***early_stopping_rounds***
 Specifies the number of initial boosting steps that are reserved only for linear effects. 0 means that non-linear effects are allowed from the first boosting step. Reasons for setting this parameter to a higher value than 0 could be to 1) build a more interpretable model with more emphasis on linear effects or 2) build a linear only model by setting ***num_first_steps_with_linear_effects_only*** to no less than ***m***. 
 
 #### penalty_for_non_linearity (default = 0.0)
-Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness.
+Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
 
 #### penalty_for_interactions (default = 0.0)
-Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness.
+Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
 
 
 ## Method: fit(X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], group:npt.ArrayLike = np.empty(0), interaction_constraints:List[List[int]]=[], other_data: npt.ArrayLike = np.empty([0, 0]))
diff --git a/cpp/APLRRegressor.h b/cpp/APLRRegressor.h
@@ -86,6 +86,8 @@ class APLRRegressor
     void preprocess_prioritized_predictors_and_interaction_constraints(const MatrixXd &X, const std::vector<size_t> &prioritized_predictors_indexes,
                                                                        const std::vector<std::vector<size_t>> &interaction_constraints);
     void initialize_multithreading();
+    void preprocess_penalties();
+    void preprocess_penalty(double &penalty);
     void fit_model_for_cv_fold(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight,
                                const std::vector<std::string> &X_names, const VectorXi &cv_observations_in_fold,
                                const std::vector<int> &monotonic_constraints, const VectorXi &group, const MatrixXd &other_data,
@@ -334,6 +336,7 @@ void APLRRegressor::fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sa
     MatrixXi cv_observations_used{preprocess_cv_observations(cv_observations, y)};
     preprocess_prioritized_predictors_and_interaction_constraints(X, prioritized_predictors_indexes, interaction_constraints);
     initialize_multithreading();
+    preprocess_penalties();
     cv_fold_models.resize(cv_observations_used.cols());
     for (Eigen::Index i = 0; i < cv_observations_used.cols(); ++i)
     {
@@ -372,6 +375,20 @@ void APLRRegressor::initialize_multithreading()
     omp_set_num_threads(cores_to_use);
 }
 
+void APLRRegressor::preprocess_penalties() // Normalizes both penalty hyperparameters; fit() calls this before the CV fold models are built.
+{
+    preprocess_penalty(penalty_for_non_linearity); // Penalty on terms that are not linear effects.
+    preprocess_penalty(penalty_for_interactions); // Penalty on interaction terms.
+}
+
+void APLRRegressor::preprocess_penalty(double &penalty) // Clamps penalty in place to the range [0.0, 1.0].
+{
+    if (std::isgreater(penalty, 1.0)) // Quiet comparison: false when penalty is NaN, so a NaN value is left unchanged.
+        penalty = 1.0;
+    else if (std::isless(penalty, 0.0)) // Also false for NaN.
+        penalty = 0.0;
+}
+
 void APLRRegressor::fit_model_for_cv_fold(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight,
                                           const std::vector<std::string> &X_names, const VectorXi &cv_observations_in_fold,
                                           const std::vector<int> &monotonic_constraints, const VectorXi &group, const MatrixXd &other_data,
@@ -1177,7 +1194,7 @@ size_t APLRRegressor::find_best_term_index(std::vector<Term> &terms, std::vector
 
 void APLRRegressor::consider_interactions(const std::vector<size_t> &available_predictor_indexes, size_t boosting_step)
 {
-    bool consider_interactions{terms.size() > 0 && max_interaction_level > 0 && interactions_eligible < max_interactions && boosting_step >= boosting_steps_before_interactions_are_allowed};
+    bool consider_interactions{terms.size() > 0 && max_interaction_level > 0 && interactions_eligible < max_interactions && boosting_step >= boosting_steps_before_interactions_are_allowed && std::isless(penalty_for_interactions, 1.0)};
     if (consider_interactions)
     {
         determine_interactions_to_consider(available_predictor_indexes);
diff --git a/cpp/term.h b/cpp/term.h
@@ -519,7 +519,8 @@ void Term::estimate_split_point_on_discretized_data()
         error_split_point_nan = split_point_search_errors_sum;
     }
 
-    if (!linear_effects_only_in_this_boosting_step)
+    bool non_linear_effects_are_allowed{!linear_effects_only_in_this_boosting_step && std::isless(penalty_for_non_linearity, 1.0)};
+    if (non_linear_effects_are_allowed)
     {
         double split_point_left{NAN_DOUBLE};
         double error_min_left{error_split_point_nan};
diff --git a/setup.py b/setup.py
@@ -21,7 +21,7 @@
 
 setuptools.setup(
     name="aplr",
-    version="9.6.0",
+    version="9.7.0",
     description="Automatic Piecewise Linear Regression",
     ext_modules=[sfc_module],
     author="Mathias von Ottenbreit",

Original file line number	Diff line number	Diff line change
`@@ -519,7 +519,8 @@ void Term::estimate_split_point_on_discretized_data()`
`519`	`519`	`error_split_point_nan = split_point_search_errors_sum;`
`520`	`520`	`}`
`521`	`521`
`522`		`- if (!linear_effects_only_in_this_boosting_step)`
	`522`	`+ bool non_linear_effects_are_allowed{!linear_effects_only_in_this_boosting_step && std::isless(penalty_for_non_linearity, 1.0)};`
	`523`	`+ if (non_linear_effects_are_allowed)`
`523`	`524`	`{`
`524`	`525`	`double split_point_left{NAN_DOUBLE};`
`525`	`526`	`double error_min_left{error_split_point_nan};`