Skip to content

Commit 8df0e3a

Browse files
970
1 parent e341ed0 commit 8df0e3a

File tree

5 files changed

+25
-7
lines changed

5 files changed

+25
-7
lines changed

API_REFERENCE_FOR_CLASSIFICATION.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@ If validation loss does not improve during the last ***early_stopping_rounds***
5353
Specifies the number of initial boosting steps that are reserved only for linear effects. 0 means that non-linear effects are allowed from the first boosting step. Reasons for setting this parameter to a higher value than 0 could be to 1) build a more interpretable model with more emphasis on linear effects or 2) build a linear only model by setting ***num_first_steps_with_linear_effects_only*** to no less than ***m***.
5454

5555
#### penalty_for_non_linearity (default = 0.0)
56-
Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness.
56+
Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
5757

5858
#### penalty_for_interactions (default = 0.0)
59-
Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness.
59+
Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
6060

6161

6262
## Method: fit(X:npt.ArrayLike, y:List[str], sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], interaction_constraints:List[List[int]]=[])

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,10 @@ If validation loss does not improve during the last ***early_stopping_rounds***
121121
Specifies the number of initial boosting steps that are reserved only for linear effects. 0 means that non-linear effects are allowed from the first boosting step. Reasons for setting this parameter to a higher value than 0 could be to 1) build a more interpretable model with more emphasis on linear effects or 2) build a linear only model by setting ***num_first_steps_with_linear_effects_only*** to no less than ***m***.
122122

123123
#### penalty_for_non_linearity (default = 0.0)
124-
Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness.
124+
Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
125125

126126
#### penalty_for_interactions (default = 0.0)
127-
Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness.
127+
Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness. Values outside the [0.0, 1.0] range are clamped to the nearest boundary: values below 0.0 become 0.0 and values above 1.0 become 1.0.
128128

129129

130130
## Method: fit(X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], group:npt.ArrayLike = np.empty(0), interaction_constraints:List[List[int]]=[], other_data: npt.ArrayLike = np.empty([0, 0]))

cpp/APLRRegressor.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ class APLRRegressor
8686
void preprocess_prioritized_predictors_and_interaction_constraints(const MatrixXd &X, const std::vector<size_t> &prioritized_predictors_indexes,
8787
const std::vector<std::vector<size_t>> &interaction_constraints);
8888
void initialize_multithreading();
89+
void preprocess_penalties();
90+
void preprocess_penalty(double &penalty);
8991
void fit_model_for_cv_fold(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight,
9092
const std::vector<std::string> &X_names, const VectorXi &cv_observations_in_fold,
9193
const std::vector<int> &monotonic_constraints, const VectorXi &group, const MatrixXd &other_data,
@@ -334,6 +336,7 @@ void APLRRegressor::fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sa
334336
MatrixXi cv_observations_used{preprocess_cv_observations(cv_observations, y)};
335337
preprocess_prioritized_predictors_and_interaction_constraints(X, prioritized_predictors_indexes, interaction_constraints);
336338
initialize_multithreading();
339+
preprocess_penalties();
337340
cv_fold_models.resize(cv_observations_used.cols());
338341
for (Eigen::Index i = 0; i < cv_observations_used.cols(); ++i)
339342
{
@@ -372,6 +375,20 @@ void APLRRegressor::initialize_multithreading()
372375
omp_set_num_threads(cores_to_use);
373376
}
374377

378+
void APLRRegressor::preprocess_penalties()
379+
{
380+
preprocess_penalty(penalty_for_non_linearity);
381+
preprocess_penalty(penalty_for_interactions);
382+
}
383+
384+
// Clamps a single penalty value to the closed interval [0.0, 1.0], in place.
//
// penalty: value to normalize. It is overwritten with 0.0 when it is below
//          0.0 and with 1.0 when it is above 1.0; otherwise it is left as is.
//
// NOTE(review): std::isless / std::isgreater both yield false when an operand
// is NaN, so a NaN penalty passes through unchanged. Confirm whether callers
// can supply NaN and, if so, what it should map to.
void APLRRegressor::preprocess_penalty(double &penalty)
{
    const bool below_range{std::isless(penalty, 0.0)};
    const bool above_range{std::isgreater(penalty, 1.0)};

    if (below_range)
    {
        penalty = 0.0;
    }
    else if (above_range)
    {
        penalty = 1.0;
    }
}
391+
375392
void APLRRegressor::fit_model_for_cv_fold(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight,
376393
const std::vector<std::string> &X_names, const VectorXi &cv_observations_in_fold,
377394
const std::vector<int> &monotonic_constraints, const VectorXi &group, const MatrixXd &other_data,
@@ -1177,7 +1194,7 @@ size_t APLRRegressor::find_best_term_index(std::vector<Term> &terms, std::vector
11771194

11781195
void APLRRegressor::consider_interactions(const std::vector<size_t> &available_predictor_indexes, size_t boosting_step)
11791196
{
1180-
bool consider_interactions{terms.size() > 0 && max_interaction_level > 0 && interactions_eligible < max_interactions && boosting_step >= boosting_steps_before_interactions_are_allowed};
1197+
bool consider_interactions{terms.size() > 0 && max_interaction_level > 0 && interactions_eligible < max_interactions && boosting_step >= boosting_steps_before_interactions_are_allowed && std::isless(penalty_for_interactions, 1.0)};
11811198
if (consider_interactions)
11821199
{
11831200
determine_interactions_to_consider(available_predictor_indexes);

cpp/term.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,8 @@ void Term::estimate_split_point_on_discretized_data()
519519
error_split_point_nan = split_point_search_errors_sum;
520520
}
521521

522-
if (!linear_effects_only_in_this_boosting_step)
522+
bool non_linear_effects_are_allowed{!linear_effects_only_in_this_boosting_step && std::isless(penalty_for_non_linearity, 1.0)};
523+
if (non_linear_effects_are_allowed)
523524
{
524525
double split_point_left{NAN_DOUBLE};
525526
double error_min_left{error_split_point_nan};

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
setuptools.setup(
2323
name="aplr",
24-
version="9.6.0",
24+
version="9.7.0",
2525
description="Automatic Piecewise Linear Regression",
2626
ext_modules=[sfc_module],
2727
author="Mathias von Ottenbreit",

0 commit comments

Comments
 (0)