Skip to content

Commit fc92b0a

Browse files
adding possibility to impose monotonic constraints
1 parent 03b3ac4 commit fc92b0a

File tree

7 files changed

+176
-27
lines changed

7 files changed

+176
-27
lines changed

API_REFERENCE.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Species the variance power for the "tweedie" ***family***.
5656
APLR calculates a tuning metric, mean squared error for groups of observations in the validation set. This metric is provided by the method ***get_validation_group_mse()***. The metric may be useful for tuning ***tweedie_power*** and to some extent ***family*** or ***link_function***. The reasoning behind this is that mean squared error (MSE) is often appropriate for evaluating goodness of fit on approximately normally distributed data. The mean of a group of observations is approximately normally distributed according to the Central Limit Theorem (CLT) if there are enough observations in the group, regardless of how individual observations are distributed. Ideally, ***group_size_for_validation_group_mse*** should be large enough so that the Central Limit Theorem holds (at least 30, but the default of 100 is a safer choice). Also, the number of observations in the validation set should be substantially higher than ***group_size_for_validation_group_mse***.
5757

5858

59-
## Method: fit(X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], validation_set_indexes:List[int]=[], prioritized_predictors_indexes:List[int]=[])
59+
## Method: fit(X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], validation_set_indexes:List[int]=[], prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[])
6060

6161
***This method fits the model to data.***
6262

@@ -80,6 +80,9 @@ An optional list of integers specifying the indexes of observations to be used f
8080
#### prioritized_predictors_indexes
8181
An optional list of integers specifying the indexes of predictors (columns) in ***X*** that should be prioritized. Terms of the prioritized predictors will enter the model as long as they reduce the training error and do not contain too few effective observations. They will also be updated more often.
8282

83+
#### monotonic_constraints
84+
An optional list of integers specifying monotonic constraints on model terms. For example, if there are three predictors in ***X***, then monotonic_constraints = [1,0,-1] means that 1) the first predictor in ***X*** cannot be used in interaction terms and all terms using the first predictor in ***X*** as a main effect must have positive regression coefficients, 2) there are no monotonic constraints on terms using the second predictor in ***X***, and 3) the third predictor in ***X*** cannot be used in interaction terms and all terms using the third predictor in ***X*** as a main effect must have negative regression coefficients.
85+
8386

8487
## Method: predict(X:npt.ArrayLike, cap_predictions_to_minmax_in_training:bool=True)
8588

aplr/aplr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ def __set_params_cpp(self):
4848
self.APLRRegressor.tweedie_power=self.tweedie_power
4949
self.APLRRegressor.group_size_for_validation_group_mse=self.group_size_for_validation_group_mse
5050

51-
def fit(self, X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], validation_set_indexes:List[int]=[], prioritized_predictors_indexes:List[int]=[]):
51+
def fit(self, X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], validation_set_indexes:List[int]=[], prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[]):
5252
self.__set_params_cpp()
53-
self.APLRRegressor.fit(X,y,sample_weight,X_names,validation_set_indexes,prioritized_predictors_indexes)
53+
self.APLRRegressor.fit(X,y,sample_weight,X_names,validation_set_indexes,prioritized_predictors_indexes,monotonic_constraints)
5454

5555
def predict(self, X:npt.ArrayLike, cap_predictions_to_minmax_in_training:bool=True)->npt.ArrayLike:
5656
return self.APLRRegressor.predict(X, cap_predictions_to_minmax_in_training)

cpp/APLRRegressor.h

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,17 @@ class APLRRegressor
4646
double scaling_factor_for_log_link_function;
4747
std::vector<size_t> predictor_indexes;
4848
std::vector<size_t> prioritized_predictors_indexes;
49+
std::vector<int> monotonic_constraints; //Make this VectorXi and validate for nan/inf input
4950

5051
//Methods
51-
void validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t> &validation_set_indexes, const std::vector<size_t> &prioritized_predictors_indexes);
52+
void validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names,
53+
const std::vector<size_t> &validation_set_indexes, const std::vector<size_t> &prioritized_predictors_indexes,
54+
const std::vector<int> &monotonic_constraints);
5255
void throw_error_if_validation_set_indexes_has_invalid_indexes(const VectorXd &y, const std::vector<size_t> &validation_set_indexes);
5356
void throw_error_if_prioritized_predictors_indexes_has_invalid_indexes(const MatrixXd &X, const std::vector<size_t> &prioritized_predictors_indexes);
57+
void throw_error_if_monotonic_constraints_has_invalid_indexes(const MatrixXd &X, const std::vector<int> &monotonic_constraints);
5458
void define_training_and_validation_sets(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight, const std::vector<size_t> &validation_set_indexes);
55-
void initialize(const std::vector<size_t> &prioritized_predictors_indexes);
59+
void initialize(const std::vector<size_t> &prioritized_predictors_indexes, const std::vector<int> &monotonic_constraints);
5660
bool check_if_base_term_has_only_one_unique_value(size_t base_term);
5761
void add_term_to_terms_eligible_current(Term &term);
5862
VectorXd calculate_neg_gradient_current();
@@ -141,7 +145,8 @@ class APLRRegressor
141145
size_t group_size_for_validation_group_mse=100);
142146
APLRRegressor(const APLRRegressor &other);
143147
~APLRRegressor();
144-
void fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight=VectorXd(0),const std::vector<std::string> &X_names={},const std::vector<size_t> &validation_set_indexes={},const std::vector<size_t> &prioritized_predictors_indexes={});
148+
void fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight=VectorXd(0),const std::vector<std::string> &X_names={},const std::vector<size_t> &validation_set_indexes={},
149+
const std::vector<size_t> &prioritized_predictors_indexes={}, const std::vector<int> &monotonic_constraints={});
145150
VectorXd predict(const MatrixXd &X, bool cap_predictions_to_minmax_in_training=true);
146151
void set_term_names(const std::vector<std::string> &X_names);
147152
MatrixXd calculate_local_feature_importance(const MatrixXd &X);
@@ -198,15 +203,17 @@ APLRRegressor::~APLRRegressor()
198203
//X_names specifies names for each column in X. If not specified then X1, X2, X3, ... will be used as names for each column in X.
199204
//If validation_set_indexes.size()>0 then validation_set_indexes defines which of the indexes in X, y and sample_weight are used to validate,
200205
//invalidating validation_ratio. The rest of indexes are used to train.
201-
void APLRRegressor::fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names,const std::vector<size_t> &validation_set_indexes,const std::vector<size_t> &prioritized_predictors_indexes)
206+
void APLRRegressor::fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names,
207+
const std::vector<size_t> &validation_set_indexes,const std::vector<size_t> &prioritized_predictors_indexes,
208+
const std::vector<int> &monotonic_constraints)
202209
{
203210
throw_error_if_family_does_not_exist();
204211
throw_error_if_link_function_does_not_exist();
205212
throw_error_if_tweedie_power_is_invalid();
206-
validate_input_to_fit(X,y,sample_weight,X_names,validation_set_indexes,prioritized_predictors_indexes);
213+
validate_input_to_fit(X,y,sample_weight,X_names,validation_set_indexes,prioritized_predictors_indexes,monotonic_constraints);
207214
define_training_and_validation_sets(X,y,sample_weight,validation_set_indexes);
208215
scale_training_observations_if_using_log_link_function();
209-
initialize(prioritized_predictors_indexes);
216+
initialize(prioritized_predictors_indexes, monotonic_constraints);
210217
execute_boosting_steps();
211218
update_coefficients_for_all_steps();
212219
print_final_summary();
@@ -258,7 +265,9 @@ void APLRRegressor::throw_error_if_tweedie_power_is_invalid()
258265
throw std::runtime_error("Tweedie power is invalid. It must not equal 1.0 or 2.0 and cannot be below 1.0.");
259266
}
260267

261-
void APLRRegressor::validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t> &validation_set_indexes, const std::vector<size_t> &prioritized_predictors_indexes)
268+
void APLRRegressor::validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,
269+
const std::vector<std::string> &X_names, const std::vector<size_t> &validation_set_indexes,
270+
const std::vector<size_t> &prioritized_predictors_indexes, const std::vector<int> &monotonic_constraints)
262271
{
263272
if(X.rows()!=y.size()) throw std::runtime_error("X and y must have the same number of rows.");
264273
if(X.rows()<2) throw std::runtime_error("X and y cannot have less than two rows.");
@@ -269,6 +278,7 @@ void APLRRegressor::validate_input_to_fit(const MatrixXd &X,const VectorXd &y,co
269278
throw_error_if_matrix_has_nan_or_infinite_elements(sample_weight, "sample_weight");
270279
throw_error_if_validation_set_indexes_has_invalid_indexes(y, validation_set_indexes);
271280
throw_error_if_prioritized_predictors_indexes_has_invalid_indexes(X, prioritized_predictors_indexes);
281+
throw_error_if_monotonic_constraints_has_invalid_indexes(X, monotonic_constraints);
272282
throw_error_if_response_contains_invalid_values(y);
273283
}
274284

@@ -296,6 +306,13 @@ void APLRRegressor::throw_error_if_prioritized_predictors_indexes_has_invalid_in
296306
}
297307
}
298308

309+
void APLRRegressor::throw_error_if_monotonic_constraints_has_invalid_indexes(const MatrixXd &X, const std::vector<int> &monotonic_constraints)
310+
{
311+
bool error{monotonic_constraints.size()>0 && monotonic_constraints.size() != X.cols()};
312+
if(error)
313+
throw std::runtime_error("monotonic_constraints must either be empty or a vector with one integer for each column in X.");
314+
}
315+
299316
void APLRRegressor::throw_error_if_response_contains_invalid_values(const VectorXd &y)
300317
{
301318
if(link_function=="logit" || family=="binomial")
@@ -434,7 +451,7 @@ void APLRRegressor::scale_training_observations_if_using_log_link_function()
434451
}
435452
}
436453

437-
void APLRRegressor::initialize(const std::vector<size_t> &prioritized_predictors_indexes)
454+
void APLRRegressor::initialize(const std::vector<size_t> &prioritized_predictors_indexes, const std::vector<int> &monotonic_constraints)
438455
{
439456
number_of_base_terms=static_cast<size_t>(X_train.cols());
440457

@@ -463,6 +480,16 @@ void APLRRegressor::initialize(const std::vector<size_t> &prioritized_predictors
463480
}
464481
this->prioritized_predictors_indexes=prioritized_predictors_indexes;
465482

483+
this->monotonic_constraints=monotonic_constraints;
484+
bool monotonic_constraints_provided{monotonic_constraints.size()>0};
485+
if(monotonic_constraints_provided)
486+
{
487+
for(auto &term_eligible_current:terms_eligible_current)
488+
{
489+
term_eligible_current.set_monotonic_constraint(monotonic_constraints[term_eligible_current.base_term]);
490+
}
491+
}
492+
466493
linear_predictor_current=VectorXd::Constant(y_train.size(),intercept);
467494
linear_predictor_null_model=linear_predictor_current;
468495
linear_predictor_current_validation=VectorXd::Constant(y_validation.size(),intercept);
@@ -695,6 +722,7 @@ void APLRRegressor::determine_interactions_to_consider(const std::vector<size_t>
695722
{
696723
interactions_to_consider=std::vector<Term>();
697724
interactions_to_consider.reserve(static_cast<size_t>(X_train.cols())*terms.size());
725+
bool monotonic_constraints_provided{monotonic_constraints.size()>0};
698726

699727
VectorXi indexes_for_terms_to_consider_as_interaction_partners{find_indexes_for_terms_to_consider_as_interaction_partners()};
700728
for (auto &model_term_index:indexes_for_terms_to_consider_as_interaction_partners)
@@ -705,12 +733,18 @@ void APLRRegressor::determine_interactions_to_consider(const std::vector<size_t>
705733
if(term_is_eligible)
706734
{
707735
Term interaction{Term(new_term_index)};
736+
if(monotonic_constraints_provided)
737+
interaction.set_monotonic_constraint(monotonic_constraints[new_term_index]);
708738
Term model_term_without_given_terms{terms[model_term_index]};
709739
model_term_without_given_terms.given_terms.clear();
710740
model_term_without_given_terms.cleanup_when_this_term_was_added_as_a_given_term();
711741
Term model_term_with_added_given_term{terms[model_term_index]};
712-
model_term_with_added_given_term.given_terms.push_back(model_term_without_given_terms);
742+
bool model_term_without_given_terms_can_be_a_given_term{model_term_without_given_terms.get_monotonic_constraint()==0};
743+
if(model_term_without_given_terms_can_be_a_given_term)
744+
model_term_with_added_given_term.given_terms.push_back(model_term_without_given_terms);
713745
add_necessary_given_terms_to_interaction(interaction, model_term_with_added_given_term);
746+
bool interaction_is_invalid{interaction.given_terms.size()==0};
747+
if(interaction_is_invalid) continue;
714748
bool interaction_level_is_too_high{interaction.get_interaction_level()>max_interaction_level};
715749
if(interaction_level_is_too_high) continue;
716750
bool interaction_is_already_in_the_model{false};
@@ -836,12 +870,18 @@ void APLRRegressor::find_sorted_indexes_for_errors_for_interactions_to_consider(
836870
void APLRRegressor::add_promising_interactions_and_select_the_best_one()
837871
{
838872
size_t best_term_before_interactions{best_term_index};
873+
bool best_term_before_interactions_was_not_selected{best_term_before_interactions==std::numeric_limits<size_t>::max()};
874+
bool error_is_less_than_for_best_term_before_interactions;
839875
for (size_t j = 0; j < static_cast<size_t>(sorted_indexes_of_errors_for_interactions_to_consider.size()); ++j) //for each interaction to consider starting from lowest to highest error
840876
{
841877
bool allowed_to_add_one_interaction{interactions_eligible<max_interactions};
842878
if(allowed_to_add_one_interaction)
843879
{
844-
bool error_is_less_than_for_best_term_before_interactions{std::isless(interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]].split_point_search_errors_sum,terms_eligible_current[best_term_before_interactions].split_point_search_errors_sum)};
880+
if(best_term_before_interactions_was_not_selected)
881+
error_is_less_than_for_best_term_before_interactions = std::isless(interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]].split_point_search_errors_sum, neg_gradient_nullmodel_errors_sum);
882+
else
883+
error_is_less_than_for_best_term_before_interactions = std::isless(interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]].split_point_search_errors_sum, terms_eligible_current[best_term_before_interactions].split_point_search_errors_sum);
884+
845885
if(error_is_less_than_for_best_term_before_interactions)
846886
{
847887
add_term_to_terms_eligible_current(interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]]);
@@ -1201,6 +1241,7 @@ void APLRRegressor::cleanup_after_fit()
12011241
}
12021242
predictor_indexes.clear();
12031243
prioritized_predictors_indexes.clear();
1244+
monotonic_constraints.clear();
12041245
}
12051246

12061247
VectorXd APLRRegressor::predict(const MatrixXd &X, bool cap_predictions_to_minmax_in_training)

cpp/pythonbinding.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ PYBIND11_MODULE(aplr_cpp, m) {
2121
py::arg("group_size_for_validation_group_mse")=100)
2222
.def("fit", &APLRRegressor::fit,py::arg("X"),py::arg("y"),py::arg("sample_weight")=VectorXd(0),py::arg("X_names")=std::vector<std::string>(),
2323
py::arg("validation_set_indexes")=std::vector<size_t>(),py::arg("prioritized_predictors_indexes")=std::vector<size_t>(),
24-
py::call_guard<py::scoped_ostream_redirect,py::scoped_estream_redirect>())
24+
py::arg("monotonic_constraints")=std::vector<int>(),py::call_guard<py::scoped_ostream_redirect,py::scoped_estream_redirect>())
2525
.def("predict", &APLRRegressor::predict,py::arg("X"),py::arg("bool cap_predictions_to_minmax_in_training")=true)
2626
.def("set_term_names", &APLRRegressor::set_term_names,py::arg("X_names"))
2727
.def("calculate_local_feature_importance",&APLRRegressor::calculate_local_feature_importance,py::arg("X"))

0 commit comments

Comments
 (0)