@@ -46,13 +46,17 @@ class APLRRegressor
4646 double scaling_factor_for_log_link_function;
4747 std::vector<size_t > predictor_indexes;
4848 std::vector<size_t > prioritized_predictors_indexes;
49+ std::vector<int > monotonic_constraints; // Make this VectorXi and validate for nan/inf input
4950
5051 // Methods
51- void validate_input_to_fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t > &validation_set_indexes, const std::vector<size_t > &prioritized_predictors_indexes);
52+ void validate_input_to_fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names,
53+ const std::vector<size_t > &validation_set_indexes, const std::vector<size_t > &prioritized_predictors_indexes,
54+ const std::vector<int > &monotonic_constraints);
5255 void throw_error_if_validation_set_indexes_has_invalid_indexes (const VectorXd &y, const std::vector<size_t > &validation_set_indexes);
5356 void throw_error_if_prioritized_predictors_indexes_has_invalid_indexes (const MatrixXd &X, const std::vector<size_t > &prioritized_predictors_indexes);
57+ void throw_error_if_monotonic_constraints_has_invalid_indexes (const MatrixXd &X, const std::vector<int > &monotonic_constraints);
5458 void define_training_and_validation_sets (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight, const std::vector<size_t > &validation_set_indexes);
55- void initialize (const std::vector<size_t > &prioritized_predictors_indexes);
59+ void initialize (const std::vector<size_t > &prioritized_predictors_indexes, const std::vector< int > &monotonic_constraints );
5660 bool check_if_base_term_has_only_one_unique_value (size_t base_term);
5761 void add_term_to_terms_eligible_current (Term &term);
5862 VectorXd calculate_neg_gradient_current ();
@@ -141,7 +145,8 @@ class APLRRegressor
141145 size_t group_size_for_validation_group_mse=100 );
142146 APLRRegressor (const APLRRegressor &other);
143147 ~APLRRegressor ();
144- void fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight=VectorXd(0 ),const std::vector<std::string> &X_names={},const std::vector<size_t > &validation_set_indexes={},const std::vector<size_t > &prioritized_predictors_indexes={});
148+ void fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight=VectorXd(0 ),const std::vector<std::string> &X_names={},const std::vector<size_t > &validation_set_indexes={},
149+ const std::vector<size_t > &prioritized_predictors_indexes={}, const std::vector<int > &monotonic_constraints={});
145150 VectorXd predict (const MatrixXd &X, bool cap_predictions_to_minmax_in_training=true );
146151 void set_term_names (const std::vector<std::string> &X_names);
147152 MatrixXd calculate_local_feature_importance (const MatrixXd &X);
@@ -198,15 +203,17 @@ APLRRegressor::~APLRRegressor()
198203// X_names specifies names for each column in X. If not specified then X1, X2, X3, ... will be used as names for each column in X.
199204// If validation_set_indexes.size()>0 then validation_set_indexes defines which of the indexes in X, y and sample_weight are used to validate,
200205// invalidating validation_ratio. The rest of indexes are used to train.
201- void APLRRegressor::fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names,const std::vector<size_t > &validation_set_indexes,const std::vector<size_t > &prioritized_predictors_indexes)
206+ void APLRRegressor::fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names,
207+ const std::vector<size_t > &validation_set_indexes,const std::vector<size_t > &prioritized_predictors_indexes,
208+ const std::vector<int > &monotonic_constraints)
202209{
203210 throw_error_if_family_does_not_exist ();
204211 throw_error_if_link_function_does_not_exist ();
205212 throw_error_if_tweedie_power_is_invalid ();
206- validate_input_to_fit (X,y,sample_weight,X_names,validation_set_indexes,prioritized_predictors_indexes);
213+ validate_input_to_fit (X,y,sample_weight,X_names,validation_set_indexes,prioritized_predictors_indexes,monotonic_constraints );
207214 define_training_and_validation_sets (X,y,sample_weight,validation_set_indexes);
208215 scale_training_observations_if_using_log_link_function ();
209- initialize (prioritized_predictors_indexes);
216+ initialize (prioritized_predictors_indexes, monotonic_constraints );
210217 execute_boosting_steps ();
211218 update_coefficients_for_all_steps ();
212219 print_final_summary ();
@@ -258,7 +265,9 @@ void APLRRegressor::throw_error_if_tweedie_power_is_invalid()
258265 throw std::runtime_error (" Tweedie power is invalid. It must not equal 1.0 or 2.0 and cannot be below 1.0." );
259266}
260267
261- void APLRRegressor::validate_input_to_fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t > &validation_set_indexes, const std::vector<size_t > &prioritized_predictors_indexes)
268+ void APLRRegressor::validate_input_to_fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,
269+ const std::vector<std::string> &X_names, const std::vector<size_t > &validation_set_indexes,
270+ const std::vector<size_t > &prioritized_predictors_indexes, const std::vector<int > &monotonic_constraints)
262271{
263272 if (X.rows ()!=y.size ()) throw std::runtime_error (" X and y must have the same number of rows." );
264273 if (X.rows ()<2 ) throw std::runtime_error (" X and y cannot have less than two rows." );
@@ -269,6 +278,7 @@ void APLRRegressor::validate_input_to_fit(const MatrixXd &X,const VectorXd &y,co
269278 throw_error_if_matrix_has_nan_or_infinite_elements (sample_weight, " sample_weight" );
270279 throw_error_if_validation_set_indexes_has_invalid_indexes (y, validation_set_indexes);
271280 throw_error_if_prioritized_predictors_indexes_has_invalid_indexes (X, prioritized_predictors_indexes);
281+ throw_error_if_monotonic_constraints_has_invalid_indexes (X, monotonic_constraints);
272282 throw_error_if_response_contains_invalid_values (y);
273283}
274284
@@ -296,6 +306,13 @@ void APLRRegressor::throw_error_if_prioritized_predictors_indexes_has_invalid_in
296306 }
297307}
298308
309+ void APLRRegressor::throw_error_if_monotonic_constraints_has_invalid_indexes (const MatrixXd &X, const std::vector<int > &monotonic_constraints)
310+ {
311+ bool error{monotonic_constraints.size ()>0 && monotonic_constraints.size () != X.cols ()};
312+ if (error)
313+ throw std::runtime_error (" monotonic_constraints must either be empty or a vector with one integer for each column in X." );
314+ }
315+
299316void APLRRegressor::throw_error_if_response_contains_invalid_values (const VectorXd &y)
300317{
301318 if (link_function==" logit" || family==" binomial" )
@@ -434,7 +451,7 @@ void APLRRegressor::scale_training_observations_if_using_log_link_function()
434451 }
435452}
436453
437- void APLRRegressor::initialize (const std::vector<size_t > &prioritized_predictors_indexes)
454+ void APLRRegressor::initialize (const std::vector<size_t > &prioritized_predictors_indexes, const std::vector< int > &monotonic_constraints )
438455{
439456 number_of_base_terms=static_cast <size_t >(X_train.cols ());
440457
@@ -463,6 +480,16 @@ void APLRRegressor::initialize(const std::vector<size_t> &prioritized_predictors
463480 }
464481 this ->prioritized_predictors_indexes =prioritized_predictors_indexes;
465482
483+ this ->monotonic_constraints =monotonic_constraints;
484+ bool monotonic_constraints_provided{monotonic_constraints.size ()>0 };
485+ if (monotonic_constraints_provided)
486+ {
487+ for (auto &term_eligible_current:terms_eligible_current)
488+ {
489+ term_eligible_current.set_monotonic_constraint (monotonic_constraints[term_eligible_current.base_term ]);
490+ }
491+ }
492+
466493 linear_predictor_current=VectorXd::Constant (y_train.size (),intercept);
467494 linear_predictor_null_model=linear_predictor_current;
468495 linear_predictor_current_validation=VectorXd::Constant (y_validation.size (),intercept);
@@ -695,6 +722,7 @@ void APLRRegressor::determine_interactions_to_consider(const std::vector<size_t>
695722{
696723 interactions_to_consider=std::vector<Term>();
697724 interactions_to_consider.reserve (static_cast <size_t >(X_train.cols ())*terms.size ());
725+ bool monotonic_constraints_provided{monotonic_constraints.size ()>0 };
698726
699727 VectorXi indexes_for_terms_to_consider_as_interaction_partners{find_indexes_for_terms_to_consider_as_interaction_partners ()};
700728 for (auto &model_term_index:indexes_for_terms_to_consider_as_interaction_partners)
@@ -705,12 +733,18 @@ void APLRRegressor::determine_interactions_to_consider(const std::vector<size_t>
705733 if (term_is_eligible)
706734 {
707735 Term interaction{Term (new_term_index)};
736+ if (monotonic_constraints_provided)
737+ interaction.set_monotonic_constraint (monotonic_constraints[new_term_index]);
708738 Term model_term_without_given_terms{terms[model_term_index]};
709739 model_term_without_given_terms.given_terms .clear ();
710740 model_term_without_given_terms.cleanup_when_this_term_was_added_as_a_given_term ();
711741 Term model_term_with_added_given_term{terms[model_term_index]};
712- model_term_with_added_given_term.given_terms .push_back (model_term_without_given_terms);
742+ bool model_term_without_given_terms_can_be_a_given_term{model_term_without_given_terms.get_monotonic_constraint ()==0 };
743+ if (model_term_without_given_terms_can_be_a_given_term)
744+ model_term_with_added_given_term.given_terms .push_back (model_term_without_given_terms);
713745 add_necessary_given_terms_to_interaction (interaction, model_term_with_added_given_term);
746+ bool interaction_is_invalid{interaction.given_terms .size ()==0 };
747+ if (interaction_is_invalid) continue ;
714748 bool interaction_level_is_too_high{interaction.get_interaction_level ()>max_interaction_level};
715749 if (interaction_level_is_too_high) continue ;
716750 bool interaction_is_already_in_the_model{false };
@@ -836,12 +870,18 @@ void APLRRegressor::find_sorted_indexes_for_errors_for_interactions_to_consider(
836870void APLRRegressor::add_promising_interactions_and_select_the_best_one ()
837871{
838872 size_t best_term_before_interactions{best_term_index};
873+ bool best_term_before_interactions_was_not_selected{best_term_before_interactions==std::numeric_limits<size_t >::max ()};
874+ bool error_is_less_than_for_best_term_before_interactions;
839875 for (size_t j = 0 ; j < static_cast <size_t >(sorted_indexes_of_errors_for_interactions_to_consider.size ()); ++j) // for each interaction to consider starting from lowest to highest error
840876 {
841877 bool allowed_to_add_one_interaction{interactions_eligible<max_interactions};
842878 if (allowed_to_add_one_interaction)
843879 {
844- bool error_is_less_than_for_best_term_before_interactions{std::isless (interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]].split_point_search_errors_sum ,terms_eligible_current[best_term_before_interactions].split_point_search_errors_sum )};
880+ if (best_term_before_interactions_was_not_selected)
881+ error_is_less_than_for_best_term_before_interactions = std::isless (interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]].split_point_search_errors_sum , neg_gradient_nullmodel_errors_sum);
882+ else
883+ error_is_less_than_for_best_term_before_interactions = std::isless (interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]].split_point_search_errors_sum , terms_eligible_current[best_term_before_interactions].split_point_search_errors_sum );
884+
845885 if (error_is_less_than_for_best_term_before_interactions)
846886 {
847887 add_term_to_terms_eligible_current (interactions_to_consider[sorted_indexes_of_errors_for_interactions_to_consider[j]]);
@@ -1201,6 +1241,7 @@ void APLRRegressor::cleanup_after_fit()
12011241 }
12021242 predictor_indexes.clear ();
12031243 prioritized_predictors_indexes.clear ();
1244+ monotonic_constraints.clear ();
12041245}
12051246
12061247VectorXd APLRRegressor::predict (const MatrixXd &X, bool cap_predictions_to_minmax_in_training)
0 commit comments