@@ -85,11 +85,11 @@ class APLRRegressor
8585 void update_gradient_and_errors ();
8686 void add_new_term (size_t boosting_step);
8787 void prune_terms (size_t boosting_step);
88+ void update_coefficient_steps (size_t boosting_step);
8889 void calculate_and_validate_validation_error (size_t boosting_step);
8990 void calculate_validation_error (size_t boosting_step, const VectorXd &predictions);
9091 void update_term_eligibility ();
9192 void print_summary_after_boosting_step (size_t boosting_step);
92- void update_coefficients_for_all_steps ();
9393 void print_final_summary ();
9494 void find_optimal_m_and_update_model_accordingly ();
9595 void merge_similar_terms ();
@@ -110,7 +110,7 @@ class APLRRegressor
110110 void throw_error_if_response_is_not_greater_than_zero (const VectorXd &y, const std::string &error_message);
111111 void throw_error_if_dispersion_parameter_is_invalid ();
112112 VectorXd differentiate_predictions_wrt_linear_predictor ();
113- void scale_training_observations_if_using_log_link_function ();
113+ void scale_response_if_using_log_link_function ();
114114 void revert_scaling_if_using_log_link_function ();
115115 void cap_predictions_to_minmax_in_training (VectorXd &predictions);
116116 std::string compute_raw_base_term_name (const Term &term, const std::string &X_name);
@@ -252,10 +252,9 @@ void APLRRegressor::fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sa
252252 validate_input_to_fit (X, y, sample_weight, X_names, validation_set_indexes, prioritized_predictors_indexes, monotonic_constraints, group,
253253 interaction_constraints, other_data);
254254 define_training_and_validation_sets (X, y, sample_weight, validation_set_indexes, group, other_data);
255- scale_training_observations_if_using_log_link_function ();
255+ scale_response_if_using_log_link_function ();
256256 initialize (prioritized_predictors_indexes, monotonic_constraints, interaction_constraints);
257257 execute_boosting_steps ();
258- update_coefficients_for_all_steps ();
259258 print_final_summary ();
260259 find_optimal_m_and_update_model_accordingly ();
261260 merge_similar_terms ();
@@ -596,7 +595,7 @@ void APLRRegressor::define_training_and_validation_sets(const MatrixXd &X, const
596595 }
597596}
598597
599- void APLRRegressor::scale_training_observations_if_using_log_link_function ()
598+ void APLRRegressor::scale_response_if_using_log_link_function ()
600599{
601600 if (link_function == " log" )
602601 {
@@ -606,6 +605,7 @@ void APLRRegressor::scale_training_observations_if_using_log_link_function()
606605 {
607606 scaling_factor_for_log_link_function = 1 / inverse_scaling_factor;
608607 y_train *= scaling_factor_for_log_link_function;
608+ y_validation *= scaling_factor_for_log_link_function;
609609 }
610610 else
611611 scaling_factor_for_log_link_function = 1.0 ;
@@ -835,6 +835,7 @@ void APLRRegressor::execute_boosting_step(size_t boosting_step)
835835 consider_interactions (predictor_indexes, boosting_step);
836836 select_the_best_term_and_update_errors (boosting_step);
837837 prune_terms (boosting_step);
838+ update_coefficient_steps (boosting_step);
838839 }
839840 if (abort_boosting)
840841 return ;
@@ -1201,7 +1202,6 @@ void APLRRegressor::update_terms(size_t boosting_step)
12011202 if (term_is_already_in_model)
12021203 {
12031204 terms[j].coefficient += terms_eligible_current[best_term_index].coefficient ;
1204- terms[j].coefficient_steps [boosting_step] = terms[j].coefficient ;
12051205 found = true ;
12061206 break ;
12071207 }
@@ -1216,7 +1216,6 @@ void APLRRegressor::update_terms(size_t boosting_step)
// Adds the currently selected candidate (terms_eligible_current[best_term_index]) to the model as a new term.
// The candidate's coefficient_steps is first replaced by a vector of m zeros, then a copy of the candidate
// is appended to `terms`.
// The line marked '-' below is the statement this patch removes: it used to write the candidate's coefficient
// into coefficient_steps[boosting_step] right here.
// NOTE(review): with that line gone, boosting_step is no longer read anywhere in this body.
12161216void APLRRegressor::add_new_term (size_t boosting_step)
12171217{
// Reset the per-step coefficient history to m zeros before the term is copied into the model.
12181218 terms_eligible_current[best_term_index].coefficient_steps = VectorXd::Constant (m, 0 );
1219- terms_eligible_current[best_term_index].coefficient_steps [boosting_step] = terms_eligible_current[best_term_index].coefficient ;
// Append a copy of the candidate term to the model's term list.
12201219 terms.push_back (Term (terms_eligible_current[best_term_index]));
12211220}
12221221
@@ -1229,6 +1228,11 @@ void APLRRegressor::prune_terms(size_t boosting_step)
12291228 return ;
12301229 }
12311230
1231+ if (verbosity >= 1 )
1232+ {
1233+ std::cout << " \n Pruning terms. This can be computationally intensive especially if the model gets many terms. To speed up the algorithm (potentially at the expense of slightly lower predictiveness) you can disable pruning by setting boosting_steps_before_pruning_is_done to 0.\n\n " ;
1234+ }
1235+
12321236 pruning_was_done_in_the_current_boosting_step = true ;
12331237 double best_error{neg_gradient_nullmodel_errors_sum};
12341238 double new_error;
@@ -1256,36 +1260,41 @@ void APLRRegressor::prune_terms(size_t boosting_step)
12561260 if (removal_of_term_is_better)
12571261 {
12581262 linear_predictor_update = -terms[index_for_term_to_remove].calculate_contribution_to_linear_predictor (X_train);
1263+ linear_predictor_update_validation = -terms[index_for_term_to_remove].calculate_contribution_to_linear_predictor (X_validation);
12591264 terms[index_for_term_to_remove].coefficient = 0.0 ;
12601265 update_linear_predictor_and_predictions ();
12611266 update_gradient_and_errors ();
12621267 update_intercept (boosting_step);
12631268 new_error = neg_gradient_nullmodel_errors_sum;
12641269 best_error = new_error;
12651270 ++terms_pruned;
1271+ if (verbosity >= 2 )
1272+ {
1273+ std::cout << " Pruning. Reset coefficient for " << std::to_string (terms_pruned) << " terms so far.\n " ;
1274+ }
12661275 }
12671276 } while (std::islessequal (new_error, best_error) && terms_pruned < terms.size ());
12681277 if (terms_pruned > 0 )
12691278 {
1270- remove_unused_terms ();
12711279 remove_ineligibility ();
12721280 if (verbosity >= 2 )
12731281 {
1274- std::cout << " Pruned " << std::to_string (terms_pruned) <<" terms.\n " ;
1282+ std::cout << " Done pruning. Reset coefficient for " << std::to_string (terms_pruned) << " terms in total .\n " ;
12751283 }
12761284 }
12771285}
12781286
1279- void APLRRegressor::calculate_and_validate_validation_error (size_t boosting_step)
// New function introduced by this patch (lines marked '+').
// For every term in `terms`, stores the term's current coefficient at index boosting_step of that term's
// coefficient_steps vector.
// The lines marked '-' in this hunk are NOT part of this function: they are the old opening of
// calculate_and_validate_validation_error (the log-link rescaling branch) that the patch deletes at this position.
// NOTE(review): the write is not bounds-checked here; assumes boosting_step < coefficient_steps.size() - confirm at the call site.
1287+ void APLRRegressor::update_coefficient_steps (size_t boosting_step)
12801288{
1281- if (link_function == " log " )
1289+ for ( auto &term : terms )
12821290 {
1283- VectorXd rescaled_predictions_current_validation{predictions_current_validation / scaling_factor_for_log_link_function};
1284- calculate_validation_error (boosting_step, rescaled_predictions_current_validation);
// Snapshot the coefficient as it stands at this boosting step.
1291+ term.coefficient_steps [boosting_step] = term.coefficient ;
12851292 }
1286- else
1287- calculate_validation_error (boosting_step, predictions_current_validation);
1293+ }
12881294
1295+ void APLRRegressor::calculate_and_validate_validation_error (size_t boosting_step)
1296+ {
1297+ calculate_validation_error (boosting_step, predictions_current_validation);
12891298 bool validation_error_is_invalid{!std::isfinite (validation_error_steps[boosting_step])};
12901299 if (validation_error_is_invalid)
12911300 {
@@ -1391,26 +1400,6 @@ void APLRRegressor::print_summary_after_boosting_step(size_t boosting_step)
13911400 }
13921401}
13931402
// Deleted by this patch: every line of this definition is marked '-'.
// What the removed code did: it walked the steps j = 0..m-1 in ascending order and, whenever the value at
// step j tested as approximately zero while the value at step j - 1 did not, overwrote step j with the value
// from step j - 1. This was applied first to intercept_steps and then to coefficient_steps of every term.
// Because the scan is ascending, a copied value can be copied again at the next step.
1394- void APLRRegressor::update_coefficients_for_all_steps ()
1395- {
// Pass 1: fill down the intercept history.
1396- for (size_t j = 0 ; j < m; ++j)
1397- {
1398- bool fill_down_coefficient_steps{j > 0 && is_approximately_zero (intercept_steps[j]) && !is_approximately_zero (intercept_steps[j - 1 ])};
1399- if (fill_down_coefficient_steps)
1400- intercept_steps[j] = intercept_steps[j - 1 ];
1401- }
1402-
// Pass 2: apply the same fill-down rule to each term's coefficient history.
1403- for (size_t i = 0 ; i < terms.size (); ++i)
1404- {
1405- for (size_t j = 0 ; j < m; ++j)
1406- {
1407- bool fill_down_coefficient_steps{j > 0 && is_approximately_zero (terms[i].coefficient_steps [j]) && !is_approximately_zero (terms[i].coefficient_steps [j - 1 ])};
1408- if (fill_down_coefficient_steps)
1409- terms[i].coefficient_steps [j] = terms[i].coefficient_steps [j - 1 ];
1410- }
1411- }
1412- }
1413-
14141403void APLRRegressor::print_final_summary ()
14151404{
14161405 if (verbosity >= 1 )
@@ -1485,6 +1474,7 @@ void APLRRegressor::revert_scaling_if_using_log_link_function()
14851474 if (link_function == " log" )
14861475 {
14871476 y_train /= scaling_factor_for_log_link_function;
1477+ y_validation /= scaling_factor_for_log_link_function;
14881478 intercept += std::log (1 / scaling_factor_for_log_link_function);
14891479 for (Eigen::Index i = 0 ; i < intercept_steps.size (); ++i)
14901480 {
0 commit comments