@@ -64,8 +64,10 @@ class APLRRegressor
     void add_promising_interactions_and_select_the_best_one();
     void update_intercept(size_t boosting_step);
     void select_the_best_term_and_update_errors(size_t boosting_step);
+    void update_terms(size_t boosting_step);
     void update_gradient_and_errors();
     void add_new_term(size_t boosting_step);
+    void calculate_and_validate_validation_error(size_t boosting_step);
     void update_term_eligibility();
     void print_summary_after_boosting_step(size_t boosting_step);
     void update_coefficients_for_all_steps();
@@ -505,9 +507,12 @@ void APLRRegressor::execute_boosting_steps()
 void APLRRegressor::execute_boosting_step(size_t boosting_step)
 {
     update_intercept(boosting_step);
-    find_best_split_for_each_eligible_term();
-    consider_interactions();
-    select_the_best_term_and_update_errors(boosting_step);
+    if (!abort_boosting)
+    {
+        find_best_split_for_each_eligible_term();
+        consider_interactions();
+        select_the_best_term_and_update_errors(boosting_step);
+    }
     if (abort_boosting) return;
     update_term_eligibility();
     print_summary_after_boosting_step(boosting_step);
@@ -522,10 +527,29 @@ void APLRRegressor::update_intercept(size_t boosting_step)
     intercept_update=v*(neg_gradient_current.array()*sample_weight_train.array()).sum()/sample_weight_train.array().sum();
     linear_predictor_update=VectorXd::Constant(neg_gradient_current.size(),intercept_update);
     linear_predictor_update_validation=VectorXd::Constant(y_validation.size(),intercept_update);
-    intercept+=intercept_update;
-    intercept_steps[boosting_step]=intercept;
     update_linear_predictor_and_predictors();
     update_gradient_and_errors();
+    calculate_and_validate_validation_error(boosting_step);
+    if (!abort_boosting)
+    {
+        intercept+=intercept_update;
+        intercept_steps[boosting_step]=intercept;
+    }
+}
+
+void APLRRegressor::update_linear_predictor_and_predictors()
+{
+    linear_predictor_current+=linear_predictor_update;
+    linear_predictor_current_validation+=linear_predictor_update_validation;
+    predictions_current=transform_linear_predictor_to_predictions(linear_predictor_current,link_function,tweedie_power);
+    predictions_current_validation=transform_linear_predictor_to_predictions(linear_predictor_current_validation,link_function,tweedie_power);
+}
+
+void APLRRegressor::update_gradient_and_errors()
+{
+    neg_gradient_current=calculate_neg_gradient_current();
+    neg_gradient_nullmodel_errors=calculate_errors(neg_gradient_current,linear_predictor_null_model,sample_weight_train);
+    neg_gradient_nullmodel_errors_sum=calculate_sum_error(neg_gradient_nullmodel_errors);
 }
 
 void APLRRegressor::find_best_split_for_each_eligible_term()
@@ -745,61 +769,46 @@ void APLRRegressor::select_the_best_term_and_update_errors(size_t boosting_step)
     if (no_improvement)
     {
         abort_boosting=true;
-        return;
     }
     else
     {
         update_linear_predictor_and_predictors();
         update_gradient_and_errors();
-
-        // Has the term been entered into the model before?
-        if (terms.size()==0) // If nothing is in the model add the term
-            add_new_term(boosting_step);
-        else // If at least one term was added before
-        {
-            // Searching in existing terms
-            bool found{false};
-            for (size_t j = 0; j < terms.size(); ++j)
-            {
-                if (terms[j]==terms_eligible_current[best_term]) // if term was found, update coefficient and coefficient_steps
-                {
-                    terms[j].coefficient+=terms_eligible_current[best_term].coefficient;
-                    terms[j].coefficient_steps[boosting_step]=terms[j].coefficient;
-                    found=true;
-                    break;
-                }
-            }
-            // term was not in the model and is added to the model
-            if (!found)
-            {
-                add_new_term(boosting_step);
-            }
-        }
-    }
-
-    validation_error_steps[boosting_step]=calculate_mean_error(calculate_errors(y_validation,predictions_current_validation,sample_weight_validation,family,tweedie_power),sample_weight_validation);
-    bool validation_error_is_invalid{std::isinf(validation_error_steps[boosting_step])};
-    if (validation_error_is_invalid)
-    {
-        abort_boosting=true;
-        std::string warning_message{"Warning: Encountered numerical problems when calculating prediction errors in the previous boosting step. Not continuing with further boosting steps. One potential reason is if the combination of family and link_function is invalid."};
-        std::cout<<warning_message<<"\n";
+        double backup_of_validation_error{validation_error_steps[boosting_step]};
+        calculate_and_validate_validation_error(boosting_step);
+        if (abort_boosting)
+            validation_error_steps[boosting_step]=backup_of_validation_error;
+        else
+            update_terms(boosting_step);
     }
 }
 
-void APLRRegressor::update_linear_predictor_and_predictors()
+void APLRRegressor::update_terms(size_t boosting_step)
 {
-    linear_predictor_current+=linear_predictor_update;
-    linear_predictor_current_validation+=linear_predictor_update_validation;
-    predictions_current=transform_linear_predictor_to_predictions(linear_predictor_current,link_function,tweedie_power);
-    predictions_current_validation=transform_linear_predictor_to_predictions(linear_predictor_current_validation,link_function,tweedie_power);
-}
-
-void APLRRegressor::update_gradient_and_errors()
-{
-    neg_gradient_current=calculate_neg_gradient_current();
-    neg_gradient_nullmodel_errors=calculate_errors(neg_gradient_current,linear_predictor_null_model,sample_weight_train);
-    neg_gradient_nullmodel_errors_sum=calculate_sum_error(neg_gradient_nullmodel_errors);
+    bool no_term_is_in_model{terms.size()==0};
+    if (no_term_is_in_model)
+        add_new_term(boosting_step);
+    else
+    {
+        // Searching in existing terms
+        bool found{false};
+        for (size_t j = 0; j < terms.size(); ++j)
+        {
+            bool term_is_already_in_model{terms[j]==terms_eligible_current[best_term]};
+            if (term_is_already_in_model)
+            {
+                terms[j].coefficient+=terms_eligible_current[best_term].coefficient;
+                terms[j].coefficient_steps[boosting_step]=terms[j].coefficient;
+                found=true;
+                break;
+            }
+        }
+        // term was not in the model and is added to the model
+        if (!found)
+        {
+            add_new_term(boosting_step);
+        }
+    }
 }
 
 void APLRRegressor::add_new_term(size_t boosting_step)
@@ -812,6 +821,18 @@ void APLRRegressor::add_new_term(size_t boosting_step)
     terms.push_back(Term(terms_eligible_current[best_term]));
 }
 
+void APLRRegressor::calculate_and_validate_validation_error(size_t boosting_step)
+{
+    validation_error_steps[boosting_step]=calculate_mean_error(calculate_errors(y_validation,predictions_current_validation,sample_weight_validation,family,tweedie_power),sample_weight_validation);
+    bool validation_error_is_invalid{std::isinf(validation_error_steps[boosting_step])};
+    if (validation_error_is_invalid)
+    {
+        abort_boosting=true;
+        std::string warning_message{"Warning: Encountered numerical problems when calculating prediction errors in the previous boosting step. Not continuing with further boosting steps. One potential reason is if the combination of family and link_function is invalid."};
+        std::cout<<warning_message<<"\n";
+    }
+}
+
 void APLRRegressor::update_term_eligibility()
 {
     number_of_eligible_terms=terms_eligible_current.size();
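The pattern this commit introduces in both `update_intercept()` and `select_the_best_term_and_update_errors()` is: compute a tentative update, recalculate and validate the validation error via `calculate_and_validate_validation_error()`, and commit the update only if the error is finite, otherwise restore the backed-up error and set `abort_boosting`. The sketch below is a minimal, self-contained illustration of that compute-validate-commit flow under those assumptions; the class `TinyBooster`, its members (`run_step`, `compute_validation_error`) and the `100.0` threshold are invented for the example and are not part of APLR.

```cpp
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

// Hypothetical toy class; only the control flow is meant to resemble the diff.
class TinyBooster
{
public:
    void run_step(std::size_t boosting_step, double tentative_update)
    {
        // Compute the candidate state without committing it yet.
        double backup_of_validation_error{validation_error_steps[boosting_step]};
        double tentative_intercept{intercept + tentative_update};

        // Recalculate the validation error for the candidate state and flag
        // numerical problems (the role played by
        // calculate_and_validate_validation_error() in the commit).
        validation_error_steps[boosting_step] = compute_validation_error(tentative_intercept);
        if (std::isinf(validation_error_steps[boosting_step]))
        {
            abort_boosting = true;
            // Restore the last valid error so later bookkeeping is not polluted.
            validation_error_steps[boosting_step] = backup_of_validation_error;
            std::cout << "Warning: numerical problems, aborting boosting.\n";
            return;
        }

        // Only a validated step is committed to the model state.
        intercept = tentative_intercept;
    }

    bool abort_boosting{false};
    double intercept{0.0};
    std::vector<double> validation_error_steps = std::vector<double>(10, 0.0);

private:
    // Stand-in for the real error calculation; diverges for large intercepts
    // so the abort-and-roll-back path can be exercised.
    double compute_validation_error(double candidate_intercept) const
    {
        return candidate_intercept > 100.0 ? std::numeric_limits<double>::infinity()
                                           : candidate_intercept * candidate_intercept;
    }
};

int main()
{
    TinyBooster booster;
    booster.run_step(0, 1.5);    // valid: intercept becomes 1.5
    booster.run_step(1, 1000.0); // invalid error: rolled back, boosting aborts
    std::cout << "intercept=" << booster.intercept << " abort=" << booster.abort_boosting << "\n";
}
```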