@@ -551,7 +551,7 @@ void APLRRegressor::throw_error_if_response_contains_invalid_values(const Vector
         std::string error_message{"Response values for the logit link function or binomial loss_function cannot be less than zero or greater than one."};
         throw_error_if_response_is_not_between_0_and_1(y, error_message);
     }
-    else if (loss_function == "gamma" || (loss_function == "tweedie" && std::isgreater(dispersion_parameter, 2)))
+    else if (loss_function == "gamma" || (loss_function == "tweedie" && std::isgreater(dispersion_parameter, 2.0)))
     {
         std::string error_message;
         if (loss_function == "tweedie")
@@ -560,7 +560,7 @@ void APLRRegressor::throw_error_if_response_contains_invalid_values(const Vector
             error_message = "Response values for the " + loss_function + " loss_function must be greater than zero.";
         throw_error_if_vector_contains_non_positive_values(y, error_message);
     }
-    else if (link_function == "log" || loss_function == "poisson" || loss_function == "negative_binomial" || loss_function == "weibull" || (loss_function == "tweedie" && std::isless(dispersion_parameter, 2) && std::isgreater(dispersion_parameter, 1)))
+    else if (link_function == "log" || loss_function == "poisson" || loss_function == "negative_binomial" || loss_function == "weibull" || (loss_function == "tweedie" && std::isless(dispersion_parameter, 2.0) && std::isgreater(dispersion_parameter, 1.0)))
     {
         std::string error_message{"Response values for the log link function or poisson loss_function or negative binomial loss function or weibull loss function or tweedie loss_function when dispersion_parameter<2 cannot be less than zero."};
         throw_error_if_vector_contains_negative_values(y, error_message);
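
Note: these two hunks switch the dispersion_parameter comparisons to std::isgreater/std::isless from <cmath> with double literals. A minimal sketch of the behavioral difference, assuming dispersion_parameter could be NaN:

    #include <cmath>
    #include <iostream>

    int main()
    {
        double dispersion_parameter{std::nan("")};
        // Like the raw < and > operators, std::isless and std::isgreater
        // return false when either operand is NaN, but they are guaranteed
        // not to raise FE_INVALID; the 2.0/1.0 literals also avoid implicit
        // int-to-double conversions at the call site.
        std::cout << std::isgreater(dispersion_parameter, 2.0) << '\n'; // 0
        std::cout << std::isless(dispersion_parameter, 2.0) << '\n';    // 0
    }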
@@ -569,7 +569,7 @@ void APLRRegressor::throw_error_if_response_contains_invalid_values(const Vector
     {
         std::string error_message{"Response values cannot be negative when using the negative_gini validation_tuning_metric."};
         throw_error_if_vector_contains_negative_values(y, error_message);
-        bool sum_is_zero{y.sum() == 0};
+        bool sum_is_zero{is_approximately_zero(y.sum())};
        if (sum_is_zero)
            throw std::runtime_error("Response values cannot sum to zero when using the negative_gini validation_tuning_metric.");
    }
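
Note: is_approximately_zero replaces an exact floating-point equality test on y.sum(). The helper is defined elsewhere in the APLR codebase; a sketch of the usual epsilon-based form, with the tolerance as an assumption:

    #include <cmath>
    #include <limits>

    // Sketch only; the real helper lives elsewhere in APLR and may use a
    // different tolerance.
    bool is_approximately_zero(double value,
                               double eps = std::numeric_limits<double>::epsilon())
    {
        return std::fabs(value) <= eps;
    }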
@@ -687,7 +687,10 @@ void APLRRegressor::define_training_and_validation_sets(const MatrixXd &X, const
         {
             sample_weight_train[i] = sample_weight[train_indexes[i]];
         }
+        sample_weight_train /= sample_weight_train.mean();
     }
+    else
+        sample_weight_train = VectorXd::Constant(y_train.rows(), 1.0);
     bool groups_are_provided{group.size() > 0};
     if (groups_are_provided)
     {
@@ -720,7 +723,10 @@ void APLRRegressor::define_training_and_validation_sets(const MatrixXd &X, const
         {
             sample_weight_validation[i] = sample_weight[validation_indexes[i]];
         }
+        sample_weight_validation /= sample_weight_validation.mean();
     }
+    else
+        sample_weight_validation = VectorXd::Constant(y_validation.rows(), 1.0);
     if (groups_are_provided)
     {
         group_validation.resize(validation_indexes.size());
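
Note: together these two hunks establish the invariant that sample_weight_train and sample_weight_validation are always populated, normalized to mean 1.0 when weights were provided and filled with ones otherwise. A sketch of that invariant (the helper name is illustrative, not part of the codebase):

    #include <Eigen/Dense>
    using Eigen::VectorXd;

    // Illustrative helper, not APLR code: returns weights normalized to
    // mean 1.0, or a vector of ones when no weights were provided.
    VectorXd normalize_or_default(const VectorXd &weights, Eigen::Index n)
    {
        if (weights.size() > 0)
            return weights / weights.mean();
        return VectorXd::Constant(n, 1.0);
    }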
@@ -937,7 +943,7 @@ VectorXd APLRRegressor::calculate_neg_gradient_current()
         output = (y_train.array() - predictions_current.array()).sign() * mae;
         for (Eigen::Index i = 0; i < y_train.size(); ++i)
         {
-            if (y_train[i] < predictions_current[i])
+            if (std::isless(y_train[i], predictions_current[i]))
                 output[i] *= 1 - quantile;
             else
                 output[i] *= quantile;
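
Note: this branch computes the negative gradient of the quantile (pinball) loss: the residual sign scaled by mae, then weighted by quantile for under-predictions and 1 - quantile for over-predictions. A toy check with all values assumed:

    #include <cmath>
    #include <iostream>

    int main()
    {
        double quantile{0.9}, mae{1.0};
        double y{2.0}, prediction{1.0}; // under-prediction: y > prediction
        double g{(y - prediction > 0.0 ? 1.0 : -1.0) * mae};
        g *= std::isless(y, prediction) ? 1 - quantile : quantile;
        std::cout << g << '\n'; // 0.9: strong upward pull toward the 90th percentile
    }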
@@ -984,20 +990,9 @@ VectorXd APLRRegressor::calculate_neg_gradient_current_for_group_mse(GroupData &
     }

     VectorXd output{VectorXd(y_train.rows())};
-    bool sample_weight_is_provided{sample_weight_train.size() > 0};
-    if (sample_weight_is_provided)
+    for (Eigen::Index i = 0; i < y_train.size(); ++i)
     {
-        for (Eigen::Index i = 0; i < y_train.size(); ++i)
-        {
-            output[i] = group_residuals_and_count.error[group[i]] * sample_weight_train[i];
-        }
-    }
-    else
-    {
-        for (Eigen::Index i = 0; i < y_train.size(); ++i)
-        {
-            output[i] = group_residuals_and_count.error[group[i]];
-        }
+        output[i] = group_residuals_and_count.error[group[i]] * sample_weight_train[i];
     }

     return output;
@@ -1093,10 +1088,7 @@ void APLRRegressor::execute_boosting_step(size_t boosting_step, Eigen::Index fol
 void APLRRegressor::update_intercept(size_t boosting_step)
 {
     double intercept_update;
-    if (sample_weight_train.size() == 0)
-        intercept_update = v * neg_gradient_current.mean();
-    else
-        intercept_update = v * (neg_gradient_current.array() * sample_weight_train.array()).sum() / sample_weight_train.array().sum();
+    intercept_update = v * (neg_gradient_current.array() * sample_weight_train.array()).sum() / sample_weight_train.array().sum();
     if (model_has_changed_in_this_boosting_step == false)
         model_has_changed_in_this_boosting_step = !is_approximately_equal(intercept_update, 0.0);
     linear_predictor_update = VectorXd::Constant(neg_gradient_current.size(), intercept_update);
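
Note: dropping the unweighted branch here (and in the group MSE gradient above) is safe because of the weight invariant: with all weights equal to 1.0 the weighted mean reduces to neg_gradient_current.mean(). A quick check, assuming Eigen:

    #include <Eigen/Dense>
    #include <cassert>
    #include <cmath>
    using Eigen::VectorXd;

    int main()
    {
        VectorXd g{VectorXd::Random(5)};
        VectorXd w{VectorXd::Constant(5, 1.0)}; // the new default weights
        double weighted{(g.array() * w.array()).sum() / w.array().sum()};
        // With unit weights the weighted mean equals the plain mean, so the
        // removed else branch computed the same value.
        assert(std::fabs(weighted - g.mean()) < 1e-12);
    }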
@@ -1630,7 +1622,7 @@ void APLRRegressor::merge_similar_terms(const MatrixXd &X)
                 {
                     VectorXd values_i{terms[i].calculate(X)};
                     VectorXd values_j{terms[j].calculate(X)};
-                    bool terms_are_similar{values_i == values_j};
+                    bool terms_are_similar{all_are_equal(values_i, values_j)};
                     if (terms_are_similar)
                     {
                         if (terms[i].get_interaction_level() > terms[j].get_interaction_level())
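
Note: Eigen's operator== on two VectorXd objects compares every coefficient exactly (cwiseEqual(...).all()), so the old test compiled but treated vectors differing only by rounding noise as distinct. all_are_equal is an APLR helper defined elsewhere; a sketch of a tolerance-based version, with the epsilon as an assumption:

    #include <Eigen/Dense>
    using Eigen::VectorXd;

    // Sketch only; the real all_are_equal helper is defined elsewhere in APLR.
    bool all_are_equal(const VectorXd &a, const VectorXd &b, double eps = 1e-12)
    {
        if (a.size() != b.size())
            return false;
        return ((a - b).array().abs() <= eps).all();
    }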
@@ -1744,7 +1736,7 @@ std::string APLRRegressor::compute_raw_base_term_name(const Term &term, const st
     {
         double temp_split_point{term.split_point};
         std::string sign{"-"};
-        if (std::isless(temp_split_point, 0))
+        if (std::isless(temp_split_point, 0.0))
         {
             temp_split_point = -temp_split_point;
             sign = "+";
@@ -1839,15 +1831,7 @@ void APLRRegressor::write_output_to_cv_fold_models(Eigen::Index fold_index)
     cv_fold_models[fold_index].fold_index = fold_index;
     cv_fold_models[fold_index].min_training_prediction_or_response = min_training_prediction_or_response;
     cv_fold_models[fold_index].max_training_prediction_or_response = max_training_prediction_or_response;
-    bool sample_weight_is_provided{sample_weight_train.size() > 0};
-    if (sample_weight_is_provided)
-    {
-        cv_fold_models[fold_index].sample_weight_train_sum = sample_weight_train.sum();
-    }
-    else
-    {
-        cv_fold_models[fold_index].sample_weight_train_sum = static_cast<double>(y_train.rows());
-    }
+    cv_fold_models[fold_index].sample_weight_train_sum = sample_weight_train.sum();
 }

 void APLRRegressor::cleanup_after_fit()
@@ -1989,7 +1973,7 @@ void APLRRegressor::sort_terms()
 {
     std::sort(terms.begin(), terms.end(),
               [](const Term &a, const Term &b)
-              { return a.estimated_term_importance > b.estimated_term_importance ||
+              { return std::isgreater(a.estimated_term_importance, b.estimated_term_importance) ||
                        (is_approximately_equal(a.estimated_term_importance, b.estimated_term_importance) && (a.base_term < b.base_term)) ||
                        (is_approximately_equal(a.estimated_term_importance, b.estimated_term_importance) && (a.base_term == b.base_term) &&
                         std::isless(a.coefficient, b.coefficient)); });