Remove intercept_steps (per-boosting-step intercept coefficients) from the model, Python bindings, docs and examples

mathias-von-ottenbreit · mathias-von-ottenbreit · commit a5c6c2c40a1e · 2023-01-06T08:15:13.000+01:00
diff --git a/API_REFERENCE.md b/API_REFERENCE.md
@@ -166,11 +166,6 @@ The index of the term selected. So ***0*** is the first term, ***1*** is the sec
 ***Returns the regression coefficient of the intercept term.***
 
 
-## Method: get_intercept_steps()
-
-***Returns a numpy vector containing the regression coefficients of the intercept term by boosting step.***
-
-
 ## Method: get_m()
 
 ***Returns the number of boosting steps in the model (the value that minimized validation error).***
diff --git a/aplr/aplr.py b/aplr/aplr.py
@@ -85,9 +85,6 @@ def get_feature_importance(self)->npt.ArrayLike:
     def get_intercept(self)->float:
         return self.APLRRegressor.get_intercept()
 
-    def get_intercept_steps(self)->npt.ArrayLike:
-        return self.APLRRegressor.get_intercept_steps()
-
     def get_m(self)->int:
         return self.APLRRegressor.get_m()
 
diff --git a/cpp/APLRRegressor.h b/cpp/APLRRegressor.h
@@ -109,7 +109,6 @@ class APLRRegressor
     std::vector<std::string> term_names;
     VectorXd term_coefficients;
     size_t max_interaction_level;
-    VectorXd intercept_steps;
     size_t max_interactions; //max interactions allowed to add (counted in interactions_eligible)
     size_t interactions_eligible; //interactions that were eligible when training the model
     VectorXd validation_error_steps; //validation error for each boosting step
@@ -146,7 +145,6 @@ class APLRRegressor
     VectorXd get_validation_error_steps();
     VectorXd get_feature_importance();
     double get_intercept();
-    VectorXd get_intercept_steps();
     size_t get_m();
     double get_validation_group_mse();
 };
@@ -159,7 +157,7 @@ APLRRegressor::APLRRegressor(size_t m,double v,uint_fast32_t random_state,std::s
         reserved_terms_times_num_x{reserved_terms_times_num_x},intercept{intercept},m{m},v{v},
         family{family},link_function{link_function},validation_ratio{validation_ratio},n_jobs{n_jobs},random_state{random_state},
         bins{bins},verbosity{verbosity},max_interaction_level{max_interaction_level},
-        intercept_steps{VectorXd(0)},max_interactions{max_interactions},interactions_eligible{0},validation_error_steps{VectorXd(0)},
+        max_interactions{max_interactions},interactions_eligible{0},validation_error_steps{VectorXd(0)},
         min_observations_in_split{min_observations_in_split},ineligible_boosting_steps_added{ineligible_boosting_steps_added},
         max_eligible_terms{max_eligible_terms},number_of_base_terms{0},tweedie_power{tweedie_power},min_training_prediction_or_response{NAN_DOUBLE},
         max_training_prediction_or_response{NAN_DOUBLE},validation_group_mse{NAN_DOUBLE},group_size_for_validation_group_mse{group_size_for_validation_group_mse}
@@ -172,8 +170,8 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other):
     family{other.family},link_function{other.link_function},validation_ratio{other.validation_ratio},
     n_jobs{other.n_jobs},random_state{other.random_state},bins{other.bins},
     verbosity{other.verbosity},term_names{other.term_names},term_coefficients{other.term_coefficients},
-    max_interaction_level{other.max_interaction_level},intercept_steps{other.intercept_steps},
-    max_interactions{other.max_interactions},interactions_eligible{other.interactions_eligible},validation_error_steps{other.validation_error_steps},
+    max_interaction_level{other.max_interaction_level},max_interactions{other.max_interactions},
+    interactions_eligible{other.interactions_eligible},validation_error_steps{other.validation_error_steps},
     min_observations_in_split{other.min_observations_in_split},ineligible_boosting_steps_added{other.ineligible_boosting_steps_added},
     max_eligible_terms{other.max_eligible_terms},number_of_base_terms{other.number_of_base_terms},
     feature_importance{other.feature_importance},tweedie_power{other.tweedie_power},min_training_prediction_or_response{other.min_training_prediction_or_response},
@@ -420,7 +418,6 @@ void APLRRegressor::initialize()
     terms.clear();
 
     intercept=0;
-    intercept_steps=VectorXd::Constant(m,0);
 
     terms_eligible_current.reserve(X_train.cols()*reserved_terms_times_num_x);
     for (size_t i = 0; i < static_cast<size_t>(X_train.cols()); ++i)
@@ -519,7 +516,6 @@ void APLRRegressor::update_intercept()
         intercept=neg_gradient_current.mean();
     else
         intercept=(neg_gradient_current.array()*sample_weight_train.array()).sum()/sample_weight_train.array().sum();
-    intercept_steps=VectorXd::Constant(m,intercept);
     linear_predictor_update=VectorXd::Constant(neg_gradient_current.size(),intercept);
     linear_predictor_update_validation=VectorXd::Constant(y_validation.size(),intercept);
     update_linear_predictor_and_predictors();
@@ -886,13 +882,6 @@ void APLRRegressor::print_summary_after_boosting_step(size_t boosting_step)
 
 void APLRRegressor::update_coefficients_for_all_steps()
 {
-    //Filling down coefficient_steps for the intercept
-    for (size_t j = 0; j < m; ++j) //For each boosting step
-    {
-        if(j>0 && is_approximately_zero(intercept_steps[j]) && !is_approximately_zero(intercept_steps[j-1]))
-            intercept_steps[j]=intercept_steps[j-1];
-    }
-    //Filling down coefficient_steps for each term in the model
     for (size_t i = 0; i < terms.size(); ++i) //For each term
     {
         for (size_t j = 0; j < m; ++j) //For each boosting step
@@ -916,7 +905,6 @@ void APLRRegressor::find_optimal_m_and_update_model_accordingly()
     //Choosing optimal m and updating coefficients
     size_t best_boosting_step_index;
     validation_error_steps.minCoeff(&best_boosting_step_index); //boosting step with lowest error
-    intercept=intercept_steps[best_boosting_step_index];
     for (size_t i = 0; i < terms.size(); ++i) //for each term set coefficient
     {
         terms[i].coefficient=terms[i].coefficient_steps[best_boosting_step_index];
@@ -1194,11 +1182,6 @@ double APLRRegressor::get_intercept()
     return intercept;
 }
 
-VectorXd APLRRegressor::get_intercept_steps()
-{
-    return intercept_steps;
-}
-
 size_t APLRRegressor::get_m()
 {
     return m;
diff --git a/cpp/pythonbinding.cpp b/cpp/pythonbinding.cpp
@@ -32,11 +32,9 @@ PYBIND11_MODULE(aplr_cpp, m) {
         .def("get_validation_error_steps", &APLRRegressor::get_validation_error_steps)
         .def("get_feature_importance", &APLRRegressor::get_feature_importance)
         .def("get_intercept", &APLRRegressor::get_intercept)
-        .def("get_intercept_steps", &APLRRegressor::get_intercept_steps)
         .def("get_m", &APLRRegressor::get_m)
         .def("get_validation_group_mse", &APLRRegressor::get_validation_group_mse)
         .def_readwrite("intercept", &APLRRegressor::intercept)
-        .def_readwrite("intercept_steps", &APLRRegressor::intercept_steps)
         .def_readwrite("m", &APLRRegressor::m)
         .def_readwrite("v", &APLRRegressor::v)
         .def_readwrite("max_interaction_level", &APLRRegressor::max_interaction_level)
@@ -67,35 +65,34 @@ PYBIND11_MODULE(aplr_cpp, m) {
             [](const APLRRegressor &a) { // __getstate__
                 /* Return a tuple that fully encodes the state of the object */
                 return py::make_tuple(a.m,a.v,a.random_state,a.family,a.n_jobs,a.validation_ratio,a.intercept,a.bins,a.verbosity,
-                    a.max_interaction_level,a.max_interactions,a.validation_error_steps,a.term_names,a.term_coefficients,a.terms,a.intercept_steps,
+                    a.max_interaction_level,a.max_interactions,a.validation_error_steps,a.term_names,a.term_coefficients,a.terms,
                     a.interactions_eligible,a.min_observations_in_split,a.ineligible_boosting_steps_added,a.max_eligible_terms,
                     a.number_of_base_terms,a.feature_importance,a.link_function,a.tweedie_power,a.min_training_prediction_or_response,a.max_training_prediction_or_response,
                     a.validation_group_mse,a.group_size_for_validation_group_mse);
             },
             [](py::tuple t) { // __setstate__
-                if (t.size() != 28)
+                if (t.size() != 27)
                     throw std::runtime_error("Invalid state!");
 
                 /* Create a new C++ instance */
                 APLRRegressor a(t[0].cast<size_t>(),t[1].cast<double>(),t[2].cast<uint_fast32_t>(),t[3].cast<std::string>(),
-                    t[22].cast<std::string>(),t[4].cast<size_t>(),t[5].cast<double>(),
-                    t[6].cast<double>(),100,t[7].cast<size_t>(),t[8].cast<size_t>(),t[9].cast<size_t>(),t[10].cast<double>(),t[17].cast<size_t>(),
-                    t[23].cast<double>());
+                    t[21].cast<std::string>(),t[4].cast<size_t>(),t[5].cast<double>(),
+                    t[6].cast<double>(),100,t[7].cast<size_t>(),t[8].cast<size_t>(),t[9].cast<size_t>(),t[10].cast<double>(),t[16].cast<size_t>(),
+                    t[22].cast<double>());
 
                 a.validation_error_steps=t[11].cast<VectorXd>();
                 a.term_names=t[12].cast<std::vector<std::string>>();
                 a.term_coefficients=t[13].cast<VectorXd>();
                 a.terms=t[14].cast<std::vector<Term>>();
-                a.intercept_steps=t[15].cast<VectorXd>();
-                a.interactions_eligible=t[16].cast<size_t>();
-                a.ineligible_boosting_steps_added=t[18].cast<size_t>();
-                a.max_eligible_terms=t[19].cast<size_t>();
-                a.number_of_base_terms=t[20].cast<size_t>();
-                a.feature_importance=t[21].cast<VectorXd>();
-                a.min_training_prediction_or_response=t[24].cast<double>();
-                a.max_training_prediction_or_response=t[25].cast<double>();
-                a.validation_group_mse=t[26].cast<double>();
-                a.group_size_for_validation_group_mse=t[27].cast<size_t>();
+                a.interactions_eligible=t[15].cast<size_t>();
+                a.ineligible_boosting_steps_added=t[17].cast<size_t>();
+                a.max_eligible_terms=t[18].cast<size_t>();
+                a.number_of_base_terms=t[19].cast<size_t>();
+                a.feature_importance=t[20].cast<VectorXd>();
+                a.min_training_prediction_or_response=t[23].cast<double>();
+                a.max_training_prediction_or_response=t[24].cast<double>();
+                a.validation_group_mse=t[25].cast<double>();
+                a.group_size_for_validation_group_mse=t[26].cast<size_t>();
 
                 return a;
             }
diff --git a/examples/train_aplr_cross_validation.py b/examples/train_aplr_cross_validation.py
@@ -48,8 +48,7 @@
 #Terms in the best model
 terms=pd.DataFrame({"term":best_model.get_term_names(),"coefficient":best_model.get_term_coefficients()})
 
-#Coefficients for intercept and the first term per boosting step
-intercept_coefficient_per_boosting_step = best_model.get_intercept_steps()
+#Coefficients for the first term per boosting step
 first_term_coefficient_per_boosting_step = best_model.get_term_coefficient_steps(term_index=0)
 
 #Estimated feature importance was estimated on the validation set when the best model was trained
diff --git a/examples/train_aplr_validation.py b/examples/train_aplr_validation.py
@@ -59,8 +59,7 @@
 #Terms in the best model
 terms=pd.DataFrame({"term":best_model.get_term_names(),"coefficient":best_model.get_term_coefficients()})
 
-#Coefficients for intercept and the first term per boosting step
-intercept_coefficient_per_boosting_step = best_model.get_intercept_steps()
+#Coefficients for the first term per boosting step
 first_term_coefficient_per_boosting_step = best_model.get_term_coefficient_steps(term_index=0)
 
 #Estimated feature importance was estimated on the validation set when the best model was trained