@@ -118,22 +118,18 @@ class APLRRegressor
118118 size_t number_of_base_terms;
119119 VectorXd feature_importance; // Populated in fit() using validation set. Rows are in the same order as in X.
120120 double tweedie_power;
121- bool cap_outliers_in_validation_set;
122- bool cap_outliers_when_using_the_model;
123121
124122 // Methods
125123 APLRRegressor (size_t m=1000 ,double v=0.1 ,uint_fast32_t random_state=std::numeric_limits<uint_fast32_t >::lowest(),std::string family=" gaussian" ,
126124 std::string link_function=" identity" , size_t n_jobs=0 , double validation_ratio=0.2 ,double intercept=NAN_DOUBLE,
127125 size_t reserved_terms_times_num_x=100 , size_t bins=300 ,size_t verbosity=0 ,size_t max_interaction_level=1 ,size_t max_interactions=100000 ,
128- size_t min_observations_in_split=20 , size_t ineligible_boosting_steps_added=10 , size_t max_eligible_terms=5 ,double tweedie_power=1.5 ,
129- bool cap_outliers_in_validation_set=true , bool cap_outliers_when_using_the_model=true );
126+ size_t min_observations_in_split=20 , size_t ineligible_boosting_steps_added=10 , size_t max_eligible_terms=5 ,double tweedie_power=1.5 );
130127 APLRRegressor (const APLRRegressor &other);
131128 ~APLRRegressor ();
132129 void fit (const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight=VectorXd(0 ),const std::vector<std::string> &X_names={},const std::vector<size_t > &validation_set_indexes={});
133130 VectorXd predict (const MatrixXd &X);
134131 void set_term_names (const std::vector<std::string> &X_names);
135132 MatrixXd calculate_local_feature_importance (const MatrixXd &X);
136- MatrixXd calculate_local_feature_importance_base_function (const MatrixXd &X, bool cap_outliers);
137133 MatrixXd calculate_local_feature_importance_for_terms (const MatrixXd &X);
138134 MatrixXd calculate_terms (const MatrixXd &X);
139135 std::vector<std::string> get_term_names ();
@@ -144,21 +140,18 @@ class APLRRegressor
144140 double get_intercept ();
145141 VectorXd get_intercept_steps ();
146142 size_t get_m ();
147- void set_cap_outliers_when_using_the_model (bool cap_outliers_when_using_the_model);
148143};
149144
150145// Regular constructor
151146APLRRegressor::APLRRegressor (size_t m,double v,uint_fast32_t random_state,std::string family,std::string link_function,size_t n_jobs,
152147 double validation_ratio,double intercept,size_t reserved_terms_times_num_x,size_t bins,size_t verbosity,size_t max_interaction_level,
153- size_t max_interactions,size_t min_observations_in_split,size_t ineligible_boosting_steps_added,size_t max_eligible_terms,double tweedie_power,
154- bool cap_outliers_in_validation_set, bool cap_outliers_when_using_the_model):
148+ size_t max_interactions,size_t min_observations_in_split,size_t ineligible_boosting_steps_added,size_t max_eligible_terms,double tweedie_power):
155149 reserved_terms_times_num_x{reserved_terms_times_num_x},intercept{intercept},m{m},v{v},
156150 family{family},link_function{link_function},validation_ratio{validation_ratio},n_jobs{n_jobs},random_state{random_state},
157151 bins{bins},verbosity{verbosity},max_interaction_level{max_interaction_level},
158152 intercept_steps{VectorXd (0 )},max_interactions{max_interactions},interactions_eligible{0 },validation_error_steps{VectorXd (0 )},
159153 min_observations_in_split{min_observations_in_split},ineligible_boosting_steps_added{ineligible_boosting_steps_added},
160- max_eligible_terms{max_eligible_terms},number_of_base_terms{0 },tweedie_power{tweedie_power},
161- cap_outliers_in_validation_set{cap_outliers_in_validation_set},cap_outliers_when_using_the_model{cap_outliers_when_using_the_model}
154+ max_eligible_terms{max_eligible_terms},number_of_base_terms{0 },tweedie_power{tweedie_power}
162155{
163156}
164157
@@ -172,8 +165,7 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other):
172165 max_interactions{other.max_interactions },interactions_eligible{other.interactions_eligible },validation_error_steps{other.validation_error_steps },
173166 min_observations_in_split{other.min_observations_in_split },ineligible_boosting_steps_added{other.ineligible_boosting_steps_added },
174167 max_eligible_terms{other.max_eligible_terms },number_of_base_terms{other.number_of_base_terms },
175- feature_importance{other.feature_importance },tweedie_power{other.tweedie_power },
176- cap_outliers_in_validation_set{other.cap_outliers_in_validation_set },cap_outliers_when_using_the_model{other.cap_outliers_when_using_the_model }
168+ feature_importance{other.feature_importance },tweedie_power{other.tweedie_power }
177169{
178170}
179171
@@ -749,7 +741,7 @@ void APLRRegressor::select_the_best_term_and_update_errors(size_t boosting_step)
749741
750742 // Updating current predictions
751743 VectorXd values{terms_eligible_current[best_term].calculate (X_train)};
752- VectorXd values_validation{terms_eligible_current[best_term].calculate (X_validation, cap_outliers_in_validation_set )};
744+ VectorXd values_validation{terms_eligible_current[best_term].calculate (X_validation)};
753745 linear_predictor_update=values*terms_eligible_current[best_term].coefficient ;
754746 linear_predictor_update_validation=values_validation*terms_eligible_current[best_term].coefficient ;
755747 double error_after_updating_term=calculate_sum_error (calculate_errors (neg_gradient_current,linear_predictor_update,sample_weight_train));
@@ -1002,7 +994,7 @@ void APLRRegressor::set_term_names(const std::vector<std::string> &X_names)
1002994void APLRRegressor::calculate_feature_importance_on_validation_set ()
1003995{
1004996 feature_importance=VectorXd::Constant (number_of_base_terms,0 );
1005- MatrixXd li{calculate_local_feature_importance_base_function (X_validation, cap_outliers_in_validation_set )};
997+ MatrixXd li{calculate_local_feature_importance (X_validation)};
1006998 for (size_t i = 0 ; i < static_cast <size_t >(li.cols ()); ++i) // for each column calculate mean abs values
1007999 {
10081000 feature_importance[i]=li.col (i).cwiseAbs ().mean ();
@@ -1012,11 +1004,6 @@ void APLRRegressor::calculate_feature_importance_on_validation_set()
10121004// Computes local feature importance on data X.
10131005// Output matrix has columns for each base term in the same order as in X and observations in rows.
10141006MatrixXd APLRRegressor::calculate_local_feature_importance (const MatrixXd &X)
1015- {
1016- return calculate_local_feature_importance_base_function (X, cap_outliers_when_using_the_model);
1017- }
1018-
1019- MatrixXd APLRRegressor::calculate_local_feature_importance_base_function (const MatrixXd &X, bool cap_outliers)
10201007{
10211008 validate_that_model_can_be_used (X);
10221009
@@ -1025,7 +1012,7 @@ MatrixXd APLRRegressor::calculate_local_feature_importance_base_function(const M
10251012 // Terms
10261013 for (size_t i = 0 ; i < terms.size (); ++i) // for each term
10271014 {
1028- VectorXd contrib{terms[i].calculate_prediction_contribution (X, cap_outliers )};
1015+ VectorXd contrib{terms[i].calculate_prediction_contribution (X)};
10291016 output.col (terms[i].base_term )+=contrib;
10301017 }
10311018
@@ -1084,7 +1071,7 @@ VectorXd APLRRegressor::calculate_linear_predictor(const MatrixXd &X)
10841071 VectorXd predictions{VectorXd::Constant (X.rows (),intercept)};
10851072 for (size_t i = 0 ; i < terms.size (); ++i) // for each term
10861073 {
1087- VectorXd contrib{terms[i].calculate_prediction_contribution (X, cap_outliers_when_using_the_model )};
1074+ VectorXd contrib{terms[i].calculate_prediction_contribution (X)};
10881075 predictions+=contrib;
10891076 }
10901077 return predictions;
@@ -1099,7 +1086,7 @@ MatrixXd APLRRegressor::calculate_local_feature_importance_for_terms(const Matri
10991086 // Terms
11001087 for (size_t i = 0 ; i < terms.size (); ++i) // for each term
11011088 {
1102- VectorXd contrib{terms[i].calculate_prediction_contribution (X, cap_outliers_when_using_the_model )};
1089+ VectorXd contrib{terms[i].calculate_prediction_contribution (X)};
11031090 output.col (i)+=contrib;
11041091 }
11051092
@@ -1115,7 +1102,7 @@ MatrixXd APLRRegressor::calculate_terms(const MatrixXd &X)
11151102 // Terms
11161103 for (size_t i = 0 ; i < terms.size (); ++i) // for each term
11171104 {
1118- VectorXd values{terms[i].calculate (X, cap_outliers_when_using_the_model )};
1105+ VectorXd values{terms[i].calculate (X)};
11191106 output.col (i)+=values;
11201107 }
11211108
@@ -1160,9 +1147,4 @@ VectorXd APLRRegressor::get_intercept_steps()
11601147size_t APLRRegressor::get_m ()
11611148{
11621149 return m;
1163- }
1164-
1165- void APLRRegressor::set_cap_outliers_when_using_the_model (bool cap_outliers_when_using_the_model)
1166- {
1167- this ->cap_outliers_when_using_the_model =cap_outliers_when_using_the_model;
11681150}
0 commit comments