@@ -21,7 +21,6 @@ struct ModelForCVFold
     size_t m_optimal;
     double sample_weight_train_sum;
     double fold_weight;
-    VectorXd feature_importance;
     Eigen::Index fold_index;
     double min_training_prediction_or_response;
     double max_training_prediction_or_response;
@@ -70,6 +69,7 @@ class APLRRegressor
     std::vector<VectorXi> group_cycle_train;
     size_t group_cycle_predictor_index;
     std::vector<ModelForCVFold> cv_fold_models;
+    VectorXd intercept_steps;

     void validate_input_to_fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight, const std::vector<std::string> &X_names,
                                const MatrixXi &cv_observations, const std::vector<size_t> &prioritized_predictors_indexes,
@@ -125,16 +125,16 @@ class APLRRegressor
     void merge_similar_terms(const MatrixXd &X);
     void remove_unused_terms();
     void name_terms(const MatrixXd &X, const std::vector<std::string> &X_names);
-    void calculate_feature_importance(const MatrixXd &X, const MatrixXd &sample_weight);
     void find_min_and_max_training_predictions_or_responses();
     void write_output_to_cv_fold_models(Eigen::Index fold_index);
     void cleanup_after_fit();
     void check_term_integrity();
-    void create_final_model(const MatrixXd &X);
+    void create_final_model(const MatrixXd &X, const VectorXd &sample_weight);
     void compute_fold_weights();
     void update_intercept_and_term_weights();
     void create_terms(const MatrixXd &X);
-    void calculate_final_feature_importance();
+    void estimate_feature_and_term_importances(const MatrixXd &X, const VectorXd &sample_weight);
+    void sort_terms();
     void compute_cv_error();
     void concatenate_validation_error_steps();
     void find_final_min_and_max_training_predictions_or_responses();
@@ -160,6 +160,7 @@ class APLRRegressor
     void throw_error_if_m_is_invalid();
     bool model_has_not_been_trained();
     std::vector<size_t> compute_relevant_term_indexes(size_t predictor_index);
+    void validate_sample_weight(const MatrixXd &X, const VectorXd &sample_weight);

 public:
     double intercept;
@@ -177,7 +178,6 @@ class APLRRegressor
     std::vector<std::string> term_names;
     VectorXd term_coefficients;
     size_t max_interaction_level;
-    VectorXd intercept_steps;
     size_t max_interactions;
     size_t interactions_eligible;
     MatrixXd validation_error_steps;
@@ -186,6 +186,7 @@ class APLRRegressor
     size_t max_eligible_terms;
     size_t number_of_base_terms;
     VectorXd feature_importance;
+    VectorXd term_importance;
     double dispersion_parameter;
     double min_training_prediction_or_response;
     double max_training_prediction_or_response;
@@ -222,13 +223,16 @@ class APLRRegressor
              const MatrixXd &other_data = MatrixXd(0, 0));
     VectorXd predict(const MatrixXd &X, bool cap_predictions_to_minmax_in_training = true);
     void set_term_names(const std::vector<std::string> &X_names);
+    VectorXd calculate_feature_importance(const MatrixXd &X, const VectorXd &sample_weight);
+    VectorXd calculate_term_importance(const MatrixXd &X, const VectorXd &sample_weight);
     MatrixXd calculate_local_feature_contribution(const MatrixXd &X);
-    MatrixXd calculate_local_feature_contribution_for_terms(const MatrixXd &X);
+    MatrixXd calculate_local_term_contribution(const MatrixXd &X);
     MatrixXd calculate_terms(const MatrixXd &X);
     std::vector<std::string> get_term_names();
     VectorXd get_term_coefficients();
     MatrixXd get_validation_error_steps();
     VectorXd get_feature_importance();
+    VectorXd get_term_importance();
     double get_intercept();
     size_t get_optimal_m();
     std::string get_validation_tuning_metric();
@@ -251,7 +255,7 @@ APLRRegressor::APLRRegressor(size_t m, double v, uint_fast32_t random_state, std
                             size_t group_mse_by_prediction_bins, size_t group_mse_cycle_min_obs_in_bin)
     : reserved_terms_times_num_x{reserved_terms_times_num_x}, intercept{NAN_DOUBLE}, m{m}, v{v},
       loss_function{loss_function}, link_function{link_function}, cv_folds{cv_folds}, n_jobs{n_jobs}, random_state{random_state},
-      bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level}, intercept_steps{VectorXd(0)},
+      bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
       max_interactions{max_interactions}, interactions_eligible{0}, validation_error_steps{MatrixXd(0, 0)},
       min_observations_in_split{min_observations_in_split}, ineligible_boosting_steps_added{ineligible_boosting_steps_added},
       max_eligible_terms{max_eligible_terms}, number_of_base_terms{0}, dispersion_parameter{dispersion_parameter}, min_training_prediction_or_response{NAN_DOUBLE},
@@ -271,11 +275,12 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other)
       loss_function{other.loss_function}, link_function{other.link_function}, cv_folds{other.cv_folds},
       n_jobs{other.n_jobs}, random_state{other.random_state}, bins{other.bins},
       verbosity{other.verbosity}, term_names{other.term_names}, term_coefficients{other.term_coefficients},
-      max_interaction_level{other.max_interaction_level}, intercept_steps{other.intercept_steps}, max_interactions{other.max_interactions},
+      max_interaction_level{other.max_interaction_level}, max_interactions{other.max_interactions},
       interactions_eligible{other.interactions_eligible}, validation_error_steps{other.validation_error_steps},
       min_observations_in_split{other.min_observations_in_split}, ineligible_boosting_steps_added{other.ineligible_boosting_steps_added},
       max_eligible_terms{other.max_eligible_terms}, number_of_base_terms{other.number_of_base_terms},
-      feature_importance{other.feature_importance}, dispersion_parameter{other.dispersion_parameter}, min_training_prediction_or_response{other.min_training_prediction_or_response},
+      feature_importance{other.feature_importance}, term_importance{other.term_importance}, dispersion_parameter{other.dispersion_parameter},
+      min_training_prediction_or_response{other.min_training_prediction_or_response},
       max_training_prediction_or_response{other.max_training_prediction_or_response}, validation_tuning_metric{other.validation_tuning_metric},
       quantile{other.quantile}, m_optimal{other.m_optimal},
       calculate_custom_validation_error_function{other.calculate_custom_validation_error_function},
@@ -310,7 +315,7 @@ void APLRRegressor::fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sa
     {
         fit_model_for_cv_fold(X, y, sample_weight, X_names, cv_observations_used.col(i), monotonic_constraints, group, other_data, i);
     }
-    create_final_model(X);
+    create_final_model(X, sample_weight);
 }

 void APLRRegressor::preprocess_prioritized_predictors_and_interaction_constraints(
@@ -347,7 +352,6 @@ void APLRRegressor::fit_model_for_cv_fold(const MatrixXd &X, const VectorXd &y,
     remove_unused_terms();
     revert_scaling_if_using_log_link_function();
     name_terms(X, X_names);
-    calculate_feature_importance(X_validation, sample_weight_validation);
     find_min_and_max_training_predictions_or_responses();
     write_output_to_cv_fold_models(fold_index);
     cleanup_after_fit();
@@ -1732,14 +1736,42 @@ std::string APLRRegressor::compute_raw_base_term_name(const Term &term, const st
     return name;
 }

-void APLRRegressor::calculate_feature_importance(const MatrixXd &X, const MatrixXd &sample_weight)
+VectorXd APLRRegressor::calculate_feature_importance(const MatrixXd &X, const VectorXd &sample_weight)
 {
-    feature_importance = VectorXd::Constant(number_of_base_terms, 0);
+    validate_that_model_can_be_used(X);
+    validate_sample_weight(X, sample_weight);
+    VectorXd feature_importance = VectorXd::Constant(number_of_base_terms, 0);
     MatrixXd li{calculate_local_feature_contribution(X)};
     for (Eigen::Index i = 0; i < li.cols(); ++i) // For each column calculate standard deviation of contribution to linear predictor
     {
         feature_importance[i] = calculate_standard_deviation(li.col(i), sample_weight);
     }
+    return feature_importance;
+}
+
+void APLRRegressor::validate_sample_weight(const MatrixXd &X, const VectorXd &sample_weight)
+{
+    bool sample_weight_is_provided{sample_weight.size() > 0};
+    if (sample_weight_is_provided)
+    {
+        bool sample_weight_is_invalid{sample_weight.rows() != X.rows()};
+        if (sample_weight_is_invalid)
+            throw std::runtime_error("If sample_weight is provided then it needs to contain as many rows as X does.");
+    }
+}
+
+VectorXd APLRRegressor::calculate_term_importance(const MatrixXd &X, const VectorXd &sample_weight)
+{
+    validate_that_model_can_be_used(X);
+    validate_sample_weight(X, sample_weight);
+    VectorXd term_importance = VectorXd::Constant(terms.size(), 0);
+    for (size_t i = 0; i < terms.size(); ++i)
+    {
+        VectorXd contrib{terms[i].calculate_contribution_to_linear_predictor(X)};
+        double std_dev_of_contribution(calculate_standard_deviation(contrib, sample_weight));
+        term_importance[i] = std_dev_of_contribution;
+    }
+    return term_importance;
 }

 MatrixXd APLRRegressor::calculate_local_feature_contribution(const MatrixXd &X)
@@ -1783,7 +1815,6 @@ void APLRRegressor::write_output_to_cv_fold_models(Eigen::Index fold_index)
     cv_fold_models[fold_index].validation_error_steps = validation_error_steps;
     cv_fold_models[fold_index].validation_error = validation_error_steps.col(0).minCoeff();
     cv_fold_models[fold_index].m_optimal = get_optimal_m();
-    cv_fold_models[fold_index].feature_importance = get_feature_importance();
     cv_fold_models[fold_index].fold_index = fold_index;
     cv_fold_models[fold_index].min_training_prediction_or_response = min_training_prediction_or_response;
     cv_fold_models[fold_index].max_training_prediction_or_response = max_training_prediction_or_response;
@@ -1867,12 +1898,13 @@ void APLRRegressor::check_term_integrity()
     }
 }

-void APLRRegressor::create_final_model(const MatrixXd &X)
+void APLRRegressor::create_final_model(const MatrixXd &X, const VectorXd &sample_weight)
 {
     compute_fold_weights();
     update_intercept_and_term_weights();
     create_terms(X);
-    calculate_final_feature_importance();
+    estimate_feature_and_term_importances(X, sample_weight);
+    sort_terms();
     compute_cv_error();
     concatenate_validation_error_steps();
     find_final_min_and_max_training_predictions_or_responses();
@@ -1919,22 +1951,30 @@ void APLRRegressor::create_terms(const MatrixXd &X)
     }
     merge_similar_terms(X);
     remove_unused_terms();
-    std::sort(terms.begin(), terms.end(),
-              [](const Term &a, const Term &b)
-              { return a.base_term < b.base_term ||
-                       (a.base_term == b.base_term && std::isless(a.coefficient, b.coefficient)); });
 }

-void APLRRegressor::calculate_final_feature_importance()
+void APLRRegressor::estimate_feature_and_term_importances(const MatrixXd &X, const VectorXd &sample_weight)
 {
-    for (auto &cv_fold_model : cv_fold_models)
+    feature_importance = calculate_feature_importance(X, sample_weight);
+    term_importance = calculate_term_importance(X, sample_weight);
+    for (size_t i = 0; i < terms.size(); ++i)
     {
-        cv_fold_model.feature_importance *= cv_fold_model.fold_weight;
+        terms[i].estimated_term_importance = term_importance[i];
     }
-    feature_importance = VectorXd::Constant(feature_importance.rows(), 0.0);
-    for (auto &cv_fold_model : cv_fold_models)
+}
+
+void APLRRegressor::sort_terms()
+{
+    std::sort(terms.begin(), terms.end(),
+              [](const Term &a, const Term &b)
+              { return a.estimated_term_importance > b.estimated_term_importance ||
+                       (is_approximately_equal(a.estimated_term_importance, b.estimated_term_importance) && (a.base_term < b.base_term)) ||
+                       (is_approximately_equal(a.estimated_term_importance, b.estimated_term_importance) && (a.base_term == b.base_term) &&
+                        std::isless(a.coefficient, b.coefficient)); });
+
+    for (size_t i = 0; i < terms.size(); ++i)
     {
-        feature_importance += cv_fold_model.feature_importance;
+        term_importance[i] = terms[i].estimated_term_importance;
     }
 }

@@ -2038,7 +2078,7 @@ void APLRRegressor::cap_predictions_to_minmax_in_training(VectorXd &predictions)
     }
 }

-MatrixXd APLRRegressor::calculate_local_feature_contribution_for_terms(const MatrixXd &X)
+MatrixXd APLRRegressor::calculate_local_term_contribution(const MatrixXd &X)
 {
     validate_that_model_can_be_used(X);

@@ -2088,6 +2128,11 @@ VectorXd APLRRegressor::get_feature_importance()
     return feature_importance;
 }

+VectorXd APLRRegressor::get_term_importance()
+{
+    return term_importance;
+}
+
 double APLRRegressor::get_intercept()
 {
     return intercept;
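
For orientation, the importance measure this patch moves to is the (optionally sample-weighted) standard deviation of a term's contribution to the linear predictor, evaluated on data supplied by the caller. Below is a minimal, self-contained sketch of that statistic using Eigen only. The helper name weighted_std_dev, the equal-weight fallback for an empty weight vector, and the use of the population variance are assumptions made for illustration, not the APLR implementation; in the patch the equivalent work is done by calculate_standard_deviation inside calculate_feature_importance and calculate_term_importance.

#include <Eigen/Dense>
#include <cmath>
#include <iostream>

using Eigen::VectorXd;

// Hypothetical helper sketching the importance metric used in this patch:
// the standard deviation of one term's contribution to the linear predictor,
// weighted by sample_weight. An empty weight vector is treated as equal weights.
static double weighted_std_dev(const VectorXd &contribution, const VectorXd &sample_weight)
{
    if (sample_weight.size() == 0)
    {
        double mean{contribution.mean()};
        return std::sqrt((contribution.array() - mean).square().mean());
    }
    double weight_sum{sample_weight.sum()};
    double mean{(contribution.array() * sample_weight.array()).sum() / weight_sum};
    double variance{(sample_weight.array() * (contribution.array() - mean).square()).sum() / weight_sum};
    return std::sqrt(variance);
}

int main()
{
    // Contribution of one term to the linear predictor for four observations.
    VectorXd contribution(4);
    contribution << 0.0, 1.0, 2.0, 3.0;

    VectorXd no_weights;            // empty: treated as unweighted
    VectorXd weights(4);
    weights << 1.0, 1.0, 1.0, 5.0;  // emphasize the last observation

    std::cout << "unweighted importance: " << weighted_std_dev(contribution, no_weights) << "\n";
    std::cout << "weighted importance:   " << weighted_std_dev(contribution, weights) << "\n";
    return 0;
}

In the patch itself, these per-term values populate term_importance (exposed via calculate_term_importance and get_term_importance), the same statistic computed on the columns of calculate_local_feature_contribution populates feature_importance, and sort_terms() orders the fitted terms by estimated_term_importance in descending order.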