Skip to content

Commit 644e97b

Browse files
9.0.0
1 parent f38f7e6 commit 644e97b

File tree

11 files changed

+205
-67
lines changed

11 files changed

+205
-67
lines changed

API_REFERENCE_FOR_CLASSIFICATION.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ Parameters are the same as in ***predict_class_probabilities()***.
101101

102102
## Method: calculate_local_feature_contribution(X:npt.ArrayLike)
103103

104-
***Returns a numpy matrix containing local feature importance for new data by each predictor in X.***
104+
***Returns a numpy matrix containing estimated feature contribution to the linear predictor in X for each predictor.***
105105

106106
### Parameters
107107

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -175,19 +175,39 @@ If ***True*** then predictions are capped so that they are not less than the min
175175
A list of strings containing names for each predictor in the ***X*** matrix that the model was trained on.
176176

177177

178+
## Method: calculate_feature_importance(X:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0))
179+
180+
***Returns a numpy vector containing estimated feature importance in X for each predictor.***
181+
182+
### Parameters
183+
184+
#### X
185+
A numpy matrix with predictor values.
186+
187+
188+
## Method: calculate_term_importance(X:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0))
189+
190+
***Returns a numpy vector containing estimated term importance in X for each term in the model.***
191+
192+
### Parameters
193+
194+
#### X
195+
A numpy matrix with predictor values.
196+
197+
178198
## Method: calculate_local_feature_contribution(X:npt.ArrayLike)
179199

180-
***Returns a numpy matrix containing local feature importance for new data by each predictor in X.***
200+
***Returns a numpy matrix containing estimated feature contribution to the linear predictor in X for each predictor.***
181201

182202
### Parameters
183203

184204
#### X
185205
A numpy matrix with predictor values.
186206

187207

188-
## Method: calculate_local_feature_contribution_for_terms(X:npt.ArrayLike)
208+
## Method: calculate_local_term_contribution(X:npt.ArrayLike)
189209

190-
***Returns a numpy matrix containing local feature importance for new data by each term in the model.***
210+
***Returns a numpy matrix containing term contribution to the linear predictor in X for each term in the model.***
191211

192212
### Parameters
193213

@@ -232,17 +252,16 @@ The index of the term selected. So ***0*** is the first term, ***1*** is the sec
232252

233253
## Method: get_feature_importance()
234254

235-
***Returns a numpy vector containing the feature importance of each predictor.***
255+
***Returns a numpy vector containing the estimated feature importance in the training data for each predictor.***
236256

257+
## Method: get_term_importance()
237258

238-
## Method: get_intercept()
239-
240-
***Returns the regression coefficient of the intercept term.***
259+
***Returns a numpy vector containing the estimated term importance in the training data for each term.***
241260

242261

243-
## Method: get_intercept_steps()
262+
## Method: get_intercept()
244263

245-
***Returns a numpy vector containing the regression coefficients of the intercept term by boosting step.***
264+
***Returns the regression coefficient of the intercept term.***
246265

247266

248267
## Method: get_optimal_m()

aplr/aplr.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,13 +196,21 @@ def predict(
196196
def set_term_names(self, X_names: List[str]):
197197
self.APLRRegressor.set_term_names(X_names)
198198

199+
def calculate_feature_importance(
200+
self, X: npt.ArrayLike, sample_weight: npt.ArrayLike = np.empty(0)
201+
) -> npt.ArrayLike:
202+
return self.APLRRegressor.calculate_feature_importance(X, sample_weight)
203+
204+
def calculate_term_importance(
205+
self, X: npt.ArrayLike, sample_weight: npt.ArrayLike = np.empty(0)
206+
) -> npt.ArrayLike:
207+
return self.APLRRegressor.calculate_term_importance(X, sample_weight)
208+
199209
def calculate_local_feature_contribution(self, X: npt.ArrayLike) -> npt.ArrayLike:
200210
return self.APLRRegressor.calculate_local_feature_contribution(X)
201211

202-
def calculate_local_feature_contribution_for_terms(
203-
self, X: npt.ArrayLike
204-
) -> npt.ArrayLike:
205-
return self.APLRRegressor.calculate_local_feature_contribution_for_terms(X)
212+
def calculate_local_term_contribution(self, X: npt.ArrayLike) -> npt.ArrayLike:
213+
return self.APLRRegressor.calculate_local_term_contribution(X)
206214

207215
def calculate_terms(self, X: npt.ArrayLike) -> npt.ArrayLike:
208216
return self.APLRRegressor.calculate_terms(X)
@@ -219,6 +227,9 @@ def get_validation_error_steps(self) -> npt.ArrayLike:
219227
def get_feature_importance(self) -> npt.ArrayLike:
220228
return self.APLRRegressor.get_feature_importance()
221229

230+
def get_term_importance(self) -> npt.ArrayLike:
231+
return self.APLRRegressor.get_term_importance()
232+
222233
def get_intercept(self) -> float:
223234
return self.APLRRegressor.get_intercept()
224235

cpp/APLRRegressor.h

Lines changed: 72 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ struct ModelForCVFold
2121
size_t m_optimal;
2222
double sample_weight_train_sum;
2323
double fold_weight;
24-
VectorXd feature_importance;
2524
Eigen::Index fold_index;
2625
double min_training_prediction_or_response;
2726
double max_training_prediction_or_response;
@@ -70,6 +69,7 @@ class APLRRegressor
7069
std::vector<VectorXi> group_cycle_train;
7170
size_t group_cycle_predictor_index;
7271
std::vector<ModelForCVFold> cv_fold_models;
72+
VectorXd intercept_steps;
7373

7474
void validate_input_to_fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sample_weight, const std::vector<std::string> &X_names,
7575
const MatrixXi &cv_observations, const std::vector<size_t> &prioritized_predictors_indexes,
@@ -125,16 +125,16 @@ class APLRRegressor
125125
void merge_similar_terms(const MatrixXd &X);
126126
void remove_unused_terms();
127127
void name_terms(const MatrixXd &X, const std::vector<std::string> &X_names);
128-
void calculate_feature_importance(const MatrixXd &X, const MatrixXd &sample_weight);
129128
void find_min_and_max_training_predictions_or_responses();
130129
void write_output_to_cv_fold_models(Eigen::Index fold_index);
131130
void cleanup_after_fit();
132131
void check_term_integrity();
133-
void create_final_model(const MatrixXd &X);
132+
void create_final_model(const MatrixXd &X, const VectorXd &sample_weight);
134133
void compute_fold_weights();
135134
void update_intercept_and_term_weights();
136135
void create_terms(const MatrixXd &X);
137-
void calculate_final_feature_importance();
136+
void estimate_feature_and_term_importances(const MatrixXd &X, const VectorXd &sample_weight);
137+
void sort_terms();
138138
void compute_cv_error();
139139
void concatenate_validation_error_steps();
140140
void find_final_min_and_max_training_predictions_or_responses();
@@ -160,6 +160,7 @@ class APLRRegressor
160160
void throw_error_if_m_is_invalid();
161161
bool model_has_not_been_trained();
162162
std::vector<size_t> compute_relevant_term_indexes(size_t predictor_index);
163+
void validate_sample_weight(const MatrixXd &X, const VectorXd &sample_weight);
163164

164165
public:
165166
double intercept;
@@ -177,7 +178,6 @@ class APLRRegressor
177178
std::vector<std::string> term_names;
178179
VectorXd term_coefficients;
179180
size_t max_interaction_level;
180-
VectorXd intercept_steps;
181181
size_t max_interactions;
182182
size_t interactions_eligible;
183183
MatrixXd validation_error_steps;
@@ -186,6 +186,7 @@ class APLRRegressor
186186
size_t max_eligible_terms;
187187
size_t number_of_base_terms;
188188
VectorXd feature_importance;
189+
VectorXd term_importance;
189190
double dispersion_parameter;
190191
double min_training_prediction_or_response;
191192
double max_training_prediction_or_response;
@@ -222,13 +223,16 @@ class APLRRegressor
222223
const MatrixXd &other_data = MatrixXd(0, 0));
223224
VectorXd predict(const MatrixXd &X, bool cap_predictions_to_minmax_in_training = true);
224225
void set_term_names(const std::vector<std::string> &X_names);
226+
VectorXd calculate_feature_importance(const MatrixXd &X, const VectorXd &sample_weight);
227+
VectorXd calculate_term_importance(const MatrixXd &X, const VectorXd &sample_weight);
225228
MatrixXd calculate_local_feature_contribution(const MatrixXd &X);
226-
MatrixXd calculate_local_feature_contribution_for_terms(const MatrixXd &X);
229+
MatrixXd calculate_local_term_contribution(const MatrixXd &X);
227230
MatrixXd calculate_terms(const MatrixXd &X);
228231
std::vector<std::string> get_term_names();
229232
VectorXd get_term_coefficients();
230233
MatrixXd get_validation_error_steps();
231234
VectorXd get_feature_importance();
235+
VectorXd get_term_importance();
232236
double get_intercept();
233237
size_t get_optimal_m();
234238
std::string get_validation_tuning_metric();
@@ -251,7 +255,7 @@ APLRRegressor::APLRRegressor(size_t m, double v, uint_fast32_t random_state, std
251255
size_t group_mse_by_prediction_bins, size_t group_mse_cycle_min_obs_in_bin)
252256
: reserved_terms_times_num_x{reserved_terms_times_num_x}, intercept{NAN_DOUBLE}, m{m}, v{v},
253257
loss_function{loss_function}, link_function{link_function}, cv_folds{cv_folds}, n_jobs{n_jobs}, random_state{random_state},
254-
bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level}, intercept_steps{VectorXd(0)},
258+
bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
255259
max_interactions{max_interactions}, interactions_eligible{0}, validation_error_steps{MatrixXd(0, 0)},
256260
min_observations_in_split{min_observations_in_split}, ineligible_boosting_steps_added{ineligible_boosting_steps_added},
257261
max_eligible_terms{max_eligible_terms}, number_of_base_terms{0}, dispersion_parameter{dispersion_parameter}, min_training_prediction_or_response{NAN_DOUBLE},
@@ -271,11 +275,12 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other)
271275
loss_function{other.loss_function}, link_function{other.link_function}, cv_folds{other.cv_folds},
272276
n_jobs{other.n_jobs}, random_state{other.random_state}, bins{other.bins},
273277
verbosity{other.verbosity}, term_names{other.term_names}, term_coefficients{other.term_coefficients},
274-
max_interaction_level{other.max_interaction_level}, intercept_steps{other.intercept_steps}, max_interactions{other.max_interactions},
278+
max_interaction_level{other.max_interaction_level}, max_interactions{other.max_interactions},
275279
interactions_eligible{other.interactions_eligible}, validation_error_steps{other.validation_error_steps},
276280
min_observations_in_split{other.min_observations_in_split}, ineligible_boosting_steps_added{other.ineligible_boosting_steps_added},
277281
max_eligible_terms{other.max_eligible_terms}, number_of_base_terms{other.number_of_base_terms},
278-
feature_importance{other.feature_importance}, dispersion_parameter{other.dispersion_parameter}, min_training_prediction_or_response{other.min_training_prediction_or_response},
282+
feature_importance{other.feature_importance}, term_importance{other.term_importance}, dispersion_parameter{other.dispersion_parameter},
283+
min_training_prediction_or_response{other.min_training_prediction_or_response},
279284
max_training_prediction_or_response{other.max_training_prediction_or_response}, validation_tuning_metric{other.validation_tuning_metric},
280285
quantile{other.quantile}, m_optimal{other.m_optimal},
281286
calculate_custom_validation_error_function{other.calculate_custom_validation_error_function},
@@ -310,7 +315,7 @@ void APLRRegressor::fit(const MatrixXd &X, const VectorXd &y, const VectorXd &sa
310315
{
311316
fit_model_for_cv_fold(X, y, sample_weight, X_names, cv_observations_used.col(i), monotonic_constraints, group, other_data, i);
312317
}
313-
create_final_model(X);
318+
create_final_model(X, sample_weight);
314319
}
315320

316321
void APLRRegressor::preprocess_prioritized_predictors_and_interaction_constraints(
@@ -347,7 +352,6 @@ void APLRRegressor::fit_model_for_cv_fold(const MatrixXd &X, const VectorXd &y,
347352
remove_unused_terms();
348353
revert_scaling_if_using_log_link_function();
349354
name_terms(X, X_names);
350-
calculate_feature_importance(X_validation, sample_weight_validation);
351355
find_min_and_max_training_predictions_or_responses();
352356
write_output_to_cv_fold_models(fold_index);
353357
cleanup_after_fit();
@@ -1732,14 +1736,42 @@ std::string APLRRegressor::compute_raw_base_term_name(const Term &term, const st
17321736
return name;
17331737
}
17341738

1735-
void APLRRegressor::calculate_feature_importance(const MatrixXd &X, const MatrixXd &sample_weight)
1739+
VectorXd APLRRegressor::calculate_feature_importance(const MatrixXd &X, const VectorXd &sample_weight)
17361740
{
1737-
feature_importance = VectorXd::Constant(number_of_base_terms, 0);
1741+
validate_that_model_can_be_used(X);
1742+
validate_sample_weight(X, sample_weight);
1743+
VectorXd feature_importance = VectorXd::Constant(number_of_base_terms, 0);
17381744
MatrixXd li{calculate_local_feature_contribution(X)};
17391745
for (Eigen::Index i = 0; i < li.cols(); ++i) // For each column calculate standard deviation of contribution to linear predictor
17401746
{
17411747
feature_importance[i] = calculate_standard_deviation(li.col(i), sample_weight);
17421748
}
1749+
return feature_importance;
1750+
}
1751+
1752+
void APLRRegressor::validate_sample_weight(const MatrixXd &X, const VectorXd &sample_weight)
1753+
{
1754+
bool sample_weight_is_provided{sample_weight.size() > 0};
1755+
if (sample_weight_is_provided)
1756+
{
1757+
bool sample_weight_is_invalid{sample_weight.rows() != X.rows()};
1758+
if (sample_weight_is_invalid)
1759+
throw std::runtime_error("If sample_weight is provided then it needs to contain as many rows as X does.");
1760+
}
1761+
}
1762+
1763+
VectorXd APLRRegressor::calculate_term_importance(const MatrixXd &X, const VectorXd &sample_weight)
1764+
{
1765+
validate_that_model_can_be_used(X);
1766+
validate_sample_weight(X, sample_weight);
1767+
VectorXd term_importance = VectorXd::Constant(terms.size(), 0);
1768+
for (size_t i = 0; i < terms.size(); ++i)
1769+
{
1770+
VectorXd contrib{terms[i].calculate_contribution_to_linear_predictor(X)};
1771+
double std_dev_of_contribution(calculate_standard_deviation(contrib, sample_weight));
1772+
term_importance[i] = std_dev_of_contribution;
1773+
}
1774+
return term_importance;
17431775
}
17441776

17451777
MatrixXd APLRRegressor::calculate_local_feature_contribution(const MatrixXd &X)
@@ -1783,7 +1815,6 @@ void APLRRegressor::write_output_to_cv_fold_models(Eigen::Index fold_index)
17831815
cv_fold_models[fold_index].validation_error_steps = validation_error_steps;
17841816
cv_fold_models[fold_index].validation_error = validation_error_steps.col(0).minCoeff();
17851817
cv_fold_models[fold_index].m_optimal = get_optimal_m();
1786-
cv_fold_models[fold_index].feature_importance = get_feature_importance();
17871818
cv_fold_models[fold_index].fold_index = fold_index;
17881819
cv_fold_models[fold_index].min_training_prediction_or_response = min_training_prediction_or_response;
17891820
cv_fold_models[fold_index].max_training_prediction_or_response = max_training_prediction_or_response;
@@ -1867,12 +1898,13 @@ void APLRRegressor::check_term_integrity()
18671898
}
18681899
}
18691900

1870-
void APLRRegressor::create_final_model(const MatrixXd &X)
1901+
void APLRRegressor::create_final_model(const MatrixXd &X, const VectorXd &sample_weight)
18711902
{
18721903
compute_fold_weights();
18731904
update_intercept_and_term_weights();
18741905
create_terms(X);
1875-
calculate_final_feature_importance();
1906+
estimate_feature_and_term_importances(X, sample_weight);
1907+
sort_terms();
18761908
compute_cv_error();
18771909
concatenate_validation_error_steps();
18781910
find_final_min_and_max_training_predictions_or_responses();
@@ -1919,22 +1951,30 @@ void APLRRegressor::create_terms(const MatrixXd &X)
19191951
}
19201952
merge_similar_terms(X);
19211953
remove_unused_terms();
1922-
std::sort(terms.begin(), terms.end(),
1923-
[](const Term &a, const Term &b)
1924-
{ return a.base_term < b.base_term ||
1925-
(a.base_term == b.base_term && std::isless(a.coefficient, b.coefficient)); });
19261954
}
19271955

1928-
void APLRRegressor::calculate_final_feature_importance()
1956+
void APLRRegressor::estimate_feature_and_term_importances(const MatrixXd &X, const VectorXd &sample_weight)
19291957
{
1930-
for (auto &cv_fold_model : cv_fold_models)
1958+
feature_importance = calculate_feature_importance(X, sample_weight);
1959+
term_importance = calculate_term_importance(X, sample_weight);
1960+
for (size_t i = 0; i < terms.size(); ++i)
19311961
{
1932-
cv_fold_model.feature_importance *= cv_fold_model.fold_weight;
1962+
terms[i].estimated_term_importance = term_importance[i];
19331963
}
1934-
feature_importance = VectorXd::Constant(feature_importance.rows(), 0.0);
1935-
for (auto &cv_fold_model : cv_fold_models)
1964+
}
1965+
1966+
void APLRRegressor::sort_terms()
1967+
{
1968+
std::sort(terms.begin(), terms.end(),
1969+
[](const Term &a, const Term &b)
1970+
{ return a.estimated_term_importance > b.estimated_term_importance ||
1971+
(is_approximately_equal(a.estimated_term_importance, b.estimated_term_importance) && (a.base_term < b.base_term)) ||
1972+
(is_approximately_equal(a.estimated_term_importance, b.estimated_term_importance) && (a.base_term == b.base_term) &&
1973+
std::isless(a.coefficient, b.coefficient)); });
1974+
1975+
for (size_t i = 0; i < terms.size(); ++i)
19361976
{
1937-
feature_importance += cv_fold_model.feature_importance;
1977+
term_importance[i] = terms[i].estimated_term_importance;
19381978
}
19391979
}
19401980

@@ -2038,7 +2078,7 @@ void APLRRegressor::cap_predictions_to_minmax_in_training(VectorXd &predictions)
20382078
}
20392079
}
20402080

2041-
MatrixXd APLRRegressor::calculate_local_feature_contribution_for_terms(const MatrixXd &X)
2081+
MatrixXd APLRRegressor::calculate_local_term_contribution(const MatrixXd &X)
20422082
{
20432083
validate_that_model_can_be_used(X);
20442084

@@ -2088,6 +2128,11 @@ VectorXd APLRRegressor::get_feature_importance()
20882128
return feature_importance;
20892129
}
20902130

2131+
VectorXd APLRRegressor::get_term_importance()
2132+
{
2133+
return term_importance;
2134+
}
2135+
20912136
double APLRRegressor::get_intercept()
20922137
{
20932138
return intercept;

0 commit comments

Comments
 (0)