Skip to content

Commit 0eb1e30

Browse files
bugfix
1 parent 0c42ff4 commit 0eb1e30

File tree

9 files changed

+79
-78
lines changed

9 files changed

+79
-78
lines changed

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,15 +331,23 @@ A numpy matrix with predictor values.
331331

332332
***For the predictor in X specified by predictor_index, get_main_effect_shape returns a dictionary with keys equal to predictor values and values equal to the corresponding contribution to the linear predictor (interactions with other predictors are ignored). This method makes it easier to interpret main effects, for example by visualizing the output in a line plot.***
333333

334+
### Parameters
335+
336+
#### predictor_index
337+
The index of the predictor. So if ***predictor_index*** is ***1*** then the second predictor in ***X*** is used.
334338

335-
## Method: get_unique_term_affiliation_shape(unique_term_affiliation: str)
336339

337-
***Returns a matrix containing one column for each predictor used in the unique term affiliation, in addition to one column for the contribution to the linear predictor. For main effects or two-way interactions this can be visualized in for example line plots and surface plots respectively. See this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). Please note that the get_unique_term_affiliation_shape method is currently very memory intensive when handling interactions and may crash without warning on larger models. Consider using either of the calculate_local_feature_contribution or calculate_local_contribution_from_selected_terms methods to interpret interactions on larger models.***
340+
## Method: get_unique_term_affiliation_shape(unique_term_affiliation: str, max_rows_before_sampling: int = 100000)
341+
342+
***Returns a matrix containing one column for each predictor used in the unique term affiliation, in addition to one column for the contribution to the linear predictor. For main effects or two-way interactions this can be visualized in for example line plots and surface plots respectively. See this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py).***
338343

339344
### Parameters
340345

341-
#### predictor_index
342-
The index of the predictor. So if ***predictor_index*** is ***1*** then the second predictor in ***X*** is used.
346+
#### unique_term_affiliation
347+
A string specifying which unique_term_affiliation to use.
348+
349+
#### max_rows_before_sampling
350+
Prevents the output from having significantly more than ***max_rows_before_sampling*** rows by randomly sampling if necessary. This threshold can be triggered, for example, by interaction terms in larger models.
343351

344352

345353
## Method: get_cv_error()

aplr/aplr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,10 +294,10 @@ def get_main_effect_shape(self, predictor_index: int) -> Dict[float, float]:
294294
return self.APLRRegressor.get_main_effect_shape(predictor_index)
295295

296296
def get_unique_term_affiliation_shape(
297-
self, unique_term_affiliation: str
297+
self, unique_term_affiliation: str, max_rows_before_sampling: int = 100000
298298
) -> FloatMatrix:
299299
return self.APLRRegressor.get_unique_term_affiliation_shape(
300-
unique_term_affiliation
300+
unique_term_affiliation, max_rows_before_sampling
301301
)
302302

303303
def get_cv_error(self) -> float:

cpp/APLRRegressor.h

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <random>
66
#include <vector>
77
#include <thread>
8+
#include <unordered_map>
89
#include "../dependencies/eigen-3.4.0/Eigen/Dense"
910
#include "functions.h"
1011
#include "term.h"
@@ -187,7 +188,8 @@ class APLRRegressor
187188
bool model_has_not_been_trained();
188189
std::vector<size_t> compute_relevant_term_indexes(const std::string &unique_term_affiliation);
189190
std::vector<double> compute_split_points(size_t predictor_index, const std::vector<size_t> &relevant_term_indexes);
190-
VectorXd compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X, const std::vector<size_t> &term_indexes);
191+
VectorXd compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X, const std::vector<size_t> &term_indexes,
192+
const std::vector<size_t> &base_predictors_used);
191193
void validate_sample_weight(const MatrixXd &X, const VectorXd &sample_weight);
192194
void set_term_coefficients();
193195

@@ -291,7 +293,9 @@ class APLRRegressor
291293
size_t get_optimal_m();
292294
std::string get_validation_tuning_metric();
293295
std::map<double, double> get_main_effect_shape(size_t predictor_index);
294-
MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation);
296+
MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling = 100000);
297+
MatrixXd generate_predictor_values_and_contribution(const std::vector<size_t> &relevant_term_indexes,
298+
size_t unique_term_affiliation_index);
295299
double get_cv_error();
296300

297301
friend class APLRClassifier;
@@ -2524,12 +2528,13 @@ std::map<double, double> APLRRegressor::get_main_effect_shape(size_t predictor_i
25242528
std::vector<size_t> relevant_term_indexes{compute_relevant_term_indexes(unique_term_affiliation)};
25252529
std::vector<double> split_points{compute_split_points(predictor_index, relevant_term_indexes)};
25262530

2527-
MatrixXd X{MatrixXd::Constant(split_points.size(), number_of_base_terms, 0)};
2531+
MatrixXd X{MatrixXd::Constant(split_points.size(), 1, 0)};
25282532
for (size_t i = 0; i < split_points.size(); ++i)
25292533
{
2530-
X.col(predictor_index)[i] = split_points[i];
2534+
X(i, 0) = split_points[i];
25312535
}
2532-
VectorXd contribution_to_linear_predictor{compute_contribution_to_linear_predictor_from_specific_terms(X, relevant_term_indexes)};
2536+
VectorXd contribution_to_linear_predictor{compute_contribution_to_linear_predictor_from_specific_terms(X, relevant_term_indexes,
2537+
{predictor_index})};
25332538
for (size_t i = 0; i < split_points.size(); ++i)
25342539
{
25352540
main_effect_shape[split_points[i]] = contribution_to_linear_predictor[i];
@@ -2598,19 +2603,32 @@ std::vector<double> APLRRegressor::compute_split_points(size_t predictor_index,
25982603
return split_points;
25992604
}
26002605

2601-
VectorXd APLRRegressor::compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X, const std::vector<size_t> &term_indexes)
2606+
VectorXd APLRRegressor::compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X,
2607+
const std::vector<size_t> &term_indexes,
2608+
const std::vector<size_t> &base_predictors_used)
26022609
{
2603-
VectorXd contribution_from_specific_terms{VectorXd::Constant(X.rows(), 0.0)};
2604-
2610+
VectorXd contribution_from_specific_terms = VectorXd::Zero(X.rows());
2611+
std::unordered_map<size_t, size_t> X_map;
2612+
for (size_t i = 0; i < base_predictors_used.size(); ++i)
2613+
{
2614+
X_map[base_predictors_used[i]] = i;
2615+
}
26052616
for (auto &term_index_used : term_indexes)
26062617
{
2607-
contribution_from_specific_terms += terms[term_index_used].calculate_contribution_to_linear_predictor(X);
2618+
auto &term = terms[term_index_used];
2619+
VectorXd contribution_from_this_term = term.coefficient * term.calculate_without_interactions(X.col(X_map[term.base_term]));
2620+
for (auto &given_term : term.given_terms)
2621+
{
2622+
VectorXd values_from_given_term = given_term.calculate_without_interactions(X.col(X_map[given_term.base_term]));
2623+
VectorXi indicator = calculate_indicator(values_from_given_term);
2624+
contribution_from_this_term = contribution_from_this_term.array() * indicator.cast<double>().array();
2625+
}
2626+
contribution_from_specific_terms += contribution_from_this_term;
26082627
}
2609-
26102628
return contribution_from_specific_terms;
26112629
}
26122630

2613-
MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation)
2631+
MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling)
26142632
{
26152633
if (model_has_not_been_trained())
26162634
throw std::runtime_error("The model must have been trained before using get_unique_term_affiliation_shape().");
@@ -2630,30 +2648,40 @@ MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &uni
26302648
std::vector<size_t> relevant_term_indexes{compute_relevant_term_indexes(unique_term_affiliation)};
26312649
size_t unique_term_affiliation_index{unique_term_affiliation_map[unique_term_affiliation]};
26322650
size_t num_predictors_used_in_the_affiliation{base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index].size()};
2633-
if (num_predictors_used_in_the_affiliation > 1)
2634-
{
2635-
std::string warning{"Please note that the get_unique_term_affiliation_shape method is currently very memory intensive when handling interactions and may crash without warning on larger models. Consider using either of the calculate_local_feature_contribution or calculate_local_contribution_from_selected_terms methods to interpret interactions on larger models."};
2636-
std::cout << warning << std::endl;
2637-
}
2638-
26392651
std::vector<std::vector<double>> split_points_in_each_predictor(num_predictors_used_in_the_affiliation);
26402652
for (size_t i = 0; i < num_predictors_used_in_the_affiliation; ++i)
26412653
{
26422654
split_points_in_each_predictor[i] = compute_split_points(base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index][i], relevant_term_indexes);
26432655
}
26442656

2645-
MatrixXd output{generate_combinations_and_one_additional_column(split_points_in_each_predictor)};
2646-
2647-
MatrixXd X{MatrixXd::Constant(output.rows(), number_of_base_terms, 0)};
2648-
for (Eigen::Index i = 0; i < output.rows(); ++i)
2657+
size_t num_split_point_combinations = 1;
2658+
for (size_t i = 0; i < split_points_in_each_predictor.size(); ++i)
2659+
{
2660+
num_split_point_combinations *= split_points_in_each_predictor[i].size();
2661+
}
2662+
bool need_to_sample{num_split_point_combinations > max_rows_before_sampling};
2663+
if (need_to_sample)
26492664
{
2650-
for (size_t j = 0; j < num_predictors_used_in_the_affiliation; ++j)
2665+
double num_split_point_combinations_sqrt = std::sqrt(static_cast<double>(num_split_point_combinations));
2666+
double factor = std::pow(max_rows_before_sampling / num_split_point_combinations_sqrt, 1.0 / split_points_in_each_predictor.size());
2667+
std::mt19937 seed(random_state);
2668+
for (auto &split_points : split_points_in_each_predictor)
26512669
{
2652-
X(i, base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index][j]) = output(i, j);
2670+
size_t current_num_observations = split_points.size();
2671+
size_t num_observations_to_keep = std::round(factor * std::sqrt(current_num_observations));
2672+
if (current_num_observations > num_observations_to_keep)
2673+
{
2674+
std::shuffle(split_points.begin(), split_points.end(), seed);
2675+
split_points.resize(num_observations_to_keep);
2676+
std::sort(split_points.begin(), split_points.end());
2677+
}
26532678
}
26542679
}
26552680

2656-
output.col(num_predictors_used_in_the_affiliation) = compute_contribution_to_linear_predictor_from_specific_terms(X, relevant_term_indexes);
2681+
MatrixXd output{generate_combinations_and_one_additional_column(split_points_in_each_predictor)};
2682+
output.col(num_predictors_used_in_the_affiliation) = compute_contribution_to_linear_predictor_from_specific_terms(output.block(0, 0, output.rows(), output.cols() - 1),
2683+
relevant_term_indexes,
2684+
base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index]);
26572685

26582686
return output;
26592687
}

cpp/functions.h

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -544,33 +544,27 @@ double calculate_standard_deviation(const VectorXd &vector, const VectorXd &samp
544544

545545
MatrixXd generate_combinations_and_one_additional_column(const std::vector<std::vector<double>> &vectors)
546546
{
547-
int num_vectors = vectors.size();
548-
std::vector<int> sizes(num_vectors);
549-
int num_rows = 1;
547+
size_t num_vectors = vectors.size();
548+
std::vector<size_t> sizes(num_vectors);
549+
size_t num_rows = 1;
550550

551-
// Calculate the number of rows in the result matrix
552-
for (int i = 0; i < num_vectors; ++i)
551+
for (size_t i = 0; i < num_vectors; ++i)
553552
{
554553
sizes[i] = vectors[i].size();
555554
num_rows *= sizes[i];
556555
}
557556

558-
// Initialize the result matrix with an additional unused column
559557
MatrixXd result(num_rows, num_vectors + 1);
560558

561-
// Generate all combinations
562-
for (int row = 0; row < num_rows; ++row)
559+
for (size_t row = 0; row < num_rows; ++row)
563560
{
564-
int index = row;
565-
for (int col = 0; col < num_vectors; ++col)
561+
size_t index = row;
562+
for (size_t col = 0; col < num_vectors; ++col)
566563
{
567-
int vec_size = sizes[col];
564+
size_t vec_size = sizes[col];
568565
result(row, col) = vectors[col][index % vec_size];
569566
index /= vec_size;
570567
}
571-
// Set the additional unused column to zero (or any other value)
572-
result(row, num_vectors) = 0;
573568
}
574-
575569
return result;
576570
}

cpp/pythonbinding.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ PYBIND11_MODULE(aplr_cpp, m)
7474
.def("get_optimal_m", &APLRRegressor::get_optimal_m)
7575
.def("get_validation_tuning_metric", &APLRRegressor::get_validation_tuning_metric)
7676
.def("get_main_effect_shape", &APLRRegressor::get_main_effect_shape, py::arg("predictor_index"))
77-
.def("get_unique_term_affiliation_shape", &APLRRegressor::get_unique_term_affiliation_shape, py::arg("unique_term_affiliation"))
77+
.def("get_unique_term_affiliation_shape", &APLRRegressor::get_unique_term_affiliation_shape, py::arg("unique_term_affiliation"),
78+
py::arg("max_rows_before_sampling"))
7879
.def("get_cv_error", &APLRRegressor::get_cv_error)
7980
.def_readwrite("intercept", &APLRRegressor::intercept)
8081
.def_readwrite("m", &APLRRegressor::m)
Binary file not shown.

documentation/model_interpretation_for_regression.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Use the ***calculate_feature_importance*** method or the ***calculate_local_feat
1010
Use the ***get_main_effect_shape*** method or the ***get_unique_term_affiliation_shape*** method to interpret main effects as shown in this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). For each main effect, you may plot the output in a line plot.
1111

1212
## Interactions
13-
For best interpretability of interactions, do not use a higher ***max_interaction_level*** than 1. Use the ***get_unique_term_affiliation_shape*** method if your computer has enough memory (the method is currently very memory intensive when handling interaction terms and may crash without warning on larger models) or either of the ***calculate_local_feature_contribution*** or ***calculate_local_contribution_from_selected_terms*** methods to interpret interactions as shown in this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). For each two-way interaction of interest you may plot the output in a 3D surface plot.
13+
For best interpretability of interactions, do not use a higher ***max_interaction_level*** than 1. Use the ***get_unique_term_affiliation_shape*** method to interpret interactions as shown in this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). For each two-way interaction of interest you may plot the output in a 3D surface plot.
1414

1515
## Interpretation of model terms and their regression coefficients
1616
The above interpretations of main effects and interactions are sufficient to interpret an APLR model. However, it is possible to also inspect the underlying terms for those who wish to do so. For an example on how to interpret the terms in an APLR model, please see ***Section 5.1.3*** in the published article about APLR. You can find this article on [https://link.springer.com/article/10.1007/s00180-024-01475-4](https://link.springer.com/article/10.1007/s00180-024-01475-4) and [https://rdcu.be/dz7bF](https://rdcu.be/dz7bF).

examples/train_aplr_regression.py

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,7 @@
104104

105105
# Shapes for all term affiliations in the model. For each term affiliation, contains predictor values and the corresponding
106106
# contributions to the linear predictor. Plots are created for main effects and two-way interactions.
107-
# This is probably the most useful method to use for understanding how the model works but it is currently very memory intensive when
108-
# handling interactions and may crash without warning on larger models. Consider using either of the calculate_local_feature_contribution
109-
# or calculate_local_contribution_from_selected_terms methods to interpret interactions on larger models.
107+
# This is probably the most useful method to use for understanding how the model works.
110108
shapes: Dict[str, pd.DataFrame] = {}
111109
predictors_in_each_affiliation = (
112110
best_model.get_base_predictors_in_each_unique_term_affiliation()
@@ -162,34 +160,6 @@
162160
best_model.calculate_local_feature_contribution(data_train[predictors]),
163161
columns=best_model.get_unique_term_affiliations(),
164162
)
165-
# Combining predictor values with local feature contribution for the second feature in best_model.get_unique_term_affiliations().
166-
# This can be visualized if it is a main effect or a two-way interaction.
167-
unique_term_affiliation_index = 1
168-
predictors_in_the_second_feature = [
169-
predictors[predictor_index]
170-
for predictor_index in best_model.get_base_predictors_in_each_unique_term_affiliation()[
171-
unique_term_affiliation_index
172-
]
173-
]
174-
data_to_visualize = pd.DataFrame(
175-
np.concatenate(
176-
(
177-
data_train[predictors_in_the_second_feature].values,
178-
local_feature_contribution[
179-
[
180-
best_model.get_unique_term_affiliations()[
181-
unique_term_affiliation_index
182-
]
183-
]
184-
],
185-
),
186-
axis=1,
187-
),
188-
columns=predictors_in_the_second_feature
189-
+ [
190-
f"contribution from {best_model.get_unique_term_affiliations()[unique_term_affiliation_index]}"
191-
],
192-
)
193163

194164
# Local (observation specific) contribution to the linear predictor from selected interacting predictors.
195165
# In this example this concerns two-way interaction terms in the model where the fourth and the seventh predictors in X interact.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
setuptools.setup(
2929
name="aplr",
30-
version="10.4.1",
30+
version="10.4.2",
3131
description="Automatic Piecewise Linear Regression",
3232
ext_modules=[sfc_module],
3333
author="Mathias von Ottenbreit",

0 commit comments

Comments
 (0)