Skip to content

Commit 0eb1e30

Browse files
bugfix
1 parent 0c42ff4 commit 0eb1e30

File tree

9 files changed

+79
-78
lines changed

9 files changed

+79
-78
lines changed

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,15 +331,23 @@ A numpy matrix with predictor values.
331331

332332
***For the predictor in X specified by predictor_index, get_main_effect_shape returns a dictionary with keys equal to predictor values and values equal to the corresponding contribution to the linear predictor (interactions with other predictors are ignored). This method makes it easier to interpret main effects, for example by visualizing the output in a line plot.***
333333

334+
### Parameters
335+
336+
#### predictor_index
337+
The index of the predictor. So if ***predictor_index*** is ***1*** then the second predictor in ***X*** is used.
334338

335-
## Method: get_unique_term_affiliation_shape(unique_term_affiliation: str)
336339

337-
***Returns a matrix containing one column for each predictor used in the unique term affiliation, in addition to one column for the contribution to the linear predictor. For main effects or two-way interactions this can be visualized in for example line plots and surface plots respectively. See this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). Please note that the get_unique_term_affiliation_shape method is currently very memory intensive when handling interactions and may crash without warning on larger models. Consider using either of the calculate_local_feature_contribution or calculate_local_contribution_from_selected_terms methods to interpret interactions on larger models.***
340+
## Method: get_unique_term_affiliation_shape(unique_term_affiliation: str, max_rows_before_sampling: int = 100000)
341+
342+
***Returns a matrix containing one column for each predictor used in the unique term affiliation, in addition to one column for the contribution to the linear predictor. For main effects or two-way interactions this can be visualized in for example line plots and surface plots respectively. See this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py).***
338343

339344
### Parameters
340345

341-
#### predictor_index
342-
The index of the predictor. So if ***predictor_index*** is ***1*** then the second predictor in ***X*** is used.
346+
#### unique_term_affiliation
347+
A string specifying which unique_term_affiliation to use.
348+
349+
#### max_rows_before_sampling
350+
Prevents the output from having significantly more than ***max_rows_before_sampling*** rows by randomly sampling if necessary. This threshold can be triggered, for example, by interaction terms in larger models.
343351

344352

345353
## Method: get_cv_error()

aplr/aplr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,10 +294,10 @@ def get_main_effect_shape(self, predictor_index: int) -> Dict[float, float]:
294294
return self.APLRRegressor.get_main_effect_shape(predictor_index)
295295

296296
def get_unique_term_affiliation_shape(
297-
self, unique_term_affiliation: str
297+
self, unique_term_affiliation: str, max_rows_before_sampling: int = 100000
298298
) -> FloatMatrix:
299299
return self.APLRRegressor.get_unique_term_affiliation_shape(
300-
unique_term_affiliation
300+
unique_term_affiliation, max_rows_before_sampling
301301
)
302302

303303
def get_cv_error(self) -> float:

cpp/APLRRegressor.h

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <random>
66
#include <vector>
77
#include <thread>
8+
#include <unordered_map>
89
#include "../dependencies/eigen-3.4.0/Eigen/Dense"
910
#include "functions.h"
1011
#include "term.h"
@@ -187,7 +188,8 @@ class APLRRegressor
187188
bool model_has_not_been_trained();
188189
std::vector<size_t> compute_relevant_term_indexes(const std::string &unique_term_affiliation);
189190
std::vector<double> compute_split_points(size_t predictor_index, const std::vector<size_t> &relevant_term_indexes);
190-
VectorXd compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X, const std::vector<size_t> &term_indexes);
191+
VectorXd compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X, const std::vector<size_t> &term_indexes,
192+
const std::vector<size_t> &base_predictors_used);
191193
void validate_sample_weight(const MatrixXd &X, const VectorXd &sample_weight);
192194
void set_term_coefficients();
193195

@@ -291,7 +293,9 @@ class APLRRegressor
291293
size_t get_optimal_m();
292294
std::string get_validation_tuning_metric();
293295
std::map<double, double> get_main_effect_shape(size_t predictor_index);
294-
MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation);
296+
MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling = 100000);
297+
MatrixXd generate_predictor_values_and_contribution(const std::vector<size_t> &relevant_term_indexes,
298+
size_t unique_term_affiliation_index);
295299
double get_cv_error();
296300

297301
friend class APLRClassifier;
@@ -2524,12 +2528,13 @@ std::map<double, double> APLRRegressor::get_main_effect_shape(size_t predictor_i
25242528
std::vector<size_t> relevant_term_indexes{compute_relevant_term_indexes(unique_term_affiliation)};
25252529
std::vector<double> split_points{compute_split_points(predictor_index, relevant_term_indexes)};
25262530

2527-
MatrixXd X{MatrixXd::Constant(split_points.size(), number_of_base_terms, 0)};
2531+
MatrixXd X{MatrixXd::Constant(split_points.size(), 1, 0)};
25282532
for (size_t i = 0; i < split_points.size(); ++i)
25292533
{
2530-
X.col(predictor_index)[i] = split_points[i];
2534+
X(i, 0) = split_points[i];
25312535
}
2532-
VectorXd contribution_to_linear_predictor{compute_contribution_to_linear_predictor_from_specific_terms(X, relevant_term_indexes)};
2536+
VectorXd contribution_to_linear_predictor{compute_contribution_to_linear_predictor_from_specific_terms(X, relevant_term_indexes,
2537+
{predictor_index})};
25332538
for (size_t i = 0; i < split_points.size(); ++i)
25342539
{
25352540
main_effect_shape[split_points[i]] = contribution_to_linear_predictor[i];
@@ -2598,19 +2603,32 @@ std::vector<double> APLRRegressor::compute_split_points(size_t predictor_index,
25982603
return split_points;
25992604
}
26002605

2601-
VectorXd APLRRegressor::compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X, const std::vector<size_t> &term_indexes)
2606+
VectorXd APLRRegressor::compute_contribution_to_linear_predictor_from_specific_terms(const MatrixXd &X,
2607+
const std::vector<size_t> &term_indexes,
2608+
const std::vector<size_t> &base_predictors_used)
26022609
{
2603-
VectorXd contribution_from_specific_terms{VectorXd::Constant(X.rows(), 0.0)};
2604-
2610+
VectorXd contribution_from_specific_terms = VectorXd::Zero(X.rows());
2611+
std::unordered_map<size_t, size_t> X_map;
2612+
for (size_t i = 0; i < base_predictors_used.size(); ++i)
2613+
{
2614+
X_map[base_predictors_used[i]] = i;
2615+
}
26052616
for (auto &term_index_used : term_indexes)
26062617
{
2607-
contribution_from_specific_terms += terms[term_index_used].calculate_contribution_to_linear_predictor(X);
2618+
auto &term = terms[term_index_used];
2619+
VectorXd contribution_from_this_term = term.coefficient * term.calculate_without_interactions(X.col(X_map[term.base_term]));
2620+
for (auto &given_term : term.given_terms)
2621+
{
2622+
VectorXd values_from_given_term = given_term.calculate_without_interactions(X.col(X_map[given_term.base_term]));
2623+
VectorXi indicator = calculate_indicator(values_from_given_term);
2624+
contribution_from_this_term = contribution_from_this_term.array() * indicator.cast<double>().array();
2625+
}
2626+
contribution_from_specific_terms += contribution_from_this_term;
26082627
}
2609-
26102628
return contribution_from_specific_terms;
26112629
}
26122630

2613-
MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation)
2631+
MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling)
26142632
{
26152633
if (model_has_not_been_trained())
26162634
throw std::runtime_error("The model must have been trained before using get_unique_term_affiliation_shape().");
@@ -2630,30 +2648,40 @@ MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &uni
26302648
std::vector<size_t> relevant_term_indexes{compute_relevant_term_indexes(unique_term_affiliation)};
26312649
size_t unique_term_affiliation_index{unique_term_affiliation_map[unique_term_affiliation]};
26322650
size_t num_predictors_used_in_the_affiliation{base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index].size()};
2633-
if (num_predictors_used_in_the_affiliation > 1)
2634-
{
2635-
std::string warning{"Please note that the get_unique_term_affiliation_shape method is currently very memory intensive when handling interactions and may crash without warning on larger models. Consider using either of the calculate_local_feature_contribution or calculate_local_contribution_from_selected_terms methods to interpret interactions on larger models."};
2636-
std::cout << warning << std::endl;
2637-
}
2638-
26392651
std::vector<std::vector<double>> split_points_in_each_predictor(num_predictors_used_in_the_affiliation);
26402652
for (size_t i = 0; i < num_predictors_used_in_the_affiliation; ++i)
26412653
{
26422654
split_points_in_each_predictor[i] = compute_split_points(base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index][i], relevant_term_indexes);
26432655
}
26442656

2645-
MatrixXd output{generate_combinations_and_one_additional_column(split_points_in_each_predictor)};
2646-
2647-
MatrixXd X{MatrixXd::Constant(output.rows(), number_of_base_terms, 0)};
2648-
for (Eigen::Index i = 0; i < output.rows(); ++i)
2657+
size_t num_split_point_combinations = 1;
2658+
for (size_t i = 0; i < split_points_in_each_predictor.size(); ++i)
2659+
{
2660+
num_split_point_combinations *= split_points_in_each_predictor[i].size();
2661+
}
2662+
bool need_to_sample{num_split_point_combinations > max_rows_before_sampling};
2663+
if (need_to_sample)
26492664
{
2650-
for (size_t j = 0; j < num_predictors_used_in_the_affiliation; ++j)
2665+
double num_split_point_combinations_sqrt = std::sqrt(static_cast<double>(num_split_point_combinations));
2666+
double factor = std::pow(max_rows_before_sampling / num_split_point_combinations_sqrt, 1.0 / split_points_in_each_predictor.size());
2667+
std::mt19937 seed(random_state);
2668+
for (auto &split_points : split_points_in_each_predictor)
26512669
{
2652-
X(i, base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index][j]) = output(i, j);
2670+
size_t current_num_observations = split_points.size();
2671+
size_t num_observations_to_keep = std::round(factor * std::sqrt(current_num_observations));
2672+
if (current_num_observations > num_observations_to_keep)
2673+
{
2674+
std::shuffle(split_points.begin(), split_points.end(), seed);
2675+
split_points.resize(num_observations_to_keep);
2676+
std::sort(split_points.begin(), split_points.end());
2677+
}
26532678
}
26542679
}
26552680

2656-
output.col(num_predictors_used_in_the_affiliation) = compute_contribution_to_linear_predictor_from_specific_terms(X, relevant_term_indexes);
2681+
MatrixXd output{generate_combinations_and_one_additional_column(split_points_in_each_predictor)};
2682+
output.col(num_predictors_used_in_the_affiliation) = compute_contribution_to_linear_predictor_from_specific_terms(output.block(0, 0, output.rows(), output.cols() - 1),
2683+
relevant_term_indexes,
2684+
base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index]);
26572685

26582686
return output;
26592687
}

cpp/functions.h

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -544,33 +544,27 @@ double calculate_standard_deviation(const VectorXd &vector, const VectorXd &samp
544544

545545
MatrixXd generate_combinations_and_one_additional_column(const std::vector<std::vector<double>> &vectors)
546546
{
547-
int num_vectors = vectors.size();
548-
std::vector<int> sizes(num_vectors);
549-
int num_rows = 1;
547+
size_t num_vectors = vectors.size();
548+
std::vector<size_t> sizes(num_vectors);
549+
size_t num_rows = 1;
550550

551-
// Calculate the number of rows in the result matrix
552-
for (int i = 0; i < num_vectors; ++i)
551+
for (size_t i = 0; i < num_vectors; ++i)
553552
{
554553
sizes[i] = vectors[i].size();
555554
num_rows *= sizes[i];
556555
}
557556

558-
// Initialize the result matrix with an additional unused column
559557
MatrixXd result(num_rows, num_vectors + 1);
560558

561-
// Generate all combinations
562-
for (int row = 0; row < num_rows; ++row)
559+
for (size_t row = 0; row < num_rows; ++row)
563560
{
564-
int index = row;
565-
for (int col = 0; col < num_vectors; ++col)
561+
size_t index = row;
562+
for (size_t col = 0; col < num_vectors; ++col)
566563
{
567-
int vec_size = sizes[col];
564+
size_t vec_size = sizes[col];
568565
result(row, col) = vectors[col][index % vec_size];
569566
index /= vec_size;
570567
}
571-
// Set the additional unused column to zero (or any other value)
572-
result(row, num_vectors) = 0;
573568
}
574-
575569
return result;
576570
}

cpp/pythonbinding.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ PYBIND11_MODULE(aplr_cpp, m)
7474
.def("get_optimal_m", &APLRRegressor::get_optimal_m)
7575
.def("get_validation_tuning_metric", &APLRRegressor::get_validation_tuning_metric)
7676
.def("get_main_effect_shape", &APLRRegressor::get_main_effect_shape, py::arg("predictor_index"))
77-
.def("get_unique_term_affiliation_shape", &APLRRegressor::get_unique_term_affiliation_shape, py::arg("unique_term_affiliation"))
77+
.def("get_unique_term_affiliation_shape", &APLRRegressor::get_unique_term_affiliation_shape, py::arg("unique_term_affiliation"),
78+
py::arg("max_rows_before_sampling"))
7879
.def("get_cv_error", &APLRRegressor::get_cv_error)
7980
.def_readwrite("intercept", &APLRRegressor::intercept)
8081
.def_readwrite("m", &APLRRegressor::m)
Binary file not shown.

documentation/model_interpretation_for_regression.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Use the ***calculate_feature_importance*** method or the ***calculate_local_feat
1010
Use the ***get_main_effect_shape*** method or the ***get_unique_term_affiliation_shape*** method to interpret main effects as shown in this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). For each main effect, you may plot the output in a line plot.
1111

1212
## Interactions
13-
For best interpretability of interactions, do not use a higher ***max_interaction_level*** than 1. Use the ***get_unique_term_affiliation_shape*** method if your computer has enough memory (the method is currently very memory intensive when handling interaction terms and may crash without warning on larger models) or either of the ***calculate_local_feature_contribution*** or ***calculate_local_contribution_from_selected_terms*** methods to interpret interactions as shown in this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). For each two-way interaction of interest you may plot the output in a 3D surface plot.
13+
For best interpretability of interactions, do not use a higher ***max_interaction_level*** than 1. Use the ***get_unique_term_affiliation_shape*** method to interpret interactions as shown in this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py). For each two-way interaction of interest you may plot the output in a 3D surface plot.
1414

1515
## Interpretation of model terms and their regression coefficients
1616
The above interpretations of main effects and interactions are sufficient to interpret an APLR model. However, it is possible to also inspect the underlying terms for those who wish to do so. For an example on how to interpret the terms in an APLR model, please see ***Section 5.1.3*** in the published article about APLR. You can find this article on [https://link.springer.com/article/10.1007/s00180-024-01475-4](https://link.springer.com/article/10.1007/s00180-024-01475-4) and [https://rdcu.be/dz7bF](https://rdcu.be/dz7bF).

examples/train_aplr_regression.py

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,7 @@
104104

105105
# Shapes for all term affiliations in the model. For each term affiliation, contains predictor values and the corresponding
106106
# contributions to the linear predictor. Plots are created for main effects and two-way interactions.
107-
# This is probably the most useful method to use for understanding how the model works but it is currently very memory intensive when
108-
# handling interactions and may crash without warning on larger models. Consider using either of the calculate_local_feature_contribution
109-
# or calculate_local_contribution_from_selected_terms methods to interpret interactions on larger models.
107+
# This is probably the most useful method to use for understanding how the model works.
110108
shapes: Dict[str, pd.DataFrame] = {}
111109
predictors_in_each_affiliation = (
112110
best_model.get_base_predictors_in_each_unique_term_affiliation()
@@ -162,34 +160,6 @@
162160
best_model.calculate_local_feature_contribution(data_train[predictors]),
163161
columns=best_model.get_unique_term_affiliations(),
164162
)
165-
# Combining predictor values with local feature contribution for the second feature in best_model.get_unique_term_affiliations().
166-
# This can be visualized if it is a main effect or a two-way interaction.
167-
unique_term_affiliation_index = 1
168-
predictors_in_the_second_feature = [
169-
predictors[predictor_index]
170-
for predictor_index in best_model.get_base_predictors_in_each_unique_term_affiliation()[
171-
unique_term_affiliation_index
172-
]
173-
]
174-
data_to_visualize = pd.DataFrame(
175-
np.concatenate(
176-
(
177-
data_train[predictors_in_the_second_feature].values,
178-
local_feature_contribution[
179-
[
180-
best_model.get_unique_term_affiliations()[
181-
unique_term_affiliation_index
182-
]
183-
]
184-
],
185-
),
186-
axis=1,
187-
),
188-
columns=predictors_in_the_second_feature
189-
+ [
190-
f"contribution from {best_model.get_unique_term_affiliations()[unique_term_affiliation_index]}"
191-
],
192-
)
193163

194164
# Local (observation specific) contribution to the linear predictor from selected interacting predictors.
195165
# In this example this concerns two-way interaction terms in the model where the fourth and the seventh predictors in X interact.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
setuptools.setup(
2929
name="aplr",
30-
version="10.4.1",
30+
version="10.4.2",
3131
description="Automatic Piecewise Linear Regression",
3232
ext_modules=[sfc_module],
3333
author="Mathias von Ottenbreit",

0 commit comments

Comments
 (0)