10.11.0

mathias-von-ottenbreit · mathias-von-ottenbreit · commit 5591d2473a29 · 2025-09-12T19:47:42.000+02:00
diff --git a/API_REFERENCE_FOR_REGRESSION.md b/API_REFERENCE_FOR_REGRESSION.md
@@ -343,7 +343,7 @@ A numpy matrix with predictor values.
 The index of the predictor. So if ***predictor_index*** is ***1*** then the second predictor in ***X*** is used.
 
 
-## Method: get_unique_term_affiliation_shape(unique_term_affiliation:str, max_rows_before_sampling:int = 100000)
+## Method: get_unique_term_affiliation_shape(unique_term_affiliation:str, max_rows_before_sampling:int = 500000, additional_points:int = 250)
 
 ***Returns a matrix containing one column for each predictor used in the unique term affiliation, in addition to one column for the contribution to the linear predictor. For main effects or two-way interactions this can be visualized in for example line plots and surface plots respectively. See this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py).***
 
@@ -355,6 +355,9 @@ A string specifying which unique_term_affiliation to use.
 #### max_rows_before_sampling
 Prevents the output from having significantly more than ***max_rows_before_sampling*** rows by randomly sampling if necessary. This threshold can be triggered for example in interaction terms in larger models.
 
+#### additional_points
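+Only used for two-way or higher-order interactions. Specifies the number of evenly spaced points that are added for each predictor in the interaction, between the smallest and the largest split point of that predictor. These points come on top of the split points for each predictor and nearby points, and they are added before any random sampling is applied. Valid values are zero or greater. If set to ***0*** then no points are added. Adding points helps generate enough points to visualize the interaction effect smoothly and avoids artifacts from sparse data. Note that the points are added per predictor and the output is built from combinations of the points for each predictor, so the number of rows can grow quickly with the interaction order. A default of ***250*** is typically sufficient for most use cases, but this may be too high if the number of points is already high enough without added points or if the interaction order is high.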
+
 
 ## Method: get_cv_error()
 
diff --git a/aplr/aplr.py b/aplr/aplr.py
@@ -300,10 +300,13 @@ def get_main_effect_shape(self, predictor_index: int) -> Dict[float, float]:
         return self.APLRRegressor.get_main_effect_shape(predictor_index)
 
     def get_unique_term_affiliation_shape(
-        self, unique_term_affiliation: str, max_rows_before_sampling: int = 100000
+        self,
+        unique_term_affiliation: str,
+        max_rows_before_sampling: int = 500000,
+        additional_points: int = 250,
     ) -> FloatMatrix:
         return self.APLRRegressor.get_unique_term_affiliation_shape(
-            unique_term_affiliation, max_rows_before_sampling
+            unique_term_affiliation, max_rows_before_sampling, additional_points
         )
 
     def get_cv_error(self) -> float:
diff --git a/cpp/APLRRegressor.h b/cpp/APLRRegressor.h
@@ -299,7 +299,7 @@ class APLRRegressor
     size_t get_optimal_m();
     std::string get_validation_tuning_metric();
     std::map<double, double> get_main_effect_shape(size_t predictor_index);
-    MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling = 100000);
+    MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling = 500000, size_t additional_points = 250);
     MatrixXd generate_predictor_values_and_contribution(const std::vector<size_t> &relevant_term_indexes,
                                                         size_t unique_term_affiliation_index);
     double get_cv_error();
@@ -2705,7 +2705,7 @@ VectorXd APLRRegressor::compute_contribution_to_linear_predictor_from_specific_t
     return contribution_from_specific_terms;
 }
 
-MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling)
+MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling, size_t additional_points)
 {
     if (model_has_not_been_trained())
         throw std::runtime_error("The model must have been trained before using get_unique_term_affiliation_shape().");
@@ -2729,6 +2729,22 @@ MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &uni
     for (size_t i = 0; i < num_predictors_used_in_the_affiliation; ++i)
     {
         split_points_in_each_predictor[i] = compute_split_points(base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index][i], relevant_term_indexes);
+
+        if (num_predictors_used_in_the_affiliation > 1 && additional_points > 0)
+        {
+            double min_val = *std::min_element(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end());
+            double max_val = *std::max_element(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end());
+            std::vector<double> interpolated;
+            interpolated.reserve(additional_points);
+            for (size_t j = 1; j <= additional_points; ++j)
+            {
+                double val = min_val + (max_val - min_val) * j / (additional_points + 1);
+                interpolated.push_back(val);
+            }
+            split_points_in_each_predictor[i].insert(split_points_in_each_predictor[i].end(), interpolated.begin(), interpolated.end());
+            std::sort(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end());
+            split_points_in_each_predictor[i].erase(std::unique(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end()), split_points_in_each_predictor[i].end());
+        }
     }
 
     size_t num_split_point_combinations = 1;
diff --git a/cpp/pythonbinding.cpp b/cpp/pythonbinding.cpp
@@ -77,7 +77,7 @@ PYBIND11_MODULE(aplr_cpp, m)
         .def("get_validation_tuning_metric", &APLRRegressor::get_validation_tuning_metric)
         .def("get_main_effect_shape", &APLRRegressor::get_main_effect_shape, py::arg("predictor_index"))
         .def("get_unique_term_affiliation_shape", &APLRRegressor::get_unique_term_affiliation_shape, py::arg("unique_term_affiliation"),
-             py::arg("max_rows_before_sampling") = 100000)
+             py::arg("max_rows_before_sampling") = 500000, py::arg("additional_points") = 250)
         .def("get_cv_error", &APLRRegressor::get_cv_error)
         .def("set_intercept", &APLRRegressor::set_intercept, py::arg("value"))
         .def("remove_provided_custom_functions", &APLRRegressor::remove_provided_custom_functions)
diff --git a/cpp/tests.cpp b/cpp/tests.cpp
@@ -1772,8 +1772,8 @@ class Tests
         tests.push_back(li_for_particular_terms_mean_is_correct);
         tests.push_back(base_predictors_in_the_second_affiliation == correct_base_predictors_in_the_second_affiliation);
         tests.push_back(the_second_unique_term_affiliation == the_correct_second_unique_term_affiliation);
-        tests.push_back(is_approximately_equal(unique_term_affiliation_shape.mean(), 85.582024243321399));
-        tests.push_back(unique_term_affiliation_shape.rows() == 36);
+        tests.push_back(is_approximately_equal(unique_term_affiliation_shape.mean(), 85.239971686680235));
+        tests.push_back(unique_term_affiliation_shape.rows() == 65536);
         tests.push_back(unique_term_affiliation_shape.cols() == 3);
         tests.push_back(main_effect_shape_keys == unique_term_affiliation_shape_for_X2.col(0));
         tests.push_back(main_effect_shape_values == unique_term_affiliation_shape_for_X2.col(1));
diff --git a/documentation/APLR 10.11.0.pdf b/documentation/APLR 10.11.0.pdf
diff --git a/examples/train_aplr_regression.py b/examples/train_aplr_regression.py
@@ -129,17 +129,28 @@
         plt.savefig(f"shape of {affiliation}.png")
         plt.close()
     elif is_two_way_interaction:
-        plt.figure()
-        ax = plt.axes(projection="3d")
-        ax.plot_trisurf(
-            shape_df.iloc[:, 0],
-            shape_df.iloc[:, 1],
-            shape_df.iloc[:, 2],
-            cmap="Greys",
+        pivot_table = shape_df.pivot_table(
+            index=shape_df.columns[0],
+            columns=shape_df.columns[1],
+            values=shape_df.columns[2],
+            aggfunc="mean",
         )
-        ax.set_xlabel(shape_df.columns[0])
-        ax.set_ylabel(shape_df.columns[1])
-        ax.set_zlabel("contribution")
+        plt.figure(figsize=(8, 6))
+        plt.imshow(
+            pivot_table.values,
+            aspect="auto",
+            origin="lower",
+            extent=[
+                pivot_table.columns.min(),
+                pivot_table.columns.max(),
+                pivot_table.index.min(),
+                pivot_table.index.max(),
+            ],
+            cmap="Blues_r",
+        )
+        plt.colorbar(label="contribution")
+        plt.xlabel(shape_df.columns[1])
+        plt.ylabel(shape_df.columns[0])
         plt.title("Contribution to the linear predictor")
         plt.savefig(f"shape of {affiliation}.png")
         plt.close()
@@ -183,7 +194,7 @@
 mse = ((data_test[response] - data_test[predicted]) ** 2).mean()
 mae = (data_test[response] - data_test[predicted]).abs().mean()
 goodness_of_fit = pd.DataFrame(
-    {"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"][0]]}
+    {"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"].iloc[0]]}
 )
 goodness_of_fit["r_squared"] = goodness_of_fit["correlation"] ** 2
 
diff --git a/examples/train_aplr_regression_using_aplr_tuner.py b/examples/train_aplr_regression_using_aplr_tuner.py
@@ -120,17 +120,28 @@
         plt.savefig(f"shape of {affiliation}.png")
         plt.close()
     elif is_two_way_interaction:
-        plt.figure()
-        ax = plt.axes(projection="3d")
-        ax.plot_trisurf(
-            shape_df.iloc[:, 0],
-            shape_df.iloc[:, 1],
-            shape_df.iloc[:, 2],
-            cmap="Greys",
+        pivot_table = shape_df.pivot_table(
+            index=shape_df.columns[0],
+            columns=shape_df.columns[1],
+            values=shape_df.columns[2],
+            aggfunc="mean",
         )
-        ax.set_xlabel(shape_df.columns[0])
-        ax.set_ylabel(shape_df.columns[1])
-        ax.set_zlabel("contribution")
+        plt.figure(figsize=(8, 6))
+        plt.imshow(
+            pivot_table.values,
+            aspect="auto",
+            origin="lower",
+            extent=[
+                pivot_table.columns.min(),
+                pivot_table.columns.max(),
+                pivot_table.index.min(),
+                pivot_table.index.max(),
+            ],
+            cmap="Blues_r",
+        )
+        plt.colorbar(label="contribution")
+        plt.xlabel(shape_df.columns[1])
+        plt.ylabel(shape_df.columns[0])
         plt.title("Contribution to the linear predictor")
         plt.savefig(f"shape of {affiliation}.png")
         plt.close()
@@ -174,7 +185,7 @@
 mse = ((data_test[response] - data_test[predicted]) ** 2).mean()
 mae = (data_test[response] - data_test[predicted]).abs().mean()
 goodness_of_fit = pd.DataFrame(
-    {"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"][0]]}
+    {"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"].iloc[0]]}
 )
 goodness_of_fit["r_squared"] = goodness_of_fit["correlation"] ** 2
 
diff --git a/setup.py b/setup.py
@@ -28,7 +28,7 @@
 
 setuptools.setup(
     name="aplr",
-    version="10.10.0",
+    version="10.11.0",
     description="Automatic Piecewise Linear Regression",
     ext_modules=[sfc_module],
     author="Mathias von Ottenbreit",