Skip to content

Commit 5591d24

Browse files
10.11.0
1 parent 11b74f3 commit 5591d24

File tree

9 files changed

+75
-31
lines changed

9 files changed

+75
-31
lines changed

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ A numpy matrix with predictor values.
343343
The index of the predictor. So if ***predictor_index*** is ***1*** then the second predictor in ***X*** is used.
344344

345345

346-
## Method: get_unique_term_affiliation_shape(unique_term_affiliation:str, max_rows_before_sampling:int = 100000)
346+
## Method: get_unique_term_affiliation_shape(unique_term_affiliation:str, max_rows_before_sampling:int = 500000, additional_points: int = 250)
347347

348348
***Returns a matrix containing one column for each predictor used in the unique term affiliation, in addition to one column for the contribution to the linear predictor. For main effects and two-way interactions, this can be visualized with, for example, line plots and surface plots, respectively. See this [example](https://github.com/ottenbreit-data-science/aplr/blob/main/examples/train_aplr_regression.py).***
349349

@@ -355,6 +355,9 @@ A string specifying which unique_term_affiliation to use.
355355
#### max_rows_before_sampling
356356
Prevents the output from having significantly more than ***max_rows_before_sampling*** rows by randomly sampling if necessary. This threshold can be triggered, for example, by interaction terms in larger models.
357357

358+
#### additional_points
359+
Used for two-way or higher-order interactions. Specifies the number of evenly spaced points to add for each predictor - between that predictor's smallest and largest split points, and on top of the split points for each predictor and nearby points - before any random sampling is applied. Valid values are zero or greater. This helps generate enough points to visualize the interaction effect smoothly and avoid artifacts from sparse data. If set to 0 then no points are added. A default of 250 is typically sufficient for most use cases. However, because the points are added per predictor and the output is built from combinations of points across predictors, this may be too high if the number of points is already high enough without added points or if the interaction order is high.
360+
358361

359362
## Method: get_cv_error()
360363

aplr/aplr.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,10 +300,13 @@ def get_main_effect_shape(self, predictor_index: int) -> Dict[float, float]:
300300
return self.APLRRegressor.get_main_effect_shape(predictor_index)
301301

302302
def get_unique_term_affiliation_shape(
303-
self, unique_term_affiliation: str, max_rows_before_sampling: int = 100000
303+
self,
304+
unique_term_affiliation: str,
305+
max_rows_before_sampling: int = 500000,
306+
additional_points: int = 250,
304307
) -> FloatMatrix:
305308
return self.APLRRegressor.get_unique_term_affiliation_shape(
306-
unique_term_affiliation, max_rows_before_sampling
309+
unique_term_affiliation, max_rows_before_sampling, additional_points
307310
)
308311

309312
def get_cv_error(self) -> float:

cpp/APLRRegressor.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ class APLRRegressor
299299
size_t get_optimal_m();
300300
std::string get_validation_tuning_metric();
301301
std::map<double, double> get_main_effect_shape(size_t predictor_index);
302-
MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling = 100000);
302+
MatrixXd get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling = 500000, size_t additional_points = 250);
303303
MatrixXd generate_predictor_values_and_contribution(const std::vector<size_t> &relevant_term_indexes,
304304
size_t unique_term_affiliation_index);
305305
double get_cv_error();
@@ -2705,7 +2705,7 @@ VectorXd APLRRegressor::compute_contribution_to_linear_predictor_from_specific_t
27052705
return contribution_from_specific_terms;
27062706
}
27072707

2708-
MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling)
2708+
MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &unique_term_affiliation, size_t max_rows_before_sampling, size_t additional_points)
27092709
{
27102710
if (model_has_not_been_trained())
27112711
throw std::runtime_error("The model must have been trained before using get_unique_term_affiliation_shape().");
@@ -2729,6 +2729,22 @@ MatrixXd APLRRegressor::get_unique_term_affiliation_shape(const std::string &uni
27292729
for (size_t i = 0; i < num_predictors_used_in_the_affiliation; ++i)
27302730
{
27312731
split_points_in_each_predictor[i] = compute_split_points(base_predictors_in_each_unique_term_affiliation[unique_term_affiliation_index][i], relevant_term_indexes);
2732+
2733+
if (num_predictors_used_in_the_affiliation > 1 && additional_points > 0)
2734+
{
2735+
double min_val = *std::min_element(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end());
2736+
double max_val = *std::max_element(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end());
2737+
std::vector<double> interpolated;
2738+
interpolated.reserve(additional_points);
2739+
for (size_t j = 1; j <= additional_points; ++j)
2740+
{
2741+
double val = min_val + (max_val - min_val) * j / (additional_points + 1);
2742+
interpolated.push_back(val);
2743+
}
2744+
split_points_in_each_predictor[i].insert(split_points_in_each_predictor[i].end(), interpolated.begin(), interpolated.end());
2745+
std::sort(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end());
2746+
split_points_in_each_predictor[i].erase(std::unique(split_points_in_each_predictor[i].begin(), split_points_in_each_predictor[i].end()), split_points_in_each_predictor[i].end());
2747+
}
27322748
}
27332749

27342750
size_t num_split_point_combinations = 1;

cpp/pythonbinding.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ PYBIND11_MODULE(aplr_cpp, m)
7777
.def("get_validation_tuning_metric", &APLRRegressor::get_validation_tuning_metric)
7878
.def("get_main_effect_shape", &APLRRegressor::get_main_effect_shape, py::arg("predictor_index"))
7979
.def("get_unique_term_affiliation_shape", &APLRRegressor::get_unique_term_affiliation_shape, py::arg("unique_term_affiliation"),
80-
py::arg("max_rows_before_sampling") = 100000)
80+
py::arg("max_rows_before_sampling") = 500000, py::arg("additional_points") = 250)
8181
.def("get_cv_error", &APLRRegressor::get_cv_error)
8282
.def("set_intercept", &APLRRegressor::set_intercept, py::arg("value"))
8383
.def("remove_provided_custom_functions", &APLRRegressor::remove_provided_custom_functions)

cpp/tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1772,8 +1772,8 @@ class Tests
17721772
tests.push_back(li_for_particular_terms_mean_is_correct);
17731773
tests.push_back(base_predictors_in_the_second_affiliation == correct_base_predictors_in_the_second_affiliation);
17741774
tests.push_back(the_second_unique_term_affiliation == the_correct_second_unique_term_affiliation);
1775-
tests.push_back(is_approximately_equal(unique_term_affiliation_shape.mean(), 85.582024243321399));
1776-
tests.push_back(unique_term_affiliation_shape.rows() == 36);
1775+
tests.push_back(is_approximately_equal(unique_term_affiliation_shape.mean(), 85.239971686680235));
1776+
tests.push_back(unique_term_affiliation_shape.rows() == 65536);
17771777
tests.push_back(unique_term_affiliation_shape.cols() == 3);
17781778
tests.push_back(main_effect_shape_keys == unique_term_affiliation_shape_for_X2.col(0));
17791779
tests.push_back(main_effect_shape_values == unique_term_affiliation_shape_for_X2.col(1));
Binary file not shown.

examples/train_aplr_regression.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -129,17 +129,28 @@
129129
plt.savefig(f"shape of {affiliation}.png")
130130
plt.close()
131131
elif is_two_way_interaction:
132-
plt.figure()
133-
ax = plt.axes(projection="3d")
134-
ax.plot_trisurf(
135-
shape_df.iloc[:, 0],
136-
shape_df.iloc[:, 1],
137-
shape_df.iloc[:, 2],
138-
cmap="Greys",
132+
pivot_table = shape_df.pivot_table(
133+
index=shape_df.columns[0],
134+
columns=shape_df.columns[1],
135+
values=shape_df.columns[2],
136+
aggfunc="mean",
139137
)
140-
ax.set_xlabel(shape_df.columns[0])
141-
ax.set_ylabel(shape_df.columns[1])
142-
ax.set_zlabel("contribution")
138+
plt.figure(figsize=(8, 6))
139+
plt.imshow(
140+
pivot_table.values,
141+
aspect="auto",
142+
origin="lower",
143+
extent=[
144+
pivot_table.columns.min(),
145+
pivot_table.columns.max(),
146+
pivot_table.index.min(),
147+
pivot_table.index.max(),
148+
],
149+
cmap="Blues_r",
150+
)
151+
plt.colorbar(label="contribution")
152+
plt.xlabel(shape_df.columns[1])
153+
plt.ylabel(shape_df.columns[0])
143154
plt.title("Contribution to the linear predictor")
144155
plt.savefig(f"shape of {affiliation}.png")
145156
plt.close()
@@ -183,7 +194,7 @@
183194
mse = ((data_test[response] - data_test[predicted]) ** 2).mean()
184195
mae = (data_test[response] - data_test[predicted]).abs().mean()
185196
goodness_of_fit = pd.DataFrame(
186-
{"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"][0]]}
197+
{"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"].iloc[0]]}
187198
)
188199
goodness_of_fit["r_squared"] = goodness_of_fit["correlation"] ** 2
189200

examples/train_aplr_regression_using_aplr_tuner.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,28 @@
120120
plt.savefig(f"shape of {affiliation}.png")
121121
plt.close()
122122
elif is_two_way_interaction:
123-
plt.figure()
124-
ax = plt.axes(projection="3d")
125-
ax.plot_trisurf(
126-
shape_df.iloc[:, 0],
127-
shape_df.iloc[:, 1],
128-
shape_df.iloc[:, 2],
129-
cmap="Greys",
123+
pivot_table = shape_df.pivot_table(
124+
index=shape_df.columns[0],
125+
columns=shape_df.columns[1],
126+
values=shape_df.columns[2],
127+
aggfunc="mean",
130128
)
131-
ax.set_xlabel(shape_df.columns[0])
132-
ax.set_ylabel(shape_df.columns[1])
133-
ax.set_zlabel("contribution")
129+
plt.figure(figsize=(8, 6))
130+
plt.imshow(
131+
pivot_table.values,
132+
aspect="auto",
133+
origin="lower",
134+
extent=[
135+
pivot_table.columns.min(),
136+
pivot_table.columns.max(),
137+
pivot_table.index.min(),
138+
pivot_table.index.max(),
139+
],
140+
cmap="Blues_r",
141+
)
142+
plt.colorbar(label="contribution")
143+
plt.xlabel(shape_df.columns[1])
144+
plt.ylabel(shape_df.columns[0])
134145
plt.title("Contribution to the linear predictor")
135146
plt.savefig(f"shape of {affiliation}.png")
136147
plt.close()
@@ -174,7 +185,7 @@
174185
mse = ((data_test[response] - data_test[predicted]) ** 2).mean()
175186
mae = (data_test[response] - data_test[predicted]).abs().mean()
176187
goodness_of_fit = pd.DataFrame(
177-
{"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"][0]]}
188+
{"mse": [mse], "mae": [mae], "correlation": [correlation["prediction"].iloc[0]]}
178189
)
179190
goodness_of_fit["r_squared"] = goodness_of_fit["correlation"] ** 2
180191

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
setuptools.setup(
3030
name="aplr",
31-
version="10.10.0",
31+
version="10.11.0",
3232
description="Automatic Piecewise Linear Regression",
3333
ext_modules=[sfc_module],
3434
author="Mathias von Ottenbreit",

0 commit comments

Comments
 (0)