Skip to content

Commit 068ef00

Browse files
Added method to get validation indexes
1 parent fc92b0a commit 068ef00

File tree

6 files changed

+33
-10
lines changed

6 files changed

+33
-10
lines changed

API_REFERENCE.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,9 @@ The index of the term selected. So ***0*** is the first term, ***1*** is the sec
184184

185185
## Method: get_validation_group_mse()
186186

187-
***Returns mean squared error on grouped data in the validation set.*** See ***group_size_for_validation_group_mse*** for more information.
187+
***Returns mean squared error on grouped data in the validation set.*** See ***group_size_for_validation_group_mse*** for more information.
188+
189+
190+
## Method: get_validation_indexes()
191+
192+
***Returns a numpy vector containing the indexes of the observations in the training data that were used for validation instead of training.***

aplr/aplr.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ def get_m(self)->int:
9393

9494
def get_validation_group_mse(self)->float:
9595
return self.APLRRegressor.get_validation_group_mse()
96+
97+
def get_validation_indexes(self)->npt.ArrayLike:
98+
return self.APLRRegressor.get_validation_indexes()
9699

97100
#For sklearn
98101
def get_params(self, deep=True):

cpp/APLRRegressor.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class APLRRegressor
136136
double max_training_prediction_or_response;
137137
double validation_group_mse;
138138
size_t group_size_for_validation_group_mse;
139+
std::vector<size_t> validation_indexes;
139140

140141
//Methods
141142
APLRRegressor(size_t m=1000,double v=0.1,uint_fast32_t random_state=std::numeric_limits<uint_fast32_t>::lowest(),std::string family="gaussian",
@@ -161,6 +162,7 @@ class APLRRegressor
161162
VectorXd get_intercept_steps();
162163
size_t get_m();
163164
double get_validation_group_mse();
165+
std::vector<size_t> get_validation_indexes();
164166
};
165167

166168
//Regular constructor
@@ -174,7 +176,8 @@ APLRRegressor::APLRRegressor(size_t m,double v,uint_fast32_t random_state,std::s
174176
intercept_steps{VectorXd(0)},max_interactions{max_interactions},interactions_eligible{0},validation_error_steps{VectorXd(0)},
175177
min_observations_in_split{min_observations_in_split},ineligible_boosting_steps_added{ineligible_boosting_steps_added},
176178
max_eligible_terms{max_eligible_terms},number_of_base_terms{0},tweedie_power{tweedie_power},min_training_prediction_or_response{NAN_DOUBLE},
177-
max_training_prediction_or_response{NAN_DOUBLE},validation_group_mse{NAN_DOUBLE},group_size_for_validation_group_mse{group_size_for_validation_group_mse}
179+
max_training_prediction_or_response{NAN_DOUBLE},validation_group_mse{NAN_DOUBLE},group_size_for_validation_group_mse{group_size_for_validation_group_mse},
180+
validation_indexes{std::vector<size_t>(0)}
178181
{
179182
}
180183

@@ -190,7 +193,7 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other):
190193
max_eligible_terms{other.max_eligible_terms},number_of_base_terms{other.number_of_base_terms},
191194
feature_importance{other.feature_importance},tweedie_power{other.tweedie_power},min_training_prediction_or_response{other.min_training_prediction_or_response},
192195
max_training_prediction_or_response{other.max_training_prediction_or_response},validation_group_mse{other.validation_group_mse},
193-
group_size_for_validation_group_mse{other.group_size_for_validation_group_mse}
196+
group_size_for_validation_group_mse{other.group_size_for_validation_group_mse},validation_indexes{other.validation_indexes}
194197
{
195198
}
196199

@@ -363,20 +366,20 @@ void APLRRegressor::define_training_and_validation_sets(const MatrixXd &X,const
363366
{
364367
size_t y_size{static_cast<size_t>(y.size())};
365368
std::vector<size_t> train_indexes;
366-
std::vector<size_t> validation_indexes;
367369
bool use_validation_set_indexes{validation_set_indexes.size()>0};
368370
if(use_validation_set_indexes)
369371
{
370372
std::vector<size_t> all_indexes(y_size);
371373
std::iota(std::begin(all_indexes),std::end(all_indexes),0);
372374
validation_indexes=validation_set_indexes;
373375
train_indexes.reserve(y_size-validation_indexes.size());
374-
std::remove_copy_if(all_indexes.begin(),all_indexes.end(),std::back_inserter(train_indexes),[&validation_indexes](const size_t &arg)
376+
std::remove_copy_if(all_indexes.begin(),all_indexes.end(),std::back_inserter(train_indexes),[this](const size_t &arg)
375377
{ return (std::find(validation_indexes.begin(),validation_indexes.end(),arg) != validation_indexes.end());});
376378
}
377379
else
378380
{
379381
train_indexes.reserve(y_size);
382+
validation_indexes = std::vector<size_t>(0);
380383
validation_indexes.reserve(y_size);
381384
std::mt19937 mersenne{random_state};
382385
std::uniform_real_distribution<double> distribution(0.0,1.0);
@@ -1354,4 +1357,9 @@ size_t APLRRegressor::get_m()
13541357
double APLRRegressor::get_validation_group_mse()
13551358
{
13561359
return validation_group_mse;
1360+
}
1361+
1362+
std::vector<size_t> APLRRegressor::get_validation_indexes()
1363+
{
1364+
return validation_indexes;
13571365
}

cpp/pythonbinding.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ PYBIND11_MODULE(aplr_cpp, m) {
3636
.def("get_intercept_steps", &APLRRegressor::get_intercept_steps)
3737
.def("get_m", &APLRRegressor::get_m)
3838
.def("get_validation_group_mse", &APLRRegressor::get_validation_group_mse)
39+
.def("get_validation_indexes", &APLRRegressor::get_validation_indexes)
3940
.def_readwrite("intercept", &APLRRegressor::intercept)
4041
.def_readwrite("intercept_steps", &APLRRegressor::intercept_steps)
4142
.def_readwrite("m", &APLRRegressor::m)
@@ -64,17 +65,18 @@ PYBIND11_MODULE(aplr_cpp, m) {
6465
.def_readwrite("max_training_prediction_or_response",&APLRRegressor::max_training_prediction_or_response)
6566
.def_readwrite("validation_group_mse",&APLRRegressor::validation_group_mse)
6667
.def_readwrite("group_size_for_validation_group_mse",&APLRRegressor::group_size_for_validation_group_mse)
68+
.def_readwrite("validation_indexes",&APLRRegressor::validation_indexes)
6769
.def(py::pickle(
6870
[](const APLRRegressor &a) { // __getstate__
6971
/* Return a tuple that fully encodes the state of the object */
7072
return py::make_tuple(a.m,a.v,a.random_state,a.family,a.n_jobs,a.validation_ratio,a.intercept,a.bins,a.verbosity,
7173
a.max_interaction_level,a.max_interactions,a.validation_error_steps,a.term_names,a.term_coefficients,a.terms,a.intercept_steps,
7274
a.interactions_eligible,a.min_observations_in_split,a.ineligible_boosting_steps_added,a.max_eligible_terms,
7375
a.number_of_base_terms,a.feature_importance,a.link_function,a.tweedie_power,a.min_training_prediction_or_response,a.max_training_prediction_or_response,
74-
a.validation_group_mse,a.group_size_for_validation_group_mse);
76+
a.validation_group_mse,a.group_size_for_validation_group_mse,a.validation_indexes);
7577
},
7678
[](py::tuple t) { // __setstate__
77-
if (t.size() != 28)
79+
if (t.size() != 29)
7880
throw std::runtime_error("Invalid state!");
7981

8082
/* Create a new C++ instance */
@@ -97,6 +99,7 @@ PYBIND11_MODULE(aplr_cpp, m) {
9799
a.max_training_prediction_or_response=t[25].cast<double>();
98100
a.validation_group_mse=t[26].cast<double>();
99101
a.group_size_for_validation_group_mse=t[27].cast<size_t>();
102+
a.validation_indexes=t[28].cast<std::vector<size_t>>();
100103

101104
return a;
102105
}

cpp/test ALRRegressor.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ int main()
4242
//model.fit(X_train,y_train);
4343
//model.fit(X_train,y_train,sample_weight);
4444
//model.fit(X_train,y_train,sample_weight,{},{0,1,2,3,4,5,10,static_cast<size_t>(y_train.size()-1)});
45-
model.fit(X_train,y_train,sample_weight,{},{0,1,2,3,4,5,10,static_cast<size_t>(y_train.size()-1)},{1,8});
45+
std::vector<size_t> validation_indexes{0,1,2,3,4,5,10,static_cast<size_t>(y_train.size()-1)};
46+
std::vector<size_t> prioritized_predictor_indexes{1,8};
47+
model.fit(X_train,y_train,sample_weight,{},validation_indexes,prioritized_predictor_indexes);
4648
std::cout<<"feature importance\n"<<model.feature_importance<<"\n\n";
4749

4850
VectorXd predictions{model.predict(X_test)};
@@ -54,7 +56,9 @@ int main()
5456
std::cout<<predictions.mean()<<"\n\n";
5557
tests.push_back(is_approximately_equal(predictions.mean(),23.5049,0.00001));
5658

57-
//std::cout<<model.validation_error_steps<<"\n\n";
59+
std::vector<size_t> validation_indexes_from_model{model.get_validation_indexes()};
60+
bool validation_indexes_from_model_are_correct{validation_indexes_from_model == validation_indexes};
61+
tests.push_back(validation_indexes_from_model_are_correct);
5862

5963
//Test summary
6064
std::cout<<"\n\nTest summary\n"<<"Passed "<<std::accumulate(tests.begin(),tests.end(),0)<<" out of "<<tests.size()<<" tests.";

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
setuptools.setup(
1717
name='aplr',
18-
version='1.11.0',
18+
version='1.12.0',
1919
description='Automatic Piecewise Linear Regression',
2020
ext_modules=[sfc_module],
2121
author="Mathias von Ottenbreit",

0 commit comments

Comments
 (0)