Skip to content

Commit 37d7f58

Browse files
Merge pull request #2 from ottenbreit-data-science/err_handling
validating input
2 parents 1726dac + 78dbaa8 commit 37d7f58

File tree

5 files changed

+63
-6
lines changed

5 files changed

+63
-6
lines changed

cpp/APLRRegressor.h

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ class APLRRegressor
4646
bool abort_boosting;
4747

4848
//Methods
49-
void validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names);
49+
void validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t> &validation_set_indexes);
50+
void throw_error_if_validation_set_indexes_has_invalid_indexes(const VectorXd &y, const std::vector<size_t> &validation_set_indexes);
5051
void define_training_and_validation_sets(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight, const std::vector<size_t> &validation_set_indexes);
5152
void initialize(const MatrixXd &X);
5253
void add_term_to_terms_eligible_current(Term &term);
@@ -161,7 +162,7 @@ APLRRegressor::~APLRRegressor()
161162
//invalidating validation_ratio. The rest of indices are used to train.
162163
void APLRRegressor::fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names,const std::vector<size_t> &validation_set_indexes)
163164
{
164-
validate_input_to_fit(X,y,sample_weight,X_names);
165+
validate_input_to_fit(X,y,sample_weight,X_names,validation_set_indexes);
165166
define_training_and_validation_sets(X,y,sample_weight,validation_set_indexes);
166167
initialize(X);
167168
execute_boosting_steps();
@@ -173,12 +174,28 @@ void APLRRegressor::fit(const MatrixXd &X,const VectorXd &y,const VectorXd &samp
173174
cleanup_after_fit();
174175
}
175176

176-
void APLRRegressor::validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names)
177+
void APLRRegressor::validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t> &validation_set_indexes)
177178
{
178179
if(X.rows()!=y.size()) throw std::runtime_error("X and y must have the same number of rows.");
179180
if(X.rows()==0) throw std::runtime_error("X and y cannot have zero rows.");
180181
if(sample_weight.size()>0 && sample_weight.size()!=y.size()) throw std::runtime_error("sample_weight must have 0 or as many rows as X and y.");
181182
if(X_names.size()>0 && X_names.size()!=static_cast<size_t>(X.cols())) throw std::runtime_error("X_names must have as many columns as X.");
183+
throw_error_if_matrix_has_nan_or_infinite_elements(X, "X");
184+
throw_error_if_matrix_has_nan_or_infinite_elements(y, "y");
185+
throw_error_if_matrix_has_nan_or_infinite_elements(sample_weight, "sample_weight");
186+
throw_error_if_validation_set_indexes_has_invalid_indexes(y, validation_set_indexes);
187+
}
188+
189+
void APLRRegressor::throw_error_if_validation_set_indexes_has_invalid_indexes(const VectorXd &y, const std::vector<size_t> &validation_set_indexes)
190+
{
191+
bool validation_set_indexes_is_provided{validation_set_indexes.size()>0};
192+
if(validation_set_indexes_is_provided)
193+
{
194+
size_t max_index{*std::max_element(validation_set_indexes.begin(), validation_set_indexes.end())};
195+
bool validation_set_indexes_has_elements_out_of_bounds{max_index > static_cast<size_t>(y.size()-1)};
196+
if(validation_set_indexes_has_elements_out_of_bounds)
197+
throw std::runtime_error("validation_set_indexes has elements that are out of bounds.");
198+
}
182199
}
183200

184201
void APLRRegressor::define_training_and_validation_sets(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight, const std::vector<size_t> &validation_set_indexes)
@@ -815,6 +832,7 @@ void APLRRegressor::validate_that_model_can_be_used(const MatrixXd &X)
815832
if(X.rows()==0) throw std::runtime_error("X cannot have zero rows.");
816833
size_t cols_provided{static_cast<size_t>(X.cols())};
817834
if(cols_provided!=number_of_base_terms) throw std::runtime_error("X must have "+std::to_string(number_of_base_terms) +" columns but "+std::to_string(cols_provided)+" were provided.");
835+
throw_error_if_matrix_has_nan_or_infinite_elements(X, "X");
818836
}
819837

820838
void APLRRegressor::cleanup_after_fit()

cpp/functions.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,4 +183,27 @@ template <typename T> //type must implement a size() method
183183
size_t calculate_max_index_in_vector(T &vector)
184184
{
185185
return vector.size()-static_cast<size_t>(1);
186+
}
187+
188+
// Reports whether x contains at least one element that is not finite.
// T must provide allFinite() (an Eigen Matrix or Vector).
// Returns true when allFinite() is false, and false otherwise.
template <typename T> //type must be an Eigen Matrix or Vector
bool check_if_matrix_has_nan_or_infinite_elements(const T &x)
{
    return !x.allFinite();
}
197+
198+
// Throws std::runtime_error with the message
// "<matrix_name> has nan or infinite elements." when x holds a NaN or infinite
// element. An x with size() == 0 is accepted without being inspected.
//   x:           matrix or vector to screen.
//   matrix_name: label used as the prefix of the error message.
template <typename T> //type must be an Eigen Matrix or Vector
void throw_error_if_matrix_has_nan_or_infinite_elements(const T &x, const std::string &matrix_name)
{
    const bool has_elements{x.size()!=0};
    if(has_elements && check_if_matrix_has_nan_or_infinite_elements(x))
        throw std::runtime_error(matrix_name + " has nan or infinite elements.");
}

cpp/test ALRRegressor.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ int main()
3636
std::cout<<X_train;
3737

3838
//Fitting
39-
model.fit(X_train,y_train,sample_weight);
40-
//model.fit(X_train,y_train,sample_weight,{},{0,1,2,3,4,5,10,static_cast<size_t>(y_train.size()-1)});
39+
//model.fit(X_train,y_train);
40+
//model.fit(X_train,y_train,sample_weight);
41+
model.fit(X_train,y_train,sample_weight,{},{0,1,2,3,4,5,10,static_cast<size_t>(y_train.size()-1)});
4142
std::cout<<"feature importance\n"<<model.feature_importance<<"\n\n";
4243

4344
VectorXd predictions{model.predict(X_test)};

cpp/test functions.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,21 @@ int main()
5151
double error_mae_sw{calculate_error(errors_mae_sw,sample_weight)};
5252
std::cout<<"error_mae_sw: "<<error_mae_sw<<"\n\n";
5353
tests.push_back((check_if_approximately_equal(error_mae_sw,0.5666,0.0001)?true:false));
54+
55+
//testing for nan and infinity
56+
//matrix without nan or inf
57+
bool matrix_has_nan_or_inf_elements{check_if_matrix_has_nan_or_infinite_elements(y)};
58+
tests.push_back(!matrix_has_nan_or_inf_elements?true:false);
59+
60+
VectorXd inf(5);
61+
inf<<1.0, 0.2, std::numeric_limits<double>::infinity(), 0.0, 0.5;
62+
matrix_has_nan_or_inf_elements = check_if_matrix_has_nan_or_infinite_elements(inf);
63+
tests.push_back(matrix_has_nan_or_inf_elements?true:false);
64+
65+
VectorXd nan(5);
66+
nan<<1.0, 0.2, NAN_DOUBLE, 0.0, 0.5;
67+
matrix_has_nan_or_inf_elements = check_if_matrix_has_nan_or_infinite_elements(nan);
68+
tests.push_back(matrix_has_nan_or_inf_elements?true:false);
5469

5570
//Test summary
5671
std::cout<<"Test summary\n\n"<<"Passed "<<std::accumulate(tests.begin(),tests.end(),0)<<" out of "<<tests.size()<<" tests.";

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
setuptools.setup(
1717
name='aplr',
18-
version='1.0.2',
18+
version='1.0.3',
1919
description='Automatic Piecewise Linear Regression',
2020
ext_modules=[sfc_module],
2121
author="Mathias von Ottenbreit",

0 commit comments

Comments
 (0)