Skip to content

Commit d68b438

Browse files
Merge pull request #7 from ottenbreit-data-science/bugfix
Bugfix
2 parents 47ab88a + 22bed15 commit d68b438

File tree

3 files changed

+36
-10
lines changed

3 files changed

+36
-10
lines changed

cpp/APLRRegressor.h

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ class APLRRegressor
4949
void validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t> &validation_set_indexes);
5050
void throw_error_if_validation_set_indexes_has_invalid_indexes(const VectorXd &y, const std::vector<size_t> &validation_set_indexes);
5151
void define_training_and_validation_sets(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight, const std::vector<size_t> &validation_set_indexes);
52-
void initialize(const MatrixXd &X);
52+
void initialize();
53+
bool check_if_base_term_has_only_one_unique_value(size_t base_term);
5354
void add_term_to_terms_eligible_current(Term &term);
5455
VectorXd calculate_neg_gradient_current(const VectorXd &y,const VectorXd &predictions_current);
5556
void execute_boosting_steps();
@@ -164,7 +165,7 @@ void APLRRegressor::fit(const MatrixXd &X,const VectorXd &y,const VectorXd &samp
164165
{
165166
validate_input_to_fit(X,y,sample_weight,X_names,validation_set_indexes);
166167
define_training_and_validation_sets(X,y,sample_weight,validation_set_indexes);
167-
initialize(X);
168+
initialize();
168169
execute_boosting_steps();
169170
update_coefficients_for_all_steps();
170171
print_final_summary();
@@ -177,7 +178,7 @@ void APLRRegressor::fit(const MatrixXd &X,const VectorXd &y,const VectorXd &samp
177178
void APLRRegressor::validate_input_to_fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight,const std::vector<std::string> &X_names, const std::vector<size_t> &validation_set_indexes)
178179
{
179180
if(X.rows()!=y.size()) throw std::runtime_error("X and y must have the same number of rows.");
180-
if(X.rows()==0) throw std::runtime_error("X and y cannot have zero rows.");
181+
if(X.rows()<2) throw std::runtime_error("X and y cannot have less than two rows.");
181182
if(sample_weight.size()>0 && sample_weight.size()!=y.size()) throw std::runtime_error("sample_weight must have 0 or as many rows as X and y.");
182183
if(X_names.size()>0 && X_names.size()!=static_cast<size_t>(X.cols())) throw std::runtime_error("X_names must have as many columns as X.");
183184
throw_error_if_matrix_has_nan_or_infinite_elements(X, "X");
@@ -270,23 +271,28 @@ void APLRRegressor::define_training_and_validation_sets(const MatrixXd &X,const
270271
}
271272
}
272273

273-
void APLRRegressor::initialize(const MatrixXd &X)
274+
void APLRRegressor::initialize()
274275
{
275-
number_of_base_terms=static_cast<size_t>(X.cols());
276+
number_of_base_terms=static_cast<size_t>(X_train.cols());
276277

277-
terms.reserve(X.cols()*reserved_terms_times_num_x);
278+
terms.reserve(X_train.cols()*reserved_terms_times_num_x);
278279
terms.clear();
279280

280281
intercept=0;
281282
intercept_steps=VectorXd::Constant(m,0);
282283

283284
null_predictions=VectorXd::Constant(y_train.size(),0);
284285

285-
terms_eligible_current.reserve(X.cols()*reserved_terms_times_num_x);
286-
for (size_t i = 0; i < static_cast<size_t>(X.cols()); ++i) //add each base term
286+
terms_eligible_current.reserve(X_train.cols()*reserved_terms_times_num_x);
287+
for (size_t i = 0; i < static_cast<size_t>(X_train.cols()); ++i)
287288
{
289+
bool term_has_one_unique_value{check_if_base_term_has_only_one_unique_value(i)};
288290
Term copy_of_base_term{Term(i)};
289291
add_term_to_terms_eligible_current(copy_of_base_term);
292+
if(term_has_one_unique_value)
293+
{
294+
terms_eligible_current[terms_eligible_current.size()-1].ineligible_boosting_steps=std::numeric_limits<size_t>::max();
295+
}
290296
}
291297

292298
predictions_current=VectorXd::Constant(y_train.size(),0);
@@ -299,6 +305,25 @@ void APLRRegressor::initialize(const MatrixXd &X)
299305
neg_gradient_nullmodel_errors_sum=neg_gradient_nullmodel_errors.sum();
300306
}
301307

308+
bool APLRRegressor::check_if_base_term_has_only_one_unique_value(size_t base_term)
309+
{
310+
size_t rows{static_cast<size_t>(X_train.rows())};
311+
if(rows==1) return true;
312+
313+
bool term_has_one_unique_value{true};
314+
for (size_t i = 1; i < rows; ++i)
315+
{
316+
bool observation_is_equal_to_previous{check_if_approximately_equal(X_train.col(base_term)[i], X_train.col(base_term)[i-1])};
317+
if(!observation_is_equal_to_previous)
318+
{
319+
term_has_one_unique_value=false;
320+
break;
321+
}
322+
}
323+
324+
return term_has_one_unique_value;
325+
}
326+
302327
void APLRRegressor::add_term_to_terms_eligible_current(Term &term)
303328
{
304329
terms_eligible_current.push_back(term);
@@ -549,7 +574,7 @@ void APLRRegressor::consider_updating_intercept()
549574
void APLRRegressor::select_the_best_term_and_update_errors(size_t boosting_step)
550575
{
551576
//If intercept does best
552-
if(std::isless(error_after_updating_intercept,lowest_error_sum))
577+
if(std::islessequal(error_after_updating_intercept,lowest_error_sum))
553578
{
554579
//Updating intercept, current predictions, gradient and errors
555580
lowest_error_sum=error_after_updating_intercept;

cpp/term.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ void Term::estimate_split_point(const MatrixXd &X,const VectorXd &y,const Vector
164164
{
165165
coefficient=0;
166166
split_point_search_errors_sum=std::numeric_limits<double>::infinity();
167+
ineligible_boosting_steps=std::numeric_limits<size_t>::max();
167168
return;
168169
}
169170

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
setuptools.setup(
1717
name='aplr',
18-
version='1.0.8',
18+
version='1.0.9',
1919
description='Automatic Piecewise Linear Regression',
2020
ext_modules=[sfc_module],
2121
author="Mathias von Ottenbreit",

Comments (0)