Skip to content

Commit 7244a98

Browse files
Merge pull request #3 from ottenbreit-data-science/bugfix
fixed bug in bin splitting
2 parents 19c72d8 + c305a3a commit 7244a98

File tree

3 files changed

+12
-20
lines changed

3 files changed

+12
-20
lines changed

cpp/main.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ int main()
1818
model.n_jobs=0;
1919
model.loss_function_mse=true;
2020
model.verbosity=3;
21-
model.min_observations_in_split=30;
21+
model.min_observations_in_split=10;
2222
//model.max_interaction_level=0;
2323
model.max_interaction_level=100;
2424
model.max_interactions=30;
@@ -46,10 +46,10 @@ int main()
4646
//Saving results
4747
save_data("output.csv",predictions);
4848
std::cout<<"min validation_error "<<model.validation_error_steps.minCoeff()<<"\n\n";
49-
std::cout<<check_if_approximately_equal(model.validation_error_steps.minCoeff(),6.39607,0.00001)<<"\n";
49+
std::cout<<check_if_approximately_equal(model.validation_error_steps.minCoeff(),6.01566,0.00001)<<"\n";
5050

5151
std::cout<<"mean prediction "<<predictions.mean()<<"\n\n";
52-
std::cout<<check_if_approximately_equal(predictions.mean(),23.7461,0.0001)<<"\n";
52+
std::cout<<check_if_approximately_equal(predictions.mean(),23.8349,0.0001)<<"\n";
5353

5454
std::cout<<"best_m: "<<model.m<<"\n";
5555

cpp/term.h

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -361,30 +361,22 @@ void Term::setup_bins()
361361
auto ip{std::unique(values_sorted_unique.begin(),values_sorted_unique.end())};
362362
values_sorted_unique.resize(std::distance(values_sorted_unique.begin(),ip));
363363

364-
bins_start_index.reserve(bins+1);
365-
bins_end_index.reserve(bins+1);
366-
//Allocations
367-
bool eligible_spacing{false};
368-
bool eligible_unique_numbers{false};
364+
bins_start_index.reserve(bins+1);
365+
bins_end_index.reserve(bins+1);
369366
//Start_index
370367
bins_start_index.push_back(0);
371368
if(bins>1)
372369
{
373-
for (size_t i = min_observations_in_split-1; i <= max_index; ++i) //for each observation in an allowable range
370+
for (size_t i = min_observations_in_split-1; i <= max_index+1-min_observations_in_split; ++i) //for each observation in an allowable range
374371
{
375-
//General eligibility
376-
if(i>=min_observations_in_split-1 && (i%observations_in_bin==0 || values_sorted_unique.size()<=bins) && i<=max_index+1-min_observations_in_split) eligible_spacing=true;
372+
size_t last_bin_start_index{bins_start_index[bins_start_index.size()-1]};
373+
bool eligible_on_spacing_between_observations{i >= last_bin_start_index + observations_in_bin || values_sorted_unique.size()<=bins};
374+
bool eligible_on_unique_numbers{i>0 && !check_if_approximately_equal(sorted_vectors.values_sorted[i],sorted_vectors.values_sorted[i-1])};
377375

378-
//Eligibility when considering unique numbers
379-
if(i>0 && !check_if_approximately_equal(sorted_vectors.values_sorted[i],sorted_vectors.values_sorted[i-1])) eligible_unique_numbers=true;
380-
else eligible_unique_numbers=false;
381-
382-
//Creating bin if possible and resetting eligibility for next iteration
383-
if(eligible_spacing && eligible_unique_numbers)
376+
bool create_bin{eligible_on_spacing_between_observations && eligible_on_unique_numbers};
377+
if(create_bin)
384378
{
385379
bins_start_index.push_back(i);
386-
eligible_spacing=false;
387-
eligible_unique_numbers=false;
388380
}
389381
}
390382
}

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515

1616
setuptools.setup(
1717
name='aplr',
18-
version='1.0.3',
18+
version='1.0.4',
1919
description='Automatic Piecewise Linear Regression',
2020
ext_modules=[sfc_module],
2121
author="Mathias von Ottenbreit",

0 commit comments

Comments
 (0)