Skip to content

Commit 1a06c65

Browse files
bugfix
1 parent 7870300 commit 1a06c65

12 files changed

+336
-737
lines changed

cpp/APLRRegressor.h

Lines changed: 153 additions & 95 deletions
Large diffs are not rendered by default.

cpp/functions.h

Lines changed: 28 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212

1313
using namespace Eigen;
1414

15-
//implements relative method - do not use for comparing with zero
16-
//use this most of the time, tolerance needs to be meaningful in your context
1715
template<typename TReal>
1816
static bool is_approximately_equal(TReal a, TReal b, TReal tolerance = std::numeric_limits<TReal>::epsilon())
1917
{
@@ -30,8 +28,6 @@ static bool is_approximately_equal(TReal a, TReal b, TReal tolerance = std::nume
3028
return false;
3129
}
3230

33-
//supply tolerance that is meaningful in your context
34-
//for example, default tolerance may not work if you are comparing double with float
3531
template<typename TReal>
3632
static bool is_approximately_zero(TReal a, TReal tolerance = std::numeric_limits<TReal>::epsilon())
3733
{
@@ -80,10 +76,8 @@ VectorXd calculate_tweedie_errors(const VectorXd &y,const VectorXd &predicted,do
8076
return errors;
8177
}
8278

83-
//Computes errors (for each observation) based on error metric for a vector
8479
VectorXd calculate_errors(const VectorXd &y,const VectorXd &predicted,const VectorXd &sample_weight=VectorXd(0),const std::string &family="gaussian",double tweedie_power=1.5)
8580
{
86-
//Error per observation before adjustment for sample weights
8781
VectorXd errors;
8882
if(family=="gaussian")
8983
errors=calculate_gaussian_errors(y,predicted);
@@ -95,7 +89,7 @@ VectorXd calculate_errors(const VectorXd &y,const VectorXd &predicted,const Vect
9589
errors=calculate_gamma_errors(y,predicted);
9690
else if(family=="tweedie")
9791
errors=calculate_tweedie_errors(y,predicted,tweedie_power);
98-
//Adjusting for sample weights if specified
92+
9993
if(sample_weight.size()>0)
10094
errors=errors.array()*sample_weight.array();
10195

@@ -109,25 +103,20 @@ double calculate_gaussian_error_one_observation(double y,double predicted)
109103
return error;
110104
}
111105

112-
//Computes error for one observation based on error metric
113106
double calculate_error_one_observation(double y,double predicted,double sample_weight=NAN_DOUBLE)
114107
{
115-
//Error per observation before adjustment for sample weights
116108
double error{calculate_gaussian_error_one_observation(y,predicted)};
117109

118-
//Adjusting for sample weights if specified
119110
if(!std::isnan(sample_weight))
120111
error=error*sample_weight;
121112

122113
return error;
123114
}
124115

125-
//Computes overall error based on errors from calculate_errors(), returning one value
126116
double calculate_mean_error(const VectorXd &errors,const VectorXd &sample_weight=VectorXd(0))
127117
{
128118
double error{std::numeric_limits<double>::infinity()};
129119

130-
//Adjusting for sample weights if specified
131120
if(sample_weight.size()>0)
132121
error=errors.sum()/sample_weight.sum();
133122
else
@@ -138,7 +127,6 @@ double calculate_mean_error(const VectorXd &errors,const VectorXd &sample_weight
138127
return error;
139128
}
140129

141-
//Computes overall error based on errors from calculate_errors(), returning one value
142130
double calculate_sum_error(const VectorXd &errors)
143131
{
144132
double error{errors.sum()};
@@ -192,22 +180,18 @@ VectorXd transform_linear_predictor_to_predictions(const VectorXd &linear_predic
192180
return VectorXd(0);
193181
}
194182

195-
//sorts index based on v
196-
VectorXi sort_indexes_ascending(const VectorXd &v)
183+
VectorXi sort_indexes_ascending(const VectorXd &sort_based_on_me)
197184
{
198-
// initialize original index locations
199-
VectorXi idx(v.size());
185+
VectorXi idx(sort_based_on_me.size());
200186
std::iota(idx.begin(),idx.end(),0);
201187

202-
// sort indexes based on comparing values in v
203-
std::sort(idx.begin(), idx.end(),[&v](size_t i1, size_t i2) {return v(i1) < v(i2);});
188+
std::sort(idx.begin(), idx.end(),[&sort_based_on_me](size_t i1, size_t i2) {return sort_based_on_me(i1) < sort_based_on_me(i2);});
204189

205190
return idx;
206191
}
207192

208-
//Loads a csv file into an Eigen matrix
209193
template<typename M>
210-
M load_csv (const std::string &path) {
194+
M load_csv_into_eigen_matrix (const std::string &path) {
211195
std::ifstream indata;
212196
indata.open(path);
213197
std::string line;
@@ -224,8 +208,7 @@ M load_csv (const std::string &path) {
224208
return Map<const Matrix<typename M::Scalar, M::RowsAtCompileTime, M::ColsAtCompileTime, RowMajor>>(values.data(), rows, values.size()/rows);
225209
}
226210

227-
//Saves an Eigen matrix as a csv file
228-
void save_data(std::string fileName, MatrixXd matrix)
211+
void save_as_csv_file(std::string fileName, MatrixXd matrix)
229212
{
230213
//https://eigen.tuxfamily.org/dox/structEigen_1_1IOFormat.html
231214
const static IOFormat CSVFormat(FullPrecision, DontAlignCols, ", ", "\n");
@@ -238,51 +221,12 @@ void save_data(std::string fileName, MatrixXd matrix)
238221
}
239222
}
240223

241-
//For multicore distribution of elements
242224
struct DistributedIndices
243225
{
244226
std::vector<size_t> index_lowest;
245227
std::vector<size_t> index_highest;
246228
};
247229

248-
//Distribution of elements to multiple cores
249-
template <typename T> //type must implement a size() method
250-
DistributedIndices distribute_to_indices(T &collection,size_t n_jobs)
251-
{
252-
size_t collection_size=static_cast<size_t>(collection.size());
253-
254-
//Initializing output
255-
DistributedIndices output;
256-
output.index_lowest.reserve(collection_size);
257-
output.index_highest.reserve(collection_size);
258-
259-
//Determining how many items to evaluate per core
260-
size_t available_cores{static_cast<size_t>(std::thread::hardware_concurrency())};
261-
if(n_jobs>1)
262-
available_cores=std::min(n_jobs,available_cores);
263-
size_t units_per_core{std::max(collection_size/available_cores,static_cast<size_t>(1))};
264-
265-
//For each set of items going into one core
266-
for (size_t i = 0; i < collection_size; i=i+units_per_core)
267-
{
268-
output.index_lowest.push_back(i);
269-
}
270-
for (size_t i = 0; i < output.index_lowest.size()-1; ++i)
271-
{
272-
output.index_highest.push_back(output.index_lowest[i+1]-1);
273-
}
274-
output.index_highest.push_back(collection_size-1);
275-
//Removing last bunch and adjusting the second last if necessary
276-
if(output.index_lowest.size()>available_cores)
277-
{
278-
output.index_lowest.pop_back();
279-
output.index_highest.pop_back();
280-
output.index_highest[output.index_highest.size()-1]=collection_size-1;
281-
}
282-
283-
return output;
284-
}
285-
286230
template <typename T> //type must implement a size() method
287231
size_t calculate_max_index_in_vector(T &vector)
288232
{
@@ -371,4 +315,26 @@ VectorXd calculate_rolling_centered_mean(const VectorXd &vector, const VectorXi
371315
}
372316

373317
return rolling_centered_mean;
318+
}
319+
320+
VectorXi calculate_indicator(const VectorXd &v)
321+
{
322+
VectorXi indicator{VectorXi::Constant(v.rows(),1)};
323+
for (size_t i = 0; i < static_cast<size_t>(v.size()); ++i)
324+
{
325+
if(is_approximately_zero(v[i]))
326+
indicator[i]=0;
327+
}
328+
return indicator;
329+
}
330+
331+
VectorXi calculate_indicator(const VectorXi &v)
332+
{
333+
VectorXi indicator{VectorXi::Constant(v.rows(),1)};
334+
for (size_t i = 0; i < static_cast<size_t>(v.size()); ++i)
335+
{
336+
if(v[i]==0)
337+
indicator[i]=0;
338+
}
339+
return indicator;
374340
}

cpp/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ int main()
2626
model.max_eligible_terms=5;
2727

2828
//Data
29-
MatrixXd X_train{load_csv<MatrixXd>("X_train.csv")};
30-
MatrixXd X_test{load_csv<MatrixXd>("X_test.csv")};
31-
VectorXd y_train{load_csv<MatrixXd>("y_train.csv")};
32-
VectorXd y_test{load_csv<MatrixXd>("y_test.csv")};
29+
MatrixXd X_train{load_csv_into_eigen_matrix<MatrixXd>("X_train.csv")};
30+
MatrixXd X_test{load_csv_into_eigen_matrix<MatrixXd>("X_test.csv")};
31+
VectorXd y_train{load_csv_into_eigen_matrix<MatrixXd>("y_train.csv")};
32+
VectorXd y_test{load_csv_into_eigen_matrix<MatrixXd>("y_test.csv")};
3333

3434
VectorXd sample_weight{VectorXd::Constant(y_train.size(),1.0)};
3535
//VectorXd sample_weight{VectorXd::Random(y_train.size()).cwiseAbs()};
@@ -44,7 +44,7 @@ int main()
4444
VectorXd predictions{model.predict(X_test)};
4545

4646
//Saving results
47-
save_data("output.csv",predictions);
47+
save_as_csv_file("output.csv",predictions);
4848
std::cout<<"min validation_error "<<model.validation_error_steps.minCoeff()<<"\n\n";
4949
std::cout<<is_approximately_equal(model.validation_error_steps.minCoeff(),6.32895,0.00001)<<"\n";
5050

0 commit comments

Comments
 (0)