1212
1313using namespace Eigen ;
1414
15- // implements relative method - do not use for comparing with zero
16- // use this most of the time, tolerance needs to be meaningful in your context
1715template <typename TReal>
1816static bool is_approximately_equal (TReal a, TReal b, TReal tolerance = std::numeric_limits<TReal>::epsilon())
1917{
@@ -30,8 +28,6 @@ static bool is_approximately_equal(TReal a, TReal b, TReal tolerance = std::nume
3028 return false ;
3129}
3230
33- // supply tolerance that is meaningful in your context
34- // for example, default tolerance may not work if you are comparing double with float
3531template <typename TReal>
3632static bool is_approximately_zero (TReal a, TReal tolerance = std::numeric_limits<TReal>::epsilon())
3733{
@@ -80,10 +76,8 @@ VectorXd calculate_tweedie_errors(const VectorXd &y,const VectorXd &predicted,do
8076 return errors;
8177}
8278
83- // Computes errors (for each observation) based on error metric for a vector
8479VectorXd calculate_errors (const VectorXd &y,const VectorXd &predicted,const VectorXd &sample_weight=VectorXd(0 ),const std::string &family="gaussian",double tweedie_power=1.5)
8580{
86- // Error per observation before adjustment for sample weights
8781 VectorXd errors;
8882 if (family==" gaussian" )
8983 errors=calculate_gaussian_errors (y,predicted);
@@ -95,7 +89,7 @@ VectorXd calculate_errors(const VectorXd &y,const VectorXd &predicted,const Vect
9589 errors=calculate_gamma_errors (y,predicted);
9690 else if (family==" tweedie" )
9791 errors=calculate_tweedie_errors (y,predicted,tweedie_power);
98- // Adjusting for sample weights if specified
92+
9993 if (sample_weight.size ()>0 )
10094 errors=errors.array ()*sample_weight.array ();
10195
@@ -109,25 +103,20 @@ double calculate_gaussian_error_one_observation(double y,double predicted)
109103 return error;
110104}
111105
112- // Computes error for one observation based on error metric
113106double calculate_error_one_observation (double y,double predicted,double sample_weight=NAN_DOUBLE)
114107{
115- // Error per observation before adjustment for sample weights
116108 double error{calculate_gaussian_error_one_observation (y,predicted)};
117109
118- // Adjusting for sample weights if specified
119110 if (!std::isnan (sample_weight))
120111 error=error*sample_weight;
121112
122113 return error;
123114}
124115
125- // Computes overall error based on errors from calculate_errors(), returning one value
126116double calculate_mean_error (const VectorXd &errors,const VectorXd &sample_weight=VectorXd(0 ))
127117{
128118 double error{std::numeric_limits<double >::infinity ()};
129119
130- // Adjusting for sample weights if specified
131120 if (sample_weight.size ()>0 )
132121 error=errors.sum ()/sample_weight.sum ();
133122 else
@@ -138,7 +127,6 @@ double calculate_mean_error(const VectorXd &errors,const VectorXd &sample_weight
138127 return error;
139128}
140129
141- // Computes overall error based on errors from calculate_errors(), returning one value
142130double calculate_sum_error (const VectorXd &errors)
143131{
144132 double error{errors.sum ()};
@@ -192,22 +180,18 @@ VectorXd transform_linear_predictor_to_predictions(const VectorXd &linear_predic
192180 return VectorXd (0 );
193181}
194182
195- // sorts index based on v
196- VectorXi sort_indexes_ascending (const VectorXd &v)
183+ VectorXi sort_indexes_ascending (const VectorXd &sort_based_on_me)
197184{
198- // initialize original index locations
199- VectorXi idx (v.size ());
185+ VectorXi idx (sort_based_on_me.size ());
200186 std::iota (idx.begin (),idx.end (),0 );
201187
202- // sort indexes based on comparing values in v
203- std::sort (idx.begin (), idx.end (),[&v](size_t i1, size_t i2) {return v (i1) < v (i2);});
188+ std::sort (idx.begin (), idx.end (),[&sort_based_on_me](size_t i1, size_t i2) {return sort_based_on_me (i1) < sort_based_on_me (i2);});
204189
205190 return idx;
206191}
207192
208- // Loads a csv file into an Eigen matrix
209193template <typename M>
210- M load_csv (const std::string &path) {
194+ M load_csv_into_eigen_matrix (const std::string &path) {
211195 std::ifstream indata;
212196 indata.open (path);
213197 std::string line;
@@ -224,8 +208,7 @@ M load_csv (const std::string &path) {
224208 return Map<const Matrix<typename M::Scalar, M::RowsAtCompileTime, M::ColsAtCompileTime, RowMajor>>(values.data (), rows, values.size ()/rows);
225209}
226210
227- // Saves an Eigen matrix as a csv file
228- void save_data (std::string fileName, MatrixXd matrix)
211+ void save_as_csv_file (std::string fileName, MatrixXd matrix)
229212{
230213 // https://eigen.tuxfamily.org/dox/structEigen_1_1IOFormat.html
231214 const static IOFormat CSVFormat (FullPrecision, DontAlignCols, " , " , " \n " );
@@ -238,51 +221,12 @@ void save_data(std::string fileName, MatrixXd matrix)
238221 }
239222}
240223
241- // For multicore distribution of elements
242224struct DistributedIndices
243225{
244226 std::vector<size_t > index_lowest;
245227 std::vector<size_t > index_highest;
246228};
247229
248- // Distribution of elements to multiple cores
249- template <typename T> // type must implement a size() method
250- DistributedIndices distribute_to_indices (T &collection,size_t n_jobs)
251- {
252- size_t collection_size=static_cast <size_t >(collection.size ());
253-
254- // Initializing output
255- DistributedIndices output;
256- output.index_lowest .reserve (collection_size);
257- output.index_highest .reserve (collection_size);
258-
259- // Determining how many items to evaluate per core
260- size_t available_cores{static_cast <size_t >(std::thread::hardware_concurrency ())};
261- if (n_jobs>1 )
262- available_cores=std::min (n_jobs,available_cores);
263- size_t units_per_core{std::max (collection_size/available_cores,static_cast <size_t >(1 ))};
264-
265- // For each set of items going into one core
266- for (size_t i = 0 ; i < collection_size; i=i+units_per_core)
267- {
268- output.index_lowest .push_back (i);
269- }
270- for (size_t i = 0 ; i < output.index_lowest .size ()-1 ; ++i)
271- {
272- output.index_highest .push_back (output.index_lowest [i+1 ]-1 );
273- }
274- output.index_highest .push_back (collection_size-1 );
275- // Removing last bunch and adjusting the second last if necessary
276- if (output.index_lowest .size ()>available_cores)
277- {
278- output.index_lowest .pop_back ();
279- output.index_highest .pop_back ();
280- output.index_highest [output.index_highest .size ()-1 ]=collection_size-1 ;
281- }
282-
283- return output;
284- }
285-
286230template <typename T> // type must implement a size() method
287231size_t calculate_max_index_in_vector (T &vector)
288232{
@@ -371,4 +315,26 @@ VectorXd calculate_rolling_centered_mean(const VectorXd &vector, const VectorXi
371315 }
372316
373317 return rolling_centered_mean;
318+ }
319+
320+ VectorXi calculate_indicator (const VectorXd &v)
321+ {
322+ VectorXi indicator{VectorXi::Constant (v.rows (),1 )};
323+ for (size_t i = 0 ; i < static_cast <size_t >(v.size ()); ++i)
324+ {
325+ if (is_approximately_zero (v[i]))
326+ indicator[i]=0 ;
327+ }
328+ return indicator;
329+ }
330+
331+ VectorXi calculate_indicator (const VectorXi &v)
332+ {
333+ VectorXi indicator{VectorXi::Constant (v.rows (),1 )};
334+ for (size_t i = 0 ; i < static_cast <size_t >(v.size ()); ++i)
335+ {
336+ if (v[i]==0 )
337+ indicator[i]=0 ;
338+ }
339+ return indicator;
374340}
0 commit comments