Skip to content

Commit f32facf

Browse files
Removed the unnecessary constructor parameter reserved_terms_times_num_x (the term vectors now reserve m entries instead) and updated the presentation regarding max_terms
1 parent 6304c41 commit f32facf

File tree

5 files changed

+20
-22
lines changed

5 files changed

+20
-22
lines changed

cpp/APLRClassifier.h

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ using namespace Eigen;
1212
class APLRClassifier
1313
{
1414
private:
15-
size_t reserved_terms_times_num_x;
1615
std::map<std::string, VectorXd> response_values; // Key is category and value is response vector
1716

1817
void initialize();
@@ -55,7 +54,7 @@ class APLRClassifier
5554
std::vector<std::vector<size_t>> base_predictors_in_each_unique_term_affiliation;
5655

5756
APLRClassifier(size_t m = 20000, double v = 0.5, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), size_t n_jobs = 0,
58-
size_t cv_folds = 5, size_t reserved_terms_times_num_x = 100, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1,
57+
size_t cv_folds = 5, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1,
5958
size_t max_interactions = 100000, size_t min_observations_in_split = 4, size_t ineligible_boosting_steps_added = 15, size_t max_eligible_terms = 7,
6059
size_t boosting_steps_before_interactions_are_allowed = 0, bool monotonic_constraints_ignore_interactions = false,
6160
size_t early_stopping_rounds = 500, size_t num_first_steps_with_linear_effects_only = 0,
@@ -81,13 +80,13 @@ class APLRClassifier
8180
};
8281

8382
APLRClassifier::APLRClassifier(size_t m, double v, uint_fast32_t random_state, size_t n_jobs, size_t cv_folds,
84-
size_t reserved_terms_times_num_x, size_t bins, size_t verbosity, size_t max_interaction_level, size_t max_interactions,
83+
size_t bins, size_t verbosity, size_t max_interaction_level, size_t max_interactions,
8584
size_t min_observations_in_split, size_t ineligible_boosting_steps_added, size_t max_eligible_terms,
8685
size_t boosting_steps_before_interactions_are_allowed, bool monotonic_constraints_ignore_interactions,
8786
size_t early_stopping_rounds, size_t num_first_steps_with_linear_effects_only,
8887
double penalty_for_non_linearity, double penalty_for_interactions, size_t max_terms)
8988
: m{m}, v{v}, random_state{random_state}, n_jobs{n_jobs}, cv_folds{cv_folds},
90-
reserved_terms_times_num_x{reserved_terms_times_num_x}, bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
89+
bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
9190
max_interactions{max_interactions}, min_observations_in_split{min_observations_in_split},
9291
ineligible_boosting_steps_added{ineligible_boosting_steps_added}, max_eligible_terms{max_eligible_terms},
9392
boosting_steps_before_interactions_are_allowed{boosting_steps_before_interactions_are_allowed},
@@ -99,7 +98,7 @@ APLRClassifier::APLRClassifier(size_t m, double v, uint_fast32_t random_state, s
9998

10099
APLRClassifier::APLRClassifier(const APLRClassifier &other)
101100
: m{other.m}, v{other.v}, random_state{other.random_state}, n_jobs{other.n_jobs}, cv_folds{other.cv_folds},
102-
reserved_terms_times_num_x{other.reserved_terms_times_num_x}, bins{other.bins}, verbosity{other.verbosity},
101+
bins{other.bins}, verbosity{other.verbosity},
103102
max_interaction_level{other.max_interaction_level}, max_interactions{other.max_interactions},
104103
min_observations_in_split{other.min_observations_in_split}, ineligible_boosting_steps_added{other.ineligible_boosting_steps_added},
105104
max_eligible_terms{other.max_eligible_terms}, logit_models{other.logit_models}, categories{other.categories},
@@ -134,7 +133,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
134133
bool two_class_case{categories.size() == 2};
135134
if (two_class_case)
136135
{
137-
logit_models[categories[0]] = APLRRegressor(m, v, random_state, "binomial", "logit", n_jobs, cv_folds, reserved_terms_times_num_x,
136+
logit_models[categories[0]] = APLRRegressor(m, v, random_state, "binomial", "logit", n_jobs, cv_folds,
138137
bins, verbosity, max_interaction_level, max_interactions, min_observations_in_split, ineligible_boosting_steps_added,
139138
max_eligible_terms, 1.5, "default", 0.5);
140139
logit_models[categories[0]].boosting_steps_before_interactions_are_allowed = boosting_steps_before_interactions_are_allowed;
@@ -155,7 +154,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
155154
{
156155
for (auto &category : categories)
157156
{
158-
logit_models[category] = APLRRegressor(m, v, random_state, "binomial", "logit", n_jobs, cv_folds, reserved_terms_times_num_x,
157+
logit_models[category] = APLRRegressor(m, v, random_state, "binomial", "logit", n_jobs, cv_folds,
159158
bins, verbosity, max_interaction_level, max_interactions, min_observations_in_split, ineligible_boosting_steps_added,
160159
max_eligible_terms, 1.5, "default", 0.5);
161160
logit_models[category].boosting_steps_before_interactions_are_allowed = boosting_steps_before_interactions_are_allowed;
@@ -212,7 +211,7 @@ void APLRClassifier::create_response_for_each_category(const std::vector<std::st
212211

213212
void APLRClassifier::define_cv_observations(const std::vector<std::string> &y, const MatrixXi &cv_observations_)
214213
{
215-
APLRRegressor aplr_regressor{APLRRegressor(m, v, random_state, "binomial", "logit", n_jobs, cv_folds, reserved_terms_times_num_x,
214+
APLRRegressor aplr_regressor{APLRRegressor(m, v, random_state, "binomial", "logit", n_jobs, cv_folds,
216215
bins, verbosity, max_interaction_level, max_interactions, min_observations_in_split, ineligible_boosting_steps_added,
217216
max_eligible_terms, 1.5, "default", 0.5)};
218217
VectorXd y_dummy_vector{VectorXd(y.size())};

cpp/APLRRegressor.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ struct ModelForCVFold
3030
class APLRRegressor
3131
{
3232
private:
33-
size_t reserved_terms_times_num_x;
3433
MatrixXd X_train;
3534
VectorXd y_train;
3635
VectorXd sample_weight_train;
@@ -250,7 +249,7 @@ class APLRRegressor
250249

251250
APLRRegressor(size_t m = 20000, double v = 0.5, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), std::string loss_function = "mse",
252251
std::string link_function = "identity", size_t n_jobs = 0, size_t cv_folds = 5,
253-
size_t reserved_terms_times_num_x = 100, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1, size_t max_interactions = 100000,
252+
size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1, size_t max_interactions = 100000,
254253
size_t min_observations_in_split = 4, size_t ineligible_boosting_steps_added = 15, size_t max_eligible_terms = 7, double dispersion_parameter = 1.5,
255254
std::string validation_tuning_metric = "default", double quantile = 0.5,
256255
const std::function<double(VectorXd, VectorXd, VectorXd, VectorXi, MatrixXd)> &calculate_custom_validation_error_function = {},
@@ -302,7 +301,7 @@ class APLRRegressor
302301
};
303302

304303
APLRRegressor::APLRRegressor(size_t m, double v, uint_fast32_t random_state, std::string loss_function, std::string link_function, size_t n_jobs,
305-
size_t cv_folds, size_t reserved_terms_times_num_x, size_t bins, size_t verbosity, size_t max_interaction_level,
304+
size_t cv_folds, size_t bins, size_t verbosity, size_t max_interaction_level,
306305
size_t max_interactions, size_t min_observations_in_split, size_t ineligible_boosting_steps_added, size_t max_eligible_terms, double dispersion_parameter,
307306
std::string validation_tuning_metric, double quantile,
308307
const std::function<double(VectorXd, VectorXd, VectorXd, VectorXi, MatrixXd)> &calculate_custom_validation_error_function,
@@ -314,7 +313,7 @@ APLRRegressor::APLRRegressor(size_t m, double v, uint_fast32_t random_state, std
314313
size_t group_mse_by_prediction_bins, size_t group_mse_cycle_min_obs_in_bin, size_t early_stopping_rounds,
315314
size_t num_first_steps_with_linear_effects_only, double penalty_for_non_linearity, double penalty_for_interactions,
316315
size_t max_terms)
317-
: reserved_terms_times_num_x{reserved_terms_times_num_x}, intercept{NAN_DOUBLE}, m{m}, v{v},
316+
: intercept{NAN_DOUBLE}, m{m}, v{v},
318317
loss_function{loss_function}, link_function{link_function}, cv_folds{cv_folds}, n_jobs{n_jobs}, random_state{random_state},
319318
bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
320319
max_interactions{max_interactions}, interactions_eligible{0}, validation_error_steps{MatrixXd(0, 0)},
@@ -335,7 +334,7 @@ APLRRegressor::APLRRegressor(size_t m, double v, uint_fast32_t random_state, std
335334
}
336335

337336
APLRRegressor::APLRRegressor(const APLRRegressor &other)
338-
: reserved_terms_times_num_x{other.reserved_terms_times_num_x}, intercept{other.intercept}, terms{other.terms}, m{other.m}, v{other.v},
337+
: intercept{other.intercept}, terms{other.terms}, m{other.m}, v{other.v},
339338
loss_function{other.loss_function}, link_function{other.link_function}, cv_folds{other.cv_folds},
340339
n_jobs{other.n_jobs}, random_state{other.random_state}, bins{other.bins},
341340
verbosity{other.verbosity}, term_names{other.term_names}, term_affiliations{other.term_affiliations}, term_coefficients{other.term_coefficients},
@@ -914,9 +913,9 @@ void APLRRegressor::initialize(const std::vector<int> &monotonic_constraints)
914913
number_of_base_terms = static_cast<size_t>(X_train.cols());
915914

916915
terms.clear();
917-
terms.reserve(X_train.cols() * reserved_terms_times_num_x);
916+
terms.reserve(m);
918917

919-
terms_eligible_current.reserve(X_train.cols() * reserved_terms_times_num_x);
918+
terms_eligible_current.reserve(m);
920919
size_t X_train_cols{static_cast<size_t>(X_train.cols())};
921920
for (size_t i = 0; i < X_train_cols; ++i)
922921
{

cpp/pythonbinding.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ std::function<VectorXd(VectorXd)> empty_calculate_custom_differentiate_predictio
2020
PYBIND11_MODULE(aplr_cpp, m)
2121
{
2222
py::class_<APLRRegressor>(m, "APLRRegressor", py::module_local())
23-
.def(py::init<int &, double &, int &, std::string &, std::string &, int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, double &, std::string &,
23+
.def(py::init<int &, double &, int &, std::string &, std::string &, int &, int &, int &, int &, int &, int &, int &, int &, int &, double &, std::string &,
2424
double &, std::function<double(const VectorXd &y, const VectorXd &predictions, const VectorXd &sample_weight, const VectorXi &group, const MatrixXd &other_data)> &,
2525
std::function<double(const VectorXd &y, const VectorXd &predictions, const VectorXd &sample_weight, const VectorXi &group, const MatrixXd &other_data)> &,
2626
std::function<VectorXd(const VectorXd &y, const VectorXd &predictions, const VectorXi &group, const MatrixXd &other_data)> &,
2727
std::function<VectorXd(const VectorXd &linear_predictor)> &, std::function<VectorXd(const VectorXd &linear_predictor)> &,
2828
int &, bool &, int &, int &, int &, int &, double &, double &, int &>(),
2929
py::arg("m") = 20000, py::arg("v") = 0.5, py::arg("random_state") = 0, py::arg("loss_function") = "mse", py::arg("link_function") = "identity",
3030
py::arg("n_jobs") = 0, py::arg("cv_folds") = 5,
31-
py::arg("reserved_terms_times_num_x") = 100, py::arg("bins") = 300, py::arg("verbosity") = 0,
31+
py::arg("bins") = 300, py::arg("verbosity") = 0,
3232
py::arg("max_interaction_level") = 1, py::arg("max_interactions") = 100000, py::arg("min_observations_in_split") = 4,
3333
py::arg("ineligible_boosting_steps_added") = 15, py::arg("max_eligible_terms") = 7,
3434
py::arg("dispersion_parameter") = 1.5,
@@ -201,7 +201,7 @@ PYBIND11_MODULE(aplr_cpp, m)
201201
std::map<std::string, size_t> unique_term_affiliation_map = t[46].cast<std::map<std::string, size_t>>();
202202
std::vector<std::vector<size_t>> base_predictors_in_each_unique_term_affiliation = t[47].cast<std::vector<std::vector<size_t>>>();
203203

204-
APLRRegressor a(m, v, random_state, loss_function, link_function, n_jobs, cv_folds, 100, bins, verbosity, max_interaction_level,
204+
APLRRegressor a(m, v, random_state, loss_function, link_function, n_jobs, cv_folds, bins, verbosity, max_interaction_level,
205205
max_interactions, min_observations_in_split, ineligible_boosting_steps_added, max_eligible_terms, dispersion_parameter,
206206
validation_tuning_metric, quantile);
207207
a.intercept = intercept;
@@ -282,10 +282,10 @@ PYBIND11_MODULE(aplr_cpp, m)
282282
}));
283283

284284
py::class_<APLRClassifier>(m, "APLRClassifier", py::module_local())
285-
.def(py::init<int &, double &, int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, bool &, int &, int &,
285+
.def(py::init<int &, double &, int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, int &, bool &, int &, int &,
286286
double &, double &, int &>(),
287287
py::arg("m") = 20000, py::arg("v") = 0.5, py::arg("random_state") = 0, py::arg("n_jobs") = 0, py::arg("cv_folds") = 5,
288-
py::arg("reserved_terms_times_num_x") = 100, py::arg("bins") = 300, py::arg("verbosity") = 0,
288+
py::arg("bins") = 300, py::arg("verbosity") = 0,
289289
py::arg("max_interaction_level") = 1, py::arg("max_interactions") = 100000, py::arg("min_observations_in_split") = 4,
290290
py::arg("ineligible_boosting_steps_added") = 15, py::arg("max_eligible_terms") = 7,
291291
py::arg("boosting_steps_before_interactions_are_allowed") = 0, py::arg("monotonic_constraints_ignore_interactions") = false,
@@ -380,7 +380,7 @@ PYBIND11_MODULE(aplr_cpp, m)
380380
std::map<std::string, size_t> unique_term_affiliation_map = t[25].cast<std::map<std::string, size_t>>();
381381
std::vector<std::vector<size_t>> base_predictors_in_each_unique_term_affiliation = t[26].cast<std::vector<std::vector<size_t>>>();
382382

383-
APLRClassifier a(m, v, random_state, n_jobs, cv_folds, 100, bins, verbosity, max_interaction_level, max_interactions,
383+
APLRClassifier a(m, v, random_state, n_jobs, cv_folds, bins, verbosity, max_interaction_level, max_interactions,
384384
min_observations_in_split, ineligible_boosting_steps_added, max_eligible_terms);
385385
a.logit_models = logit_models;
386386
a.categories = categories;

documentation/APLR 10.6.0.pdf

4.97 KB
Binary file not shown.

python/benchmarks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def trial_filter(task):
2828
else:
2929
return []
3030

31-
exclude_set = set()
31+
exclude_set = set(["kddcup", "poker"])
3232
if task.name in exclude_set:
3333
return []
3434
else:

0 commit comments

Comments
 (0)