Skip to content

Commit fe62bb6

Browse files
990
1 parent 7382e77 commit fe62bb6

File tree

10 files changed

+275
-60
lines changed

10 files changed

+275
-60
lines changed

API_REFERENCE_FOR_CLASSIFICATION.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# APLRClassifier
22

3-
## class aplr.APLRClassifier(m:int=3000, v:float=0.1, random_state:int=0, n_jobs:int=0, cv_folds:int=5, bins:int=300, verbosity:int=0, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, early_stopping_rounds: int = 500, num_first_steps_with_linear_effects_only: int = 0, penalty_for_non_linearity: float = 0.0, penalty_for_interactions: float = 0.0)
3+
## class aplr.APLRClassifier(m:int=3000, v:float=0.1, random_state:int=0, n_jobs:int=0, cv_folds:int=5, bins:int=300, verbosity:int=0, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, early_stopping_rounds: int = 500, num_first_steps_with_linear_effects_only: int = 0, penalty_for_non_linearity: float = 0.0, penalty_for_interactions: float = 0.0, max_terms: int = 0)
44

55
### Constructor parameters
66

@@ -58,6 +58,9 @@ Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects
5858
#### penalty_for_interactions (default = 0.0)
5959
Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness. Values outside of the [0.0, 1.0] range are rounded to the nearest boundary within the range.
6060

61+
#### max_terms (default = 0)
62+
Restricts the number of terms in any of the underlying trained models to at most ***max_terms***. The default value of 0 means no limit. After the limit is reached, the remaining boosting steps are used to further update the coefficients of already included terms. A reason for using ***max_terms*** is to increase model interpretability by reducing the number of terms in the model. Please note that low non-zero values of ***max_terms*** may require a high ***v***, such as 1.0, for best results.
63+
6164

6265
## Method: fit(X:npt.ArrayLike, y:List[str], sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], interaction_constraints:List[List[int]]=[], predictor_learning_rates: List[float] = [], predictor_penalties_for_non_linearity: List[float] = [], predictor_penalties_for_interactions: List[float] = [])
6366

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# APLRRegressor
22

3-
## class aplr.APLRRegressor(m:int=3000, v:float=0.1, random_state:int=0, loss_function:str="mse", link_function:str="identity", n_jobs:int=0, cv_folds:int=5, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, dispersion_parameter:float=1.5, validation_tuning_metric:str="default", quantile:float=0.5, calculate_custom_validation_error_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_loss_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_negative_gradient_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, group_mse_by_prediction_bins: int = 10, group_mse_cycle_min_obs_in_bin: int = 30, early_stopping_rounds: int = 500, num_first_steps_with_linear_effects_only: int = 0, penalty_for_non_linearity: float = 0.0, penalty_for_interactions: float = 0.0)
3+
## class aplr.APLRRegressor(m:int=3000, v:float=0.1, random_state:int=0, loss_function:str="mse", link_function:str="identity", n_jobs:int=0, cv_folds:int=5, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, dispersion_parameter:float=1.5, validation_tuning_metric:str="default", quantile:float=0.5, calculate_custom_validation_error_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_loss_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], float]]=None, calculate_custom_negative_gradient_function:Optional[Callable[[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_transform_linear_predictor_to_predictions_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, calculate_custom_differentiate_predictions_wrt_linear_predictor_function:Optional[Callable[[npt.ArrayLike], npt.ArrayLike]]=None, boosting_steps_before_interactions_are_allowed: int = 0, monotonic_constraints_ignore_interactions: bool = False, group_mse_by_prediction_bins: int = 10, group_mse_cycle_min_obs_in_bin: int = 30, early_stopping_rounds: int = 500, num_first_steps_with_linear_effects_only: int = 0, penalty_for_non_linearity: float = 0.0, penalty_for_interactions: float = 0.0, max_terms: int = 0)
44

55
### Constructor parameters
66

@@ -126,6 +126,9 @@ Specifies a penalty in the range [0.0, 1.0] on terms that are not linear effects
126126
#### penalty_for_interactions (default = 0.0)
127127
Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value increases model interpretability but can hurt predictiveness. Values outside of the [0.0, 1.0] range are rounded to the nearest boundary within the range.
128128

129+
#### max_terms (default = 0)
130+
Restricts the number of terms in any of the underlying trained models to at most ***max_terms***. The default value of 0 means no limit. After the limit is reached, the remaining boosting steps are used to further update the coefficients of already included terms. A reason for using ***max_terms*** is to increase model interpretability by reducing the number of terms in the model. Please note that low non-zero values of ***max_terms*** may require a high ***v***, such as 1.0, for best results.
131+
129132

130133
## Method: fit(X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], cv_observations: npt.ArrayLike = np.empty([0, 0]), prioritized_predictors_indexes:List[int]=[], monotonic_constraints:List[int]=[], group:npt.ArrayLike = np.empty(0), interaction_constraints:List[List[int]]=[], other_data: npt.ArrayLike = np.empty([0, 0]), predictor_learning_rates: List[float] = [], predictor_penalties_for_non_linearity: List[float] = [], predictor_penalties_for_interactions: List[float] = [])
131134

aplr/aplr.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def __init__(
6868
num_first_steps_with_linear_effects_only: int = 0,
6969
penalty_for_non_linearity: float = 0.0,
7070
penalty_for_interactions: float = 0.0,
71+
max_terms: int = 0,
7172
):
7273
self.m = m
7374
self.v = v
@@ -113,6 +114,7 @@ def __init__(
113114
)
114115
self.penalty_for_non_linearity = penalty_for_non_linearity
115116
self.penalty_for_interactions = penalty_for_interactions
117+
self.max_terms = max_terms
116118

117119
# Creating aplr_cpp and setting parameters
118120
self.APLRRegressor = aplr_cpp.APLRRegressor()
@@ -172,6 +174,7 @@ def __set_params_cpp(self):
172174
)
173175
self.APLRRegressor.penalty_for_non_linearity = self.penalty_for_non_linearity
174176
self.APLRRegressor.penalty_for_interactions = self.penalty_for_interactions
177+
self.APLRRegressor.max_terms = self.max_terms
175178

176179
def fit(
177180
self,
@@ -308,6 +311,7 @@ def get_params(self, deep=True):
308311
"num_first_steps_with_linear_effects_only": self.num_first_steps_with_linear_effects_only,
309312
"penalty_for_non_linearity": self.penalty_for_non_linearity,
310313
"penalty_for_interactions": self.penalty_for_interactions,
314+
"max_terms": self.max_terms,
311315
}
312316

313317
# For sklearn
@@ -339,6 +343,7 @@ def __init__(
339343
num_first_steps_with_linear_effects_only: int = 0,
340344
penalty_for_non_linearity: float = 0.0,
341345
penalty_for_interactions: float = 0.0,
346+
max_terms: int = 0,
342347
):
343348
self.m = m
344349
self.v = v
@@ -364,6 +369,7 @@ def __init__(
364369
)
365370
self.penalty_for_non_linearity = penalty_for_non_linearity
366371
self.penalty_for_interactions = penalty_for_interactions
372+
self.max_terms = max_terms
367373

368374
# Creating aplr_cpp and setting parameters
369375
self.APLRClassifier = aplr_cpp.APLRClassifier()
@@ -397,6 +403,7 @@ def __set_params_cpp(self):
397403
)
398404
self.APLRClassifier.penalty_for_non_linearity = self.penalty_for_non_linearity
399405
self.APLRClassifier.penalty_for_interactions = self.penalty_for_interactions
406+
self.APLRClassifier.max_terms = self.max_terms
400407

401408
def fit(
402409
self,
@@ -478,6 +485,7 @@ def get_params(self, deep=True):
478485
"num_first_steps_with_linear_effects_only": self.num_first_steps_with_linear_effects_only,
479486
"penalty_for_non_linearity": self.penalty_for_non_linearity,
480487
"penalty_for_interactions": self.penalty_for_interactions,
488+
"max_terms": self.max_terms,
481489
}
482490

483491
# For sklearn

cpp/APLRClassifier.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,14 @@ class APLRClassifier
4848
size_t num_first_steps_with_linear_effects_only;
4949
double penalty_for_non_linearity;
5050
double penalty_for_interactions;
51+
size_t max_terms;
5152

5253
APLRClassifier(size_t m = 3000, double v = 0.1, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), size_t n_jobs = 0,
5354
size_t cv_folds = 5, size_t reserved_terms_times_num_x = 100, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1,
5455
size_t max_interactions = 100000, size_t min_observations_in_split = 20, size_t ineligible_boosting_steps_added = 10, size_t max_eligible_terms = 5,
5556
size_t boosting_steps_before_interactions_are_allowed = 0, bool monotonic_constraints_ignore_interactions = false,
5657
size_t early_stopping_rounds = 500, size_t num_first_steps_with_linear_effects_only = 0,
57-
double penalty_for_non_linearity = 0.0, double penalty_for_interactions = 0.0);
58+
double penalty_for_non_linearity = 0.0, double penalty_for_interactions = 0.0, size_t max_terms = 0);
5859
APLRClassifier(const APLRClassifier &other);
5960
~APLRClassifier();
6061
void fit(const MatrixXd &X, const std::vector<std::string> &y, const VectorXd &sample_weight = VectorXd(0),
@@ -78,15 +79,15 @@ APLRClassifier::APLRClassifier(size_t m, double v, uint_fast32_t random_state, s
7879
size_t min_observations_in_split, size_t ineligible_boosting_steps_added, size_t max_eligible_terms,
7980
size_t boosting_steps_before_interactions_are_allowed, bool monotonic_constraints_ignore_interactions,
8081
size_t early_stopping_rounds, size_t num_first_steps_with_linear_effects_only,
81-
double penalty_for_non_linearity, double penalty_for_interactions)
82+
double penalty_for_non_linearity, double penalty_for_interactions, size_t max_terms)
8283
: m{m}, v{v}, random_state{random_state}, n_jobs{n_jobs}, cv_folds{cv_folds},
8384
reserved_terms_times_num_x{reserved_terms_times_num_x}, bins{bins}, verbosity{verbosity}, max_interaction_level{max_interaction_level},
8485
max_interactions{max_interactions}, min_observations_in_split{min_observations_in_split},
8586
ineligible_boosting_steps_added{ineligible_boosting_steps_added}, max_eligible_terms{max_eligible_terms},
8687
boosting_steps_before_interactions_are_allowed{boosting_steps_before_interactions_are_allowed},
8788
monotonic_constraints_ignore_interactions{monotonic_constraints_ignore_interactions}, early_stopping_rounds{early_stopping_rounds},
8889
num_first_steps_with_linear_effects_only{num_first_steps_with_linear_effects_only}, penalty_for_non_linearity{penalty_for_non_linearity},
89-
penalty_for_interactions{penalty_for_interactions}
90+
penalty_for_interactions{penalty_for_interactions}, max_terms{max_terms}
9091
{
9192
}
9293

@@ -102,7 +103,8 @@ APLRClassifier::APLRClassifier(const APLRClassifier &other)
102103
monotonic_constraints_ignore_interactions{other.monotonic_constraints_ignore_interactions},
103104
early_stopping_rounds{other.early_stopping_rounds},
104105
num_first_steps_with_linear_effects_only{other.num_first_steps_with_linear_effects_only},
105-
penalty_for_non_linearity{other.penalty_for_non_linearity}, penalty_for_interactions{other.penalty_for_interactions}
106+
penalty_for_non_linearity{other.penalty_for_non_linearity}, penalty_for_interactions{other.penalty_for_interactions},
107+
max_terms{other.max_terms}
106108
{
107109
}
108110

@@ -133,6 +135,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
133135
logit_models[categories[0]].num_first_steps_with_linear_effects_only = num_first_steps_with_linear_effects_only;
134136
logit_models[categories[0]].penalty_for_non_linearity = penalty_for_non_linearity;
135137
logit_models[categories[0]].penalty_for_interactions = penalty_for_interactions;
138+
logit_models[categories[0]].max_terms = max_terms;
136139
logit_models[categories[0]].fit(X, response_values[categories[0]], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
137140
monotonic_constraints, VectorXi(0), interaction_constraints, MatrixXd(0, 0), predictor_learning_rates,
138141
predictor_penalties_for_non_linearity, predictor_penalties_for_interactions);
@@ -153,6 +156,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
153156
logit_models[category].num_first_steps_with_linear_effects_only = num_first_steps_with_linear_effects_only;
154157
logit_models[category].penalty_for_non_linearity = penalty_for_non_linearity;
155158
logit_models[category].penalty_for_interactions = penalty_for_interactions;
159+
logit_models[category].max_terms = max_terms;
156160
logit_models[category].fit(X, response_values[category], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
157161
monotonic_constraints, VectorXi(0), interaction_constraints, MatrixXd(0, 0), predictor_learning_rates,
158162
predictor_penalties_for_non_linearity, predictor_penalties_for_interactions);

0 commit comments

Comments
 (0)