Commit 3a1361d

reverted to 1.3.2 and added comments to api reference
1 parent a18aad3

7 files changed: +31 additions, -115 deletions

API_REFERENCE.md

Lines changed: 2 additions & 18 deletions
@@ -1,6 +1,6 @@
# APLRRegressor

-## class aplr.APLRRegressor(m:int=1000, v:float=0.1, random_state:int=0, family:str="gaussian", link_function:str="identity", n_jobs:int=0, validation_ratio:float=0.2, intercept:float=np.nan, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, tweedie_power:float=1.5, cap_outliers_in_validation_set:bool=True, cap_outliers_when_using_the_model:bool=True)
+## class aplr.APLRRegressor(m:int=1000, v:float=0.1, random_state:int=0, family:str="gaussian", link_function:str="identity", n_jobs:int=0, validation_ratio:float=0.2, intercept:float=np.nan, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, tweedie_power:float=1.5)

### Constructor parameters

@@ -52,12 +52,6 @@ Limits 1) the number of terms already in the model that can be considered as int
#### tweedie_power (default = 1.5)
Specifies the variance power for the "tweedie" ***family*** and ***link_function***.

-#### cap_outliers_in_validation_set (default = True)
-If ***True*** then term values will be limited by the minimum and maximum values found during model training when calculating validation error. If you need the model to extrapolate then it may be more appropriate to set ***cap_outliers_in_validation_set*** to ***False***. In the latter case the model may become more vulnerable to outliers.
-
-#### cap_outliers_when_using_the_model (default = True)
-If ***True*** then term values will be limited by the minimum and maximum values found during model training when using the model. This can make the model less vulnerable to outliers and is recommended unless you need the model to extrapolate. If you need the model to extrapolate then set ***cap_outliers_when_using_the_model*** to ***False***.
-

## Method: fit(X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], validation_set_indexes:List[int]=[])


@@ -173,14 +167,4 @@ The index of the term selected. So ***0*** is the first term, ***1*** is the sec

## Method: get_m()

-***Returns the number of boosting steps in the model (the value that minimized validation error).***
-
-
-## Method: set_cap_outliers_when_using_the_model(cap_outliers_when_using_the_model:bool)
-
-***Sets cap_outliers_when_using_the_model.***
-
-### Parameters
-
-#### cap_outliers_when_using_the_model
-True or False.
+***Returns the number of boosting steps in the model (the value that minimized validation error).***
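
For orientation, here is a minimal usage sketch matching the reverted 1.3.2 signature documented above. The synthetic data, chosen parameter values and variable names are illustrative only and are not part of this commit.

import numpy as np
import aplr

# Illustrative training data: any numeric matrix X and target vector y of matching length.
rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))
y = X[:, 0] + 0.5 * X[:, 1] ** 2 + rng.normal(scale=0.1, size=500)

# After the revert the constructor no longer accepts cap_outliers_in_validation_set
# or cap_outliers_when_using_the_model; passing them would raise a TypeError.
model = aplr.APLRRegressor(m=1000, v=0.1, random_state=0, family="gaussian",
                           link_function="identity", max_interaction_level=1)
model.fit(X, y, X_names=["x1", "x2", "x3"])
print(model.get_m())  # number of boosting steps that minimized validation error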

aplr/aplr.py

Lines changed: 2 additions & 10 deletions
@@ -5,7 +5,7 @@


class APLRRegressor():
-    def __init__(self, m:int=1000, v:float=0.1, random_state:int=0, family:str="gaussian", link_function:str="identity", n_jobs:int=0, validation_ratio:float=0.2, intercept:float=np.nan, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, tweedie_power:float=1.5, cap_outliers_in_validation_set:bool=True, cap_outliers_when_using_the_model:bool=True):
+    def __init__(self, m:int=1000, v:float=0.1, random_state:int=0, family:str="gaussian", link_function:str="identity", n_jobs:int=0, validation_ratio:float=0.2, intercept:float=np.nan, bins:int=300, max_interaction_level:int=1, max_interactions:int=100000, min_observations_in_split:int=20, ineligible_boosting_steps_added:int=10, max_eligible_terms:int=5, verbosity:int=0, tweedie_power:float=1.5):
        self.m=m
        self.v=v
        self.random_state=random_state
@@ -22,8 +22,6 @@ def __init__(self, m:int=1000, v:float=0.1, random_state:int=0, family:str="gaus
        self.max_eligible_terms=max_eligible_terms
        self.verbosity=verbosity
        self.tweedie_power=tweedie_power
-        self.cap_outliers_in_validation_set=cap_outliers_in_validation_set
-        self.cap_outliers_when_using_the_model=cap_outliers_when_using_the_model

        #Creating aplr_cpp and setting parameters
        self.APLRRegressor=aplr_cpp.APLRRegressor()
@@ -47,8 +45,6 @@ def __set_params_cpp(self):
        self.APLRRegressor.max_eligible_terms=self.max_eligible_terms
        self.APLRRegressor.verbosity=self.verbosity
        self.APLRRegressor.tweedie_power=self.tweedie_power
-        self.APLRRegressor.cap_outliers_in_validation_set=self.cap_outliers_in_validation_set
-        self.APLRRegressor.cap_outliers_when_using_the_model=self.cap_outliers_when_using_the_model

    def fit(self, X:npt.ArrayLike, y:npt.ArrayLike, sample_weight:npt.ArrayLike = np.empty(0), X_names:List[str]=[], validation_set_indexes:List[int]=[]):
        self.__set_params_cpp()
@@ -93,13 +89,9 @@ def get_intercept_steps(self)->npt.ArrayLike:
    def get_m(self)->int:
        return self.APLRRegressor.get_m()

-    def set_cap_outliers_when_using_the_model(self, cap_outliers_when_using_the_model:bool):
-        self.APLRRegressor.set_cap_outliers_when_using_the_model(cap_outliers_when_using_the_model)
-        self.cap_outliers_when_using_the_model=self.APLRRegressor.cap_outliers_when_using_the_model
-
    #For sklearn
    def get_params(self, deep=True):
-        return {"m": self.m, "v": self.v,"random_state":self.random_state,"family":self.family,"link_function":self.link_function,"n_jobs":self.n_jobs,"validation_ratio":self.validation_ratio,"intercept":self.intercept,"bins":self.bins,"max_interaction_level":self.max_interaction_level,"max_interactions":self.max_interactions,"verbosity":self.verbosity,"min_observations_in_split":self.min_observations_in_split,"ineligible_boosting_steps_added":self.ineligible_boosting_steps_added,"max_eligible_terms":self.max_eligible_terms,"tweedie_power":self.tweedie_power,"cap_outliers_in_validation_set":self.cap_outliers_in_validation_set,"cap_outliers_when_using_the_model":self.cap_outliers_when_using_the_model}
+        return {"m": self.m, "v": self.v,"random_state":self.random_state,"family":self.family,"link_function":self.link_function,"n_jobs":self.n_jobs,"validation_ratio":self.validation_ratio,"intercept":self.intercept,"bins":self.bins,"max_interaction_level":self.max_interaction_level,"max_interactions":self.max_interactions,"verbosity":self.verbosity,"min_observations_in_split":self.min_observations_in_split,"ineligible_boosting_steps_added":self.ineligible_boosting_steps_added,"max_eligible_terms":self.max_eligible_terms,"tweedie_power":self.tweedie_power}

    #For sklearn
    def set_params(self, **parameters):
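
Because get_params and set_params follow the scikit-learn estimator convention (note the "#For sklearn" comments above), the wrapper should remain usable with standard scikit-learn tooling after the revert. A brief sketch, assuming set_params applies keyword arguments as attributes in the usual scikit-learn way; the parameter values below are arbitrary examples.

from sklearn.base import clone
from aplr import APLRRegressor

model = APLRRegressor(m=500, v=0.05)

# get_params now returns the 1.3.2 parameter dictionary shown in the diff above,
# without the removed cap_outliers_* keys.
print(sorted(model.get_params().keys()))

# clone() rebuilds an unfitted copy from get_params(); set_params(**kwargs)
# updates hyperparameters in place before refitting.
fresh_copy = clone(model)
fresh_copy.set_params(max_interaction_level=2, verbosity=1)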

cpp/APLRRegressor.h

Lines changed: 10 additions & 28 deletions
@@ -118,22 +118,18 @@ class APLRRegressor
size_t number_of_base_terms;
VectorXd feature_importance; //Populated in fit() using validation set. Rows are in the same order as in X.
double tweedie_power;
-bool cap_outliers_in_validation_set;
-bool cap_outliers_when_using_the_model;

//Methods
APLRRegressor(size_t m=1000,double v=0.1,uint_fast32_t random_state=std::numeric_limits<uint_fast32_t>::lowest(),std::string family="gaussian",
std::string link_function="identity", size_t n_jobs=0, double validation_ratio=0.2,double intercept=NAN_DOUBLE,
size_t reserved_terms_times_num_x=100, size_t bins=300,size_t verbosity=0,size_t max_interaction_level=1,size_t max_interactions=100000,
-size_t min_observations_in_split=20, size_t ineligible_boosting_steps_added=10, size_t max_eligible_terms=5,double tweedie_power=1.5,
-bool cap_outliers_in_validation_set=true, bool cap_outliers_when_using_the_model=true);
+size_t min_observations_in_split=20, size_t ineligible_boosting_steps_added=10, size_t max_eligible_terms=5,double tweedie_power=1.5);
APLRRegressor(const APLRRegressor &other);
~APLRRegressor();
void fit(const MatrixXd &X,const VectorXd &y,const VectorXd &sample_weight=VectorXd(0),const std::vector<std::string> &X_names={},const std::vector<size_t> &validation_set_indexes={});
VectorXd predict(const MatrixXd &X);
void set_term_names(const std::vector<std::string> &X_names);
MatrixXd calculate_local_feature_importance(const MatrixXd &X);
-MatrixXd calculate_local_feature_importance_base_function(const MatrixXd &X, bool cap_outliers);
MatrixXd calculate_local_feature_importance_for_terms(const MatrixXd &X);
MatrixXd calculate_terms(const MatrixXd &X);
std::vector<std::string> get_term_names();
@@ -144,21 +140,18 @@ class APLRRegressor
double get_intercept();
VectorXd get_intercept_steps();
size_t get_m();
-void set_cap_outliers_when_using_the_model(bool cap_outliers_when_using_the_model);
};

//Regular constructor
APLRRegressor::APLRRegressor(size_t m,double v,uint_fast32_t random_state,std::string family,std::string link_function,size_t n_jobs,
double validation_ratio,double intercept,size_t reserved_terms_times_num_x,size_t bins,size_t verbosity,size_t max_interaction_level,
-size_t max_interactions,size_t min_observations_in_split,size_t ineligible_boosting_steps_added,size_t max_eligible_terms,double tweedie_power,
-bool cap_outliers_in_validation_set, bool cap_outliers_when_using_the_model):
+size_t max_interactions,size_t min_observations_in_split,size_t ineligible_boosting_steps_added,size_t max_eligible_terms,double tweedie_power):
reserved_terms_times_num_x{reserved_terms_times_num_x},intercept{intercept},m{m},v{v},
family{family},link_function{link_function},validation_ratio{validation_ratio},n_jobs{n_jobs},random_state{random_state},
bins{bins},verbosity{verbosity},max_interaction_level{max_interaction_level},
intercept_steps{VectorXd(0)},max_interactions{max_interactions},interactions_eligible{0},validation_error_steps{VectorXd(0)},
min_observations_in_split{min_observations_in_split},ineligible_boosting_steps_added{ineligible_boosting_steps_added},
-max_eligible_terms{max_eligible_terms},number_of_base_terms{0},tweedie_power{tweedie_power},
-cap_outliers_in_validation_set{cap_outliers_in_validation_set},cap_outliers_when_using_the_model{cap_outliers_when_using_the_model}
+max_eligible_terms{max_eligible_terms},number_of_base_terms{0},tweedie_power{tweedie_power}
{
}

@@ -172,8 +165,7 @@ APLRRegressor::APLRRegressor(const APLRRegressor &other):
max_interactions{other.max_interactions},interactions_eligible{other.interactions_eligible},validation_error_steps{other.validation_error_steps},
min_observations_in_split{other.min_observations_in_split},ineligible_boosting_steps_added{other.ineligible_boosting_steps_added},
max_eligible_terms{other.max_eligible_terms},number_of_base_terms{other.number_of_base_terms},
-feature_importance{other.feature_importance},tweedie_power{other.tweedie_power},
-cap_outliers_in_validation_set{other.cap_outliers_in_validation_set},cap_outliers_when_using_the_model{other.cap_outliers_when_using_the_model}
+feature_importance{other.feature_importance},tweedie_power{other.tweedie_power}
{
}

@@ -749,7 +741,7 @@ void APLRRegressor::select_the_best_term_and_update_errors(size_t boosting_step)

//Updating current predictions
VectorXd values{terms_eligible_current[best_term].calculate(X_train)};
-VectorXd values_validation{terms_eligible_current[best_term].calculate(X_validation, cap_outliers_in_validation_set)};
+VectorXd values_validation{terms_eligible_current[best_term].calculate(X_validation)};
linear_predictor_update=values*terms_eligible_current[best_term].coefficient;
linear_predictor_update_validation=values_validation*terms_eligible_current[best_term].coefficient;
double error_after_updating_term=calculate_sum_error(calculate_errors(neg_gradient_current,linear_predictor_update,sample_weight_train));
@@ -1002,7 +994,7 @@ void APLRRegressor::set_term_names(const std::vector<std::string> &X_names)
void APLRRegressor::calculate_feature_importance_on_validation_set()
{
feature_importance=VectorXd::Constant(number_of_base_terms,0);
-MatrixXd li{calculate_local_feature_importance_base_function(X_validation, cap_outliers_in_validation_set)};
+MatrixXd li{calculate_local_feature_importance(X_validation)};
for (size_t i = 0; i < static_cast<size_t>(li.cols()); ++i) //for each column calculate mean abs values
{
feature_importance[i]=li.col(i).cwiseAbs().mean();
@@ -1012,11 +1004,6 @@ void APLRRegressor::calculate_feature_importance_on_validation_set()
//Computes local feature importance on data X.
//Output matrix has columns for each base term in the same order as in X and observations in rows.
MatrixXd APLRRegressor::calculate_local_feature_importance(const MatrixXd &X)
-{
-return calculate_local_feature_importance_base_function(X, cap_outliers_when_using_the_model);
-}
-
-MatrixXd APLRRegressor::calculate_local_feature_importance_base_function(const MatrixXd &X, bool cap_outliers)
{
validate_that_model_can_be_used(X);

@@ -1025,7 +1012,7 @@ MatrixXd APLRRegressor::calculate_local_feature_importance_base_function(const M
//Terms
for (size_t i = 0; i < terms.size(); ++i) //for each term
{
-VectorXd contrib{terms[i].calculate_prediction_contribution(X, cap_outliers)};
+VectorXd contrib{terms[i].calculate_prediction_contribution(X)};
output.col(terms[i].base_term)+=contrib;
}

@@ -1084,7 +1071,7 @@ VectorXd APLRRegressor::calculate_linear_predictor(const MatrixXd &X)
VectorXd predictions{VectorXd::Constant(X.rows(),intercept)};
for (size_t i = 0; i < terms.size(); ++i) //for each term
{
-VectorXd contrib{terms[i].calculate_prediction_contribution(X, cap_outliers_when_using_the_model)};
+VectorXd contrib{terms[i].calculate_prediction_contribution(X)};
predictions+=contrib;
}
return predictions;
@@ -1099,7 +1086,7 @@ MatrixXd APLRRegressor::calculate_local_feature_importance_for_terms(const Matri
//Terms
for (size_t i = 0; i < terms.size(); ++i) //for each term
{
-VectorXd contrib{terms[i].calculate_prediction_contribution(X, cap_outliers_when_using_the_model)};
+VectorXd contrib{terms[i].calculate_prediction_contribution(X)};
output.col(i)+=contrib;
}

@@ -1115,7 +1102,7 @@ MatrixXd APLRRegressor::calculate_terms(const MatrixXd &X)
//Terms
for (size_t i = 0; i < terms.size(); ++i) //for each term
{
-VectorXd values{terms[i].calculate(X, cap_outliers_when_using_the_model)};
+VectorXd values{terms[i].calculate(X)};
output.col(i)+=values;
}

@@ -1160,9 +1147,4 @@ VectorXd APLRRegressor::get_intercept_steps()
size_t APLRRegressor::get_m()
{
return m;
-}
-
-void APLRRegressor::set_cap_outliers_when_using_the_model(bool cap_outliers_when_using_the_model)
-{
-this->cap_outliers_when_using_the_model=cap_outliers_when_using_the_model;
}

cpp/main.cpp

Lines changed: 0 additions & 2 deletions
@@ -24,8 +24,6 @@ int main()
model.max_interactions=30;
model.ineligible_boosting_steps_added=10;
model.max_eligible_terms=5;
-model.cap_outliers_in_validation_set=false;
-model.cap_outliers_when_using_the_model=false;

//Data
MatrixXd X_train{load_csv<MatrixXd>("X_train.csv")};
