Commit 506eeea

10.8.0
1 parent db1f7ef commit 506eeea


59 files changed (+174, -21963 lines)

.github/workflows/build_wheels.yml

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.17.0
+        uses: pypa/cibuildwheel@v2.22.0
         env:
           CIBW_SKIP: "*musllinux*"
           CIBW_ENVIRONMENT: MACOSX_DEPLOYMENT_TARGET=10.14

API_REFERENCE_FOR_CLASSIFICATION.md

Lines changed: 5 additions & 2 deletions

@@ -32,7 +32,7 @@ Specifies the maximum allowed depth of interaction terms. ***0*** means that int
 The maximum number of interactions allowed in each underlying model. A lower value may be used to reduce computational time or to increase interpretability.

 #### min_observations_in_split (default = 4)
-The minimum effective number of observations that a term in the model must rely on. This hyperparameter should be tuned. Larger values are more appropriate for larger datasets. Larger values result in more robust models (lower variance), potentially at the expense of increased bias.
+The minimum effective number of observations that a term in the model must rely on, as well as the minimum number of boundary value observations where splits cannot be placed. This hyperparameter should be tuned. Larger values are more appropriate for larger datasets. Larger values result in more robust models (lower variance), potentially at the expense of increased bias.

 #### ineligible_boosting_steps_added (default = 15)
 Controls how many boosting steps a term that becomes ineligible has to remain ineligible. The default value works well according to empirical results. This hyperparameter is intended for reducing computational costs.
@@ -62,7 +62,7 @@ Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value
 Restricts the maximum number of terms in any of the underlying models trained to ***max_terms***. The default value of 0 means no limit. After the limit is reached, the remaining boosting steps are used to further update the coefficients of already included terms. An optional tuning objective could be to find the lowest positive value of ***max_terms*** that does not increase the prediction error significantly. Low positive values can speed up the training process significantly. Setting a limit with ***max_terms*** may require a higher learning rate for best results.


-## Method: fit(X:FloatMatrix, y:List[str], sample_weight:FloatVector = np.empty(0), X_names:List[str] = [], cv_observations:IntMatrix = np.empty([0, 0]), prioritized_predictors_indexes:List[int] = [], monotonic_constraints:List[int] = [], interaction_constraints:List[List[int]] = [], predictor_learning_rates:List[float] = [], predictor_penalties_for_non_linearity:List[float] = [], predictor_penalties_for_interactions:List[float] = [])
+## Method: fit(X:FloatMatrix, y:List[str], sample_weight:FloatVector = np.empty(0), X_names:List[str] = [], cv_observations:IntMatrix = np.empty([0, 0]), prioritized_predictors_indexes:List[int] = [], monotonic_constraints:List[int] = [], interaction_constraints:List[List[int]] = [], predictor_learning_rates:List[float] = [], predictor_penalties_for_non_linearity:List[float] = [], predictor_penalties_for_interactions:List[float] = [], predictor_min_observations_in_split:List[int] = [])

 ***This method fits the model to data.***

@@ -101,6 +101,9 @@ An optional list of floats specifying penalties for non-linearity for each predi
 #### predictor_penalties_for_interactions
 An optional list of floats specifying interaction penalties for each predictor. If provided, this supersedes ***penalty_for_interactions***. For example, if there are two predictors in ***X***, then predictor_penalties_for_interactions = [0.1,0.2] means that all terms using the first predictor in ***X*** as a main effect will have an interaction penalty of 0.1 and that all terms using the second predictor in ***X*** as a main effect will have an interaction penalty of 0.2.

+#### predictor_min_observations_in_split
+An optional list of integers specifying the minimum effective number of observations in a split for each predictor. If provided, this supersedes ***min_observations_in_split***.
+

 ## Method: predict_class_probabilities(X:FloatMatrix, cap_predictions_to_minmax_in_training:bool = False)

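For orientation, here is a minimal usage sketch of the classifier parameter documented above. It is not part of this commit: the import path, the synthetic data and the chosen values are assumptions, while min_observations_in_split, X_names, predictor_min_observations_in_split and predict_class_probabilities come from this API reference.

```python
# Hedged sketch, assuming the package is importable as `aplr` and that a
# default-constructed APLRClassifier accepts the hyperparameters documented above.
import numpy as np
from aplr import APLRClassifier

rng = np.random.default_rng(0)
X = rng.uniform(size=(1000, 2))
y = ["a" if x0 + x1 > 1.0 else "b" for x0, x1 in X]  # class labels as strings

model = APLRClassifier(min_observations_in_split=4)  # global value for all predictors
model.fit(
    X,
    y,
    X_names=["x0", "x1"],
    # Illustrative per-predictor override: terms using x0 as a main effect need
    # at least 30 effective observations, terms using x1 only 10. This
    # supersedes min_observations_in_split for those predictors.
    predictor_min_observations_in_split=[30, 10],
)
probabilities = model.predict_class_probabilities(X)
```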

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 5 additions & 2 deletions

@@ -35,7 +35,7 @@ Specifies the maximum allowed depth of interaction terms. ***0*** means that int
 The maximum number of interactions allowed in each underlying model. A lower value may be used to reduce computational time or to increase interpretability.

 #### min_observations_in_split (default = 4)
-The minimum effective number of observations that a term in the model must rely on. This hyperparameter should be tuned. Larger values are more appropriate for larger datasets. Larger values result in more robust models (lower variance), potentially at the expense of increased bias.
+The minimum effective number of observations that a term in the model must rely on, as well as the minimum number of boundary value observations where splits cannot be placed. This hyperparameter should be tuned. Larger values are more appropriate for larger datasets. Larger values result in more robust models (lower variance), potentially at the expense of increased bias.

 #### ineligible_boosting_steps_added (default = 15)
 Controls how many boosting steps a term that becomes ineligible has to remain ineligible. The default value works well according to empirical results. This hyperparameter is intended for reducing computational costs.
@@ -130,7 +130,7 @@ Specifies a penalty in the range [0.0, 1.0] on interaction terms. A higher value
 Restricts the maximum number of terms in any of the underlying models trained to ***max_terms***. The default value of 0 means no limit. After the limit is reached, the remaining boosting steps are used to further update the coefficients of already included terms. An optional tuning objective could be to find the lowest positive value of ***max_terms*** that does not increase the prediction error significantly. Low positive values can speed up the training process significantly. Setting a limit with ***max_terms*** may require a higher learning rate for best results.


-## Method: fit(X:FloatMatrix, y:FloatVector, sample_weight:FloatVector = np.empty(0), X_names:List[str] = [], cv_observations:IntMatrix = np.empty([0, 0]), prioritized_predictors_indexes:List[int] = [], monotonic_constraints:List[int] = [], group:FloatVector = np.empty(0), interaction_constraints:List[List[int]] = [], other_data:FloatMatrix = np.empty([0, 0]), predictor_learning_rates:List[float] = [], predictor_penalties_for_non_linearity:List[float] = [], predictor_penalties_for_interactions:List[float] = [])
+## Method: fit(X:FloatMatrix, y:FloatVector, sample_weight:FloatVector = np.empty(0), X_names:List[str] = [], cv_observations:IntMatrix = np.empty([0, 0]), prioritized_predictors_indexes:List[int] = [], monotonic_constraints:List[int] = [], group:FloatVector = np.empty(0), interaction_constraints:List[List[int]] = [], other_data:FloatMatrix = np.empty([0, 0]), predictor_learning_rates:List[float] = [], predictor_penalties_for_non_linearity:List[float] = [], predictor_penalties_for_interactions:List[float] = [], predictor_min_observations_in_split:List[int] = [])

 ***This method fits the model to data.***

@@ -175,6 +175,9 @@ An optional list of floats specifying penalties for non-linearity for each predi
 #### predictor_penalties_for_interactions
 An optional list of floats specifying interaction penalties for each predictor. If provided, this supersedes ***penalty_for_interactions***. For example, if there are two predictors in ***X***, then predictor_penalties_for_interactions = [0.1,0.2] means that all terms using the first predictor in ***X*** as a main effect will have an interaction penalty of 0.1 and that all terms using the second predictor in ***X*** as a main effect will have an interaction penalty of 0.2.

+#### predictor_min_observations_in_split
+An optional list of integers specifying the minimum effective number of observations in a split for each predictor. If provided, this supersedes ***min_observations_in_split***.
+

 ## Method: predict(X:FloatMatrix, cap_predictions_to_minmax_in_training:bool = True)

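Analogously for the regressor, a minimal sketch of how the new fit argument relates to the global hyperparameter. It is not part of this commit; the import path, data and values are assumptions.

```python
# Hedged sketch, assuming the package is importable as `aplr`.
import numpy as np
from aplr import APLRRegressor

rng = np.random.default_rng(1)
X = rng.uniform(size=(2000, 3))
y = 2.0 * X[:, 0] + np.sin(6.0 * X[:, 1]) + rng.normal(scale=0.1, size=2000)

model = APLRRegressor(min_observations_in_split=20)  # applies to every predictor
model.fit(
    X,
    y,
    X_names=["x0", "x1", "x2"],
    # Illustrative per-predictor values: the third predictor gets a much larger
    # minimum, superseding the global value of 20 for that predictor only.
    predictor_min_observations_in_split=[20, 20, 200],
)
predictions = model.predict(X)
```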

aplr/aplr.py

Lines changed: 4 additions & 0 deletions

@@ -196,6 +196,7 @@ def fit(
         predictor_learning_rates: List[float] = [],
         predictor_penalties_for_non_linearity: List[float] = [],
         predictor_penalties_for_interactions: List[float] = [],
+        predictor_min_observations_in_split: List[int] = [],
     ):
         self.__set_params_cpp()
         self.APLRRegressor.fit(
@@ -212,6 +213,7 @@ def fit(
             predictor_learning_rates,
             predictor_penalties_for_non_linearity,
             predictor_penalties_for_interactions,
+            predictor_min_observations_in_split,
         )

     def predict(
@@ -447,6 +449,7 @@ def fit(
         predictor_learning_rates: List[float] = [],
         predictor_penalties_for_non_linearity: List[float] = [],
         predictor_penalties_for_interactions: List[float] = [],
+        predictor_min_observations_in_split: List[int] = [],
     ):
         self.__set_params_cpp()
         self.APLRClassifier.fit(
@@ -461,6 +464,7 @@ def fit(
             predictor_learning_rates,
             predictor_penalties_for_non_linearity,
             predictor_penalties_for_interactions,
+            predictor_min_observations_in_split,
         )
         # For sklearn
         self.classes_ = np.arange(len(self.APLRClassifier.get_categories()))

cpp/APLRClassifier.h

Lines changed: 8 additions & 4 deletions

@@ -66,7 +66,8 @@ class APLRClassifier
              const std::vector<size_t> &prioritized_predictors_indexes = {}, const std::vector<int> &monotonic_constraints = {},
              const std::vector<std::vector<size_t>> &interaction_constraints = {}, const std::vector<double> &predictor_learning_rates = {},
              const std::vector<double> &predictor_penalties_for_non_linearity = {},
-             const std::vector<double> &predictor_penalties_for_interactions = {});
+             const std::vector<double> &predictor_penalties_for_interactions = {},
+             const std::vector<size_t> &predictor_min_observations_in_split = {});
     MatrixXd predict_class_probabilities(const MatrixXd &X, bool cap_predictions_to_minmax_in_training = false);
     std::vector<std::string> predict(const MatrixXd &X, bool cap_predictions_to_minmax_in_training = false);
     MatrixXd calculate_local_feature_contribution(const MatrixXd &X);
@@ -123,7 +124,8 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
                          const MatrixXi &cv_observations, const std::vector<size_t> &prioritized_predictors_indexes,
                          const std::vector<int> &monotonic_constraints, const std::vector<std::vector<size_t>> &interaction_constraints,
                          const std::vector<double> &predictor_learning_rates, const std::vector<double> &predictor_penalties_for_non_linearity,
-                         const std::vector<double> &predictor_penalties_for_interactions)
+                         const std::vector<double> &predictor_penalties_for_interactions,
+                         const std::vector<size_t> &predictor_min_observations_in_split)
 {
     initialize();
     find_categories(y);
@@ -145,7 +147,8 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
     logit_models[categories[0]].max_terms = max_terms;
     logit_models[categories[0]].fit(X, response_values[categories[0]], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
                                     monotonic_constraints, VectorXi(0), interaction_constraints, MatrixXd(0, 0), predictor_learning_rates,
-                                    predictor_penalties_for_non_linearity, predictor_penalties_for_interactions);
+                                    predictor_penalties_for_non_linearity, predictor_penalties_for_interactions,
+                                    predictor_min_observations_in_split);

     logit_models[categories[1]] = logit_models[categories[0]];
     invert_second_model_in_two_class_case(logit_models[categories[1]]);
@@ -166,7 +169,8 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
         logit_models[category].max_terms = max_terms;
         logit_models[category].fit(X, response_values[category], sample_weight, X_names, cv_observations, prioritized_predictors_indexes,
                                    monotonic_constraints, VectorXi(0), interaction_constraints, MatrixXd(0, 0), predictor_learning_rates,
-                                   predictor_penalties_for_non_linearity, predictor_penalties_for_interactions);
+                                   predictor_penalties_for_non_linearity, predictor_penalties_for_interactions,
+                                   predictor_min_observations_in_split);
     }
 }

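As the change above shows, APLRClassifier::fit forwards the same predictor_min_observations_in_split vector to every per-category logit model. A short multiclass sketch of what that means from the Python side; it is not part of this commit, and the import path, labels and values are assumptions.

```python
# Hedged multiclass sketch, assuming the package is importable as `aplr`.
import numpy as np
from aplr import APLRClassifier

rng = np.random.default_rng(2)
X = rng.uniform(size=(1500, 2))
sums = X.sum(axis=1)
y = ["low" if s < 0.7 else "mid" if s < 1.3 else "high" for s in sums]

model = APLRClassifier()
# One underlying logit model is fitted per category ("low", "mid", "high");
# each of them receives the same per-predictor minimums given below.
model.fit(X, y, predictor_min_observations_in_split=[50, 10])
class_probabilities = model.predict_class_probabilities(X)
```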
