
Commit 783c70e

10.2.0
1 parent f8e7bb2 commit 783c70e

16 files changed: +274 -114 lines changed

API_REFERENCE_FOR_CLASSIFICATION.md

Lines changed: 8 additions & 3 deletions
@@ -29,7 +29,7 @@ Specifies the maximum number of bins to discretize the data into when searching
 Specifies the maximum allowed depth of interaction terms. ***0*** means that interactions are not allowed. This hyperparameter should be tuned by for example doing a grid search for best predictiveness. For best interpretability use 0 (or 1 if interactions are needed).
 
 #### max_interactions (default = 100000)
-The maximum number of interactions allowed in each underlying model. A lower value may be used to reduce computational time.
+The maximum number of interactions allowed in each underlying model. A lower value may be used to reduce computational time or to increase interpretability.
 
 #### min_observations_in_split (default = 20)
 The minimum effective number of observations that a term in the model must rely on. This hyperparameter should be tuned. Larger values are more appropriate for larger datasets. Larger values result in more robust models (lower variance), potentially at the expense of increased bias.
@@ -125,7 +125,7 @@ Parameters are the same as in ***predict_class_probabilities()***.
 
 ## Method: calculate_local_feature_contribution(X:npt.ArrayLike)
 
-***Returns a numpy matrix containing estimated feature contribution to the linear predictor in X for each predictor.***
+***Returns a numpy matrix containing feature contribution to the linear predictor in X for each predictor. For each prediction this method uses calculate_local_feature_contribution() in the logit APLRRegressor model for the category that corresponds to the prediction. Example: If a prediction is "myclass" then the method uses calculate_local_feature_contribution() in the logit model that predicts whether an observation belongs to class "myclass" or not.***
 
 ### Parameters
 
@@ -160,4 +160,9 @@ A string specifying the label of the category.
 
 ## Method: get_feature_importance()
 
-***Returns a numpy vector containing the feature importance of each predictor, estimated as an average of feature importances for the underlying logit models.***
+***Returns a numpy vector containing the feature importance of each predictor, estimated as an average of feature importances for the underlying logit models.***
+
+
+## Method: get_unique_term_affiliations()
+
+***Returns a list of strings containing unique predictor affiliations for terms.***
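
Usage note: after this change, the columns of calculate_local_feature_contribution() and the entries of get_feature_importance() line up with the affiliations returned by get_unique_term_affiliations(). Below is a minimal sketch of how the updated classifier API could be exercised; the toy data, class labels and the `from aplr import APLRClassifier` import are assumptions for illustration, not part of this commit.

import numpy as np
from aplr import APLRClassifier

# Hypothetical toy data: 100 observations, 3 predictors, 2 classes.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = np.where(X[:, 0] + X[:, 1] > 0, "myclass", "otherclass").tolist()

model = APLRClassifier()
model.fit(X, y)

affiliations = model.get_unique_term_affiliations()            # unique predictor affiliations
importance = model.get_feature_importance()                    # averaged over the underlying logit models
contributions = model.calculate_local_feature_contribution(X)  # one column per affiliation
predictions = model.predict(X)

# Local contributions for the first observation, taken from the logit model
# of its predicted category.
print("prediction:", predictions[0])
for name, value in zip(affiliations, contributions[0]):
    print(f"  {name}: {value:+.4f}")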

API_REFERENCE_FOR_REGRESSION.md

Lines changed: 12 additions & 2 deletions
@@ -32,7 +32,7 @@ Specifies the maximum number of bins to discretize the data into when searching
 Specifies the maximum allowed depth of interaction terms. ***0*** means that interactions are not allowed. This hyperparameter should be tuned by for example doing a grid search for best predictiveness. For best interpretability use 0 (or 1 if interactions are needed).
 
 #### max_interactions (default = 100000)
-The maximum number of interactions allowed in each underlying model. A lower value may be used to reduce computational time.
+The maximum number of interactions allowed in each underlying model. A lower value may be used to reduce computational time or to increase interpretability.
 
 #### min_observations_in_split (default = 20)
 The minimum effective number of observations that a term in the model must rely on. This hyperparameter should be tuned. Larger values are more appropriate for larger datasets. Larger values result in more robust models (lower variance), potentially at the expense of increased bias.
@@ -221,7 +221,7 @@ A numpy matrix with predictor values.
 
 ## Method: calculate_local_feature_contribution(X:npt.ArrayLike)
 
-***Returns a numpy matrix containing estimated feature contribution to the linear predictor in X for each predictor.***
+***Returns a numpy matrix containing feature contribution to the linear predictor in X for each predictor.***
 
 ### Parameters
 
@@ -267,6 +267,16 @@ A numpy matrix with predictor values.
 ***Returns a list of strings containing term names.***
 
 
+## Method: get_term_affiliations()
+
+***Returns a list of strings containing predictor affiliations for terms.***
+
+
+## Method: get_unique_term_affiliations()
+
+***Returns a list of strings containing unique predictor affiliations for terms.***
+
+
 ## Method: get_term_coefficients()
 
 ***Returns a numpy vector containing term regression coefficients.***
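
A rough sketch of the two new regressor methods: get_term_affiliations() returns one affiliation per term in the fitted model, while get_unique_term_affiliations() returns the deduplicated set. The toy data and the `from aplr import APLRRegressor` import below are assumptions for illustration only.

from collections import Counter

import numpy as np
from aplr import APLRRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 4))
y = X[:, 0] * X[:, 1] + X[:, 2] + rng.normal(scale=0.1, size=200)

model = APLRRegressor()
model.fit(X, y)

per_term = model.get_term_affiliations()         # one entry per term
unique = model.get_unique_term_affiliations()    # deduplicated predictor combinations

print(Counter(per_term))   # how many terms fall under each affiliation
print(unique)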

aplr/aplr.py

Lines changed: 9 additions & 0 deletions
@@ -250,6 +250,12 @@ def calculate_terms(self, X: npt.ArrayLike) -> npt.ArrayLike:
     def get_term_names(self) -> List[str]:
         return self.APLRRegressor.get_term_names()
 
+    def get_term_affiliations(self) -> List[str]:
+        return self.APLRRegressor.get_term_affiliations()
+
+    def get_unique_term_affiliations(self) -> List[str]:
+        return self.APLRRegressor.get_unique_term_affiliations()
+
     def get_term_coefficients(self) -> npt.ArrayLike:
         return self.APLRRegressor.get_term_coefficients()
 
@@ -469,6 +475,9 @@ def get_cv_error(self) -> float:
     def get_feature_importance(self) -> npt.ArrayLike:
         return self.APLRClassifier.get_feature_importance()
 
+    def get_unique_term_affiliations(self) -> List[str]:
+        return self.APLRClassifier.get_unique_term_affiliations()
+
     # For sklearn
     def get_params(self, deep=True):
         return {

cpp/APLRClassifier.h

Lines changed: 52 additions & 5 deletions
@@ -21,6 +21,7 @@ class APLRClassifier
     void define_cv_observations(const std::vector<std::string> &y, const MatrixXi &cv_observations_);
     void invert_second_model_in_two_class_case(APLRRegressor &second_model);
     void calculate_validation_metrics();
+    void calculate_unique_term_affiliations();
     void cleanup_after_fit();
 
 public:
@@ -49,6 +50,8 @@ class APLRClassifier
     double penalty_for_non_linearity;
     double penalty_for_interactions;
     size_t max_terms;
+    std::vector<std::string> unique_term_affiliations;
+    std::map<std::string, size_t> unique_term_affiliation_map;
 
     APLRClassifier(size_t m = 3000, double v = 0.1, uint_fast32_t random_state = std::numeric_limits<uint_fast32_t>::lowest(), size_t n_jobs = 0,
                    size_t cv_folds = 5, size_t reserved_terms_times_num_x = 100, size_t bins = 300, size_t verbosity = 0, size_t max_interaction_level = 1,
@@ -72,6 +75,7 @@ class APLRClassifier
     MatrixXd get_validation_error_steps();
     double get_cv_error();
     VectorXd get_feature_importance();
+    std::vector<std::string> get_unique_term_affiliations();
 };
 
 APLRClassifier::APLRClassifier(size_t m, double v, uint_fast32_t random_state, size_t n_jobs, size_t cv_folds,
@@ -104,7 +108,8 @@ APLRClassifier::APLRClassifier(const APLRClassifier &other)
       early_stopping_rounds{other.early_stopping_rounds},
       num_first_steps_with_linear_effects_only{other.num_first_steps_with_linear_effects_only},
       penalty_for_non_linearity{other.penalty_for_non_linearity}, penalty_for_interactions{other.penalty_for_interactions},
-      max_terms{other.max_terms}
+      max_terms{other.max_terms}, unique_term_affiliations{other.unique_term_affiliations},
+      unique_term_affiliation_map{other.unique_term_affiliation_map}
 {
 }
 
@@ -163,6 +168,7 @@ void APLRClassifier::fit(const MatrixXd &X, const std::vector<std::string> &y, c
         }
     }
 
+    calculate_unique_term_affiliations();
     calculate_validation_metrics();
     cleanup_after_fit();
 }
@@ -227,17 +233,47 @@ void APLRClassifier::invert_second_model_in_two_class_case(APLRRegressor &second
     }
 }
 
+void APLRClassifier::calculate_unique_term_affiliations()
+{
+    size_t number_of_term_affiliations{0};
+    for (std::string &category : categories)
+    {
+        number_of_term_affiliations += logit_models[category].number_of_unique_term_affiliations;
+    }
+    std::vector<std::string> term_affiliations;
+    term_affiliations.reserve(number_of_term_affiliations);
+    size_t counter{0};
+    for (std::string &category : categories)
+    {
+        for (auto &affiliation : logit_models[category].unique_term_affiliations)
+        {
+            term_affiliations.push_back(affiliation);
+            ++counter;
+        }
+    }
+    unique_term_affiliations = get_unique_strings_as_vector(term_affiliations);
+    for (size_t i = 0; i < unique_term_affiliations.size(); ++i)
+    {
+        unique_term_affiliation_map[unique_term_affiliations[i]] = i;
+    }
+}
+
 void APLRClassifier::calculate_validation_metrics()
 {
     double category_weight{1.0 / static_cast<double>(categories.size())};
     validation_error_steps = MatrixXd::Constant(m, cv_observations.cols(), 0.0);
     cv_error = 0.0;
-    feature_importance = VectorXd::Constant(logit_models[categories[0]].get_feature_importance().rows(), 0.0);
+    feature_importance = VectorXd::Constant(unique_term_affiliations.size(), 0.0);
     for (std::string &category : categories)
    {
         cv_error += logit_models[category].get_cv_error() * category_weight;
         validation_error_steps += logit_models[category].get_validation_error_steps() * category_weight;
-        feature_importance += logit_models[category].get_feature_importance() * category_weight;
+        for (auto &affiliation : logit_models[category].unique_term_affiliations)
+        {
+            size_t feature_number_in_classifier{unique_term_affiliation_map[affiliation]};
+            size_t feature_number_in_logit_model{logit_models[category].unique_term_affiliation_map[affiliation]};
+            feature_importance[feature_number_in_classifier] += logit_models[category].get_feature_importance()[feature_number_in_logit_model] * category_weight;
+        }
     }
 }
 
@@ -282,11 +318,17 @@ std::vector<std::string> APLRClassifier::predict(const MatrixXd &X, bool cap_pre
 
 MatrixXd APLRClassifier::calculate_local_feature_contribution(const MatrixXd &X)
 {
-    MatrixXd output{MatrixXd::Constant(X.rows(), feature_importance.rows(), 0)};
+    MatrixXd output{MatrixXd::Constant(X.rows(), unique_term_affiliations.size(), 0)};
     std::vector<std::string> predictions{predict(X, false)};
     for (size_t row = 0; row < predictions.size(); ++row)
     {
-        output.row(row) = logit_models[predictions[row]].calculate_local_feature_contribution(X.row(row));
+        VectorXd local_feature_contribution_from_logit_model{logit_models[predictions[row]].calculate_local_feature_contribution(X.row(row)).row(0)};
+        for (auto &affiliation : logit_models[predictions[row]].unique_term_affiliations)
+        {
+            size_t feature_number_in_classifier{unique_term_affiliation_map[affiliation]};
+            size_t feature_number_in_logit_model{logit_models[predictions[row]].unique_term_affiliation_map[affiliation]};
+            output.col(feature_number_in_classifier)[row] = local_feature_contribution_from_logit_model[feature_number_in_logit_model];
+        }
     }
 
     return output;
@@ -327,4 +369,9 @@ double APLRClassifier::get_cv_error()
 VectorXd APLRClassifier::get_feature_importance()
 {
     return feature_importance;
+}
+
+std::vector<std::string> APLRClassifier::get_unique_term_affiliations()
+{
+    return unique_term_affiliations;
 }
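
The per-affiliation bookkeeping above is needed because each logit model may contain a different subset of term affiliations, so feature importances and local contributions can no longer be combined as whole vectors. Below is a standalone Python sketch of the same aggregation idea, with invented category names, affiliation labels and importance values (not taken from APLR itself).

import numpy as np

# Invented per-category feature importances, keyed by term affiliation.
# Each logit model may know about a different subset of affiliations.
importance_per_category = {
    "class_a": {"x1": 0.5, "x1 & x2": 0.25},
    "class_b": {"x1": 0.25, "x3": 0.75},
}

# Union of affiliations across categories, as in calculate_unique_term_affiliations().
unique_affiliations = sorted({a for imps in importance_per_category.values() for a in imps})
affiliation_index = {a: i for i, a in enumerate(unique_affiliations)}

# As in calculate_validation_metrics(): each category contributes its importance
# for an affiliation, weighted by 1 / number_of_categories.
category_weight = 1.0 / len(importance_per_category)
feature_importance = np.zeros(len(unique_affiliations))
for imps in importance_per_category.values():
    for affiliation, value in imps.items():
        feature_importance[affiliation_index[affiliation]] += value * category_weight

print(dict(zip(unique_affiliations, feature_importance.tolist())))
# {'x1': 0.375, 'x1 & x2': 0.125, 'x3': 0.375}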
