Adding the predict_proba method to ShapWrapper and Evaluation class

mmerce · mmerce · commit 56f3ccfbfaa0 · 2023-07-18T16:45:44.000+02:00
diff --git a/bigml/evaluation.py b/bigml/evaluation.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2023 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""A local Evaluation object.
+
+This module defines a local class to handle the results of an evaluation
+
+"""
+import json
+
+
+from bigml.api import get_api_connection, ID_GETTERS
+from bigml.basemodel import retrieve_resource, get_resource_dict
+
+# Metric names set as attributes; "phi" and "phi_coefficient" are separate
+CLASSIFICATION_METRICS = ["accuracy", "precision", "recall", "phi",
+    "phi_coefficient", "f_measure", "confusion_matrix", "per_class_statistics"]
+
+REGRESSION_METRICS = ["mean_absolute_error", "mean_squared_error", "r_squared"]
+
+
+class ClassificationEval():
+    """Stores as attributes the CLASSIFICATION_METRICS found in the entry of
+    `per_class_statistics` whose "class_name" is `name` (ValueError if none)"""
+    def __init__(self, name, per_class_statistics):
+        self.name = name
+        found = [s for s in per_class_statistics if s["class_name"] == name]
+        if not found:  # never fall back to another class' statistics
+            raise ValueError("No statistics found for class: %s" % name)
+        for metric in set(CLASSIFICATION_METRICS).intersection(found[0]):
+            setattr(self, metric, found[0][metric])
+
+
+class Evaluation():
+    """A class to deal with the information in an evaluation result. The
+    metrics are stored as attributes (none if the evaluation status has an
+    error). In classifications, the last class is the default positive one.
+    """
+    def __init__(self, evaluation, api=None):
+        """Builds the object from `evaluation` (see `get_resource_dict`).
+        Raises ValueError if the resource or its structure are faulty."""
+        self.resource_id = None
+        self.model_id = None
+        self.test_dataset_id = None
+        self.regression = None
+        self.full = None
+        self.random = None
+        self.error = None
+        self.error_message = None
+        self.api = get_api_connection(api)
+
+        try:
+            self.resource_id, evaluation = get_resource_dict(
+                evaluation, "evaluation", self.api)
+        except ValueError as resource:
+            # the error text is tried as the JSON of the resource itself
+            try:
+                evaluation = json.loads(str(resource))
+                self.resource_id = evaluation["resource"]
+            except ValueError:
+                raise ValueError("The evaluation resource was faulty: \n%s" %
+                                 resource)
+
+        if 'object' in evaluation and isinstance(evaluation['object'], dict):
+            evaluation = evaluation['object']
+        self.status = evaluation["status"]
+        self.error = self.status.get("error")
+        if self.error is not None:
+            self.error_message = self.status.get("message")
+            return  # failed evaluation: no metrics nor classes to read
+        self.model_id = evaluation["model"]
+        self.test_dataset_id = evaluation["dataset"]
+        result = evaluation.get("result")
+        if not isinstance(result, dict):
+            raise ValueError("Failed to find the correct evaluation"
+                             " structure.")
+        self.full = result.get("model")
+        self.random = result.get("random")
+        self.regression = not self.full.get("confusion_matrix")
+        if self.regression:
+            self.add_metrics(self.full, REGRESSION_METRICS)
+            self.mean = result.get("mean")
+        else:
+            self.add_metrics(self.full, CLASSIFICATION_METRICS)
+            self.mode = result.get("mode")
+            self.classes = result.get("class_names")
+            self.positive_class = ClassificationEval(
+                self.classes[-1], self.per_class_statistics)
+
+    def add_metrics(self, metrics_info, metrics_list, obj=None):
+        """Adding the metrics in the `metrics_info` dictionary as attributes
+        in the object passed as argument. If None is given, the metrics will
+        be added to the self object. A metric missing in `metrics_info` is
+        read from its "average_<metric>" key (None when that is missing too).
+        """
+        obj = self if obj is None else obj
+        for metric in metrics_list:
+            setattr(obj, metric, metrics_info.get(
+                metric, metrics_info.get("average_%s" % metric)))
+
+    def set_positive_class(self, positive_class):
+        """Changing the positive class. Raises ValueError if `positive_class`
+        is not one of the evaluation classes (regressions have none)."""
+        classes = getattr(self, "classes", None) or []
+        if positive_class is None or positive_class not in classes:
+            raise ValueError("The possible classes are: %s" %
+                             ", ".join(classes))
+        self.positive_class = ClassificationEval(positive_class,
+                                                 self.per_class_statistics)
diff --git a/bigml/shapwrapper.py b/bigml/shapwrapper.py
@@ -21,6 +21,7 @@
 import numpy as np
 
 from bigml.supervised import SupervisedModel, extract_id
+from bigml.fusion import Fusion
 from bigml.fields import Fields
 from bigml.api import get_resource_type, get_api_connection
 
@@ -35,7 +36,8 @@ def __init__(self, model, api=None, cache_get=None,
         self.api = get_api_connection(api)
         resource_id, model = extract_id(model, self.api)
         resource_type = get_resource_type(resource_id)
-        self.local_model = SupervisedModel(model, api=api, cache_get=cache_get,
+        model_class = Fusion if resource_type == "fusion" else SupervisedModel
+        self.local_model = model_class(model, api=api, cache_get=cache_get,
             operation_settings=operation_settings)
         objective_id = getattr(self.local_model, "objective_id", None)
         self.fields = Fields(self.local_model.fields,
@@ -55,3 +57,15 @@ def predict(self, x_test, **kwargs):
         pred_fields = Fields(objective_field)
         return pred_fields.to_numpy(batch_prediction,
                                     objective=True).reshape(-1)
+
+    def predict_proba(self, x_test):
+        """Prediction method that interfaces with the Shap library. Returns
+        an array stacking, per row in `x_test`, the `compact=True` output of
+        the local model's `predict_probability`. ValueError for regressions."""
+        if self.local_model.regression:
+            raise ValueError("This method is only available for classification"
+                             " models.")
+        # np.asarray builds the array from the data in one go
+        return np.asarray([
+            self.local_model.predict_probability(input_data, compact=True)
+            for input_data in self.fields.from_numpy(x_test)])
diff --git a/bigml/supervised.py b/bigml/supervised.py
@@ -137,6 +137,8 @@ def __init__(self, model, api=None, cache_get=None,
         for attr, value in list(local_model.__dict__.items()):
             setattr(self, attr, value)
         self.local_model = local_model
+        self.regression = resource_type == "linearregression" or \
+            self.local_model.regression
         self.name = self.local_model.name
         self.description = self.local_model.description