Added output_name argument to as_html and as_dataframe to allow for single output selection

RobGeada · RobGeada · commit 4c835de0232a · 2022-12-13T14:21:49.000Z
diff --git a/src/trustyai/explainers/counterfactuals.py b/src/trustyai/explainers/counterfactuals.py
@@ -5,10 +5,10 @@
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 import pandas as pd
+from pandas.io.formats.style import Styler
 import uuid as _uuid
 
 from trustyai import _default_initializer  # pylint: disable=unused-import
-from .explanation_results import ExplanationResults
 from trustyai.utils._visualisation import (
     DEFAULT_STYLE as ds,
     DEFAULT_RC_PARAMS as drcp,
@@ -50,7 +50,7 @@
 CounterfactualConfig = _CounterfactualConfig
 
 
-class CounterfactualResult(ExplanationResults):
+class CounterfactualResult:
     """Wraps Counterfactual results. This object is returned by the
     :class:`~CounterfactualExplainer`, and provides a variety of methods to visualize and interact
     with the results of the counterfactual explanation.
diff --git a/src/trustyai/explainers/explanation_results.py b/src/trustyai/explainers/explanation_results.py
@@ -1,29 +1,27 @@
 """Generic class for Explanation and Saliency results"""
 from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, Union
 
 import bokeh.models
 import pandas as pd
 from bokeh.io import show
 from pandas.io.formats.style import Styler
 
 
-class ExplanationResults(ABC):
-    """Abstract class for explanation visualisers"""
+# pylint: disable=too-few-public-methods
+class SaliencyResults(ABC):
+    """Abstract class for saliency visualisers"""
 
     @abstractmethod
-    def as_dataframe(self) -> pd.DataFrame:
+    def as_dataframe(
+        self, output_name=None
+    ) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]:
         """Display explanation result as a dataframe"""
 
     @abstractmethod
-    def as_html(self) -> Styler:
+    def as_html(self, output_name=None) -> Union[Dict[str, Styler], Styler]:
         """Visualise the styled dataframe"""
 
-
-# pylint: disable=too-few-public-methods
-class SaliencyResults(ExplanationResults):
-    """Abstract class for saliency visualisers"""
-
     @abstractmethod
     def saliency_map(self):
         """Return the Saliencies as a dictionary, keyed by output name"""
diff --git a/src/trustyai/explainers/lime.py b/src/trustyai/explainers/lime.py
@@ -72,46 +72,69 @@ def saliency_map(self) -> Dict[str, Saliency]:
             for entry in self._java_saliency_results.saliencies.entrySet()
         }
 
-    def as_dataframe(self) -> pd.DataFrame:
+    def as_dataframe(
+        self, output_name: str = None
+    ) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]:
         """
         Return the LIME result as a dataframe.
 
+        Parameters
+        ----------
+        output_name: str
+            If an output_name is passed, that output's explanation is returned as a pandas
+            dataframe. Otherwise, all outputs' explanation dataframes are returned in a dictionary.
+
+
         Returns
         -------
-        pandas.DataFrame
+        pandas.DataFrame or Dict[str, pandas.DataFrame]
             Dictionary of DataFrames, keyed by output name, containing the results of the LIME
-            explanation. For each model output, the table will contain the following columns:
+            explanation. Each dataframe will contain the following columns:
 
             * ``Feature``: The name of the feature
             * ``Value``: The value of the feature for this particular input.
             * ``Saliency``: The importance of this feature to the output.
             * ``Confidence``: The confidence of this explanation as returned by the explainer.
 
         """
+
         outputs = self.saliency_map().keys()
 
         data = {}
         for output in outputs:
-            output_rows = []
-            for pfi in self.saliency_map().get(output).getPerFeatureImportance():
-                output_rows.append(
-                    {
-                        "Feature": str(pfi.getFeature().getName().toString()),
-                        "Value": pfi.getFeature().getValue().getUnderlyingObject(),
-                        "Saliency": pfi.getScore(),
-                        "Confidence": pfi.getConfidence(),
-                    }
-                )
-            data[output] = pd.DataFrame(output_rows)
+            if output_name is None or output == output_name:
+                output_rows = []
+                for pfi in self.saliency_map().get(output).getPerFeatureImportance():
+                    output_rows.append(
+                        {
+                            "Feature": str(pfi.getFeature().getName().toString()),
+                            "Value": pfi.getFeature().getValue().getUnderlyingObject(),
+                            "Saliency": pfi.getScore(),
+                            "Confidence": pfi.getConfidence(),
+                        }
+                    )
+                data[output] = pd.DataFrame(output_rows)
+
+        if output_name is not None:
+            return data[output_name]
         return data
 
-    def as_html(self) -> pd.io.formats.style.Styler:
+    def as_html(
+        self, output_name: str = None
+    ) -> Union[Dict[str, pd.io.formats.style.Styler], pd.io.formats.style.Styler]:
         """
         Return the LIME results as Pandas Styler objects.
 
+        Parameters
+        ----------
+        output_name: str
+            If an output_name is passed, that output's explanation is returned as a pandas Styler.
+            Otherwise, all outputs' explanation stylers are returned in a dictionary.
+
+
         Returns
         -------
-        Dict[str, pandas.Styler]
+        pandas.Styler or Dict[str, pandas.Styler]
             Dictionary of stylers keyed by output name. Each styler containing the results of the
             LIME explanation for that particular output, in the same
             schema as in :func:`as_dataframe`. This will:
@@ -121,19 +144,25 @@ def as_html(self) -> pd.io.formats.style.Styler:
 
         htmls = {}
         for k, df in self.as_dataframe().items():
-            htmls[k] = df.style.background_gradient(
-                LinearSegmentedColormap.from_list(
-                    name="rwg",
-                    colors=[
-                        ds["negative_primary_colour"],
-                        ds["neutral_primary_colour"],
-                        ds["positive_primary_colour"],
-                    ],
-                ),
-                subset="Saliency",
-                vmin=-1 * max(np.abs(df["Saliency"])),
-                vmax=max(np.abs(df["Saliency"])),
-            )
+            if output_name is None or k == output_name:
+                style = df.style.background_gradient(
+                    LinearSegmentedColormap.from_list(
+                        name="rwg",
+                        colors=[
+                            ds["negative_primary_colour"],
+                            ds["neutral_primary_colour"],
+                            ds["positive_primary_colour"],
+                        ],
+                    ),
+                    subset="Saliency",
+                    vmin=-1 * max(np.abs(df["Saliency"])),
+                    vmax=max(np.abs(df["Saliency"])),
+                )
+                style.set_caption(f"LIME Explanation of {k}")
+                htmls[k] = style
+
+        if output_name is not None:
+            return htmls[output_name]
         return htmls
 
     def _matplotlib_plot(self, output_name: str, block=True) -> None:
diff --git a/src/trustyai/explainers/shap.py b/src/trustyai/explainers/shap.py
@@ -123,15 +123,24 @@ def _saliency_to_dataframe(self, saliency, output_name):
 
         return pd.DataFrame([fnull] + data_rows)
 
-    def as_dataframe(self) -> Dict[str, pd.DataFrame]:
+    def as_dataframe(
+        self, output_name: str = None
+    ) -> Union[Dict[str, pd.DataFrame], pd.DataFrame]:
         """
         Return the SHAP results as dataframes.
 
+        Parameters
+        ----------
+        output_name: str
+            If an output_name is passed, that output's explanation is returned as a dataframe.
+            Otherwise, all outputs' explanation dataframes are returned in a dictionary.
+
         Returns
         -------
-        Dict[str, pandas.DataFrame]
-            Dictionary of DataFrames, keyed by output name, containing the results of the SHAP
-            explanation. For each model output, the table will contain the following columns:
+        pandas.DataFrame or Dict[str, pandas.DataFrame]
+            A dataframe or dictionary of DataFrames, keyed by output name. Each dataframe
+            contains the results of the SHAP explanation for a particular output. Each dataframe
+            will contain the following columns:
 
             * ``Feature``: The name of the feature
             * ``Feature Value``: The value of the feature for this particular input.
@@ -140,18 +149,33 @@ def as_dataframe(self) -> Dict[str, pd.DataFrame]:
             * ``Confidence``: The confidence of this explanation as returned by the explainer.
 
         """
-        df_dict = {}
-        for output_name, saliency in self.saliency_map().items():
-            df_dict[output_name] = self._saliency_to_dataframe(saliency, output_name)
-        return df_dict
+        if output_name is None:
+            df_dict = {}
+            for output_name_key, saliency in self.saliency_map().items():
+                df_dict[output_name_key] = self._saliency_to_dataframe(
+                    saliency, output_name_key
+                )
+            return df_dict
+        return self._saliency_to_dataframe(
+            self.saliency_map()[output_name], output_name
+        )
 
-    def as_html(self) -> Dict[str, pd.io.formats.style.Styler]:
+    def as_html(
+        self, output_name: str = None
+    ) -> Union[Dict[str, pd.io.formats.style.Styler], pd.io.formats.style.Styler]:
         """
         Return the SHAP results as Pandas Styler objects.
 
+        Parameters
+        ----------
+        output_name: str
+            If an output_name is passed, that output's explanation is returned as a pandas Styler.
+            Otherwise, all outputs' explanation stylers are returned in a dictionary.
+
+
         Returns
         -------
-        Dict[str, pandas.Styler]
+        pandas.Styler or Dict[str, pandas.Styler]
             Dictionary of stylers keyed by output name. Each styler containing the results of the
             SHAP explanation for that particular output, in the same
             schema as in :func:`as_dataframe`. This will:
@@ -174,31 +198,35 @@ def _color_feature_values(feature_values, background_vals):
             return [None] + formats
 
         df_dict = {}
-        for output_name, saliency in self.saliency_map().items():
-            df = self._saliency_to_dataframe(saliency, output_name)
-            shap_values = df["SHAP Value"].values[1:]
-            background_mean_feature_values = df["Mean Background Value"].values[1:]
-
-            style = df.style.background_gradient(
-                LinearSegmentedColormap.from_list(
-                    name="rwg",
-                    colors=[
-                        ds["negative_primary_colour"],
-                        ds["neutral_primary_colour"],
-                        ds["positive_primary_colour"],
-                    ],
-                ),
-                subset=(slice(1, None), "SHAP Value"),
-                vmin=-1 * max(np.abs(shap_values)),
-                vmax=max(np.abs(shap_values)),
-            )
-            style.set_caption(f"Explanation of {output_name}")
-            df_dict[output_name] = style.apply(
-                _color_feature_values,
-                background_vals=background_mean_feature_values,
-                subset="Value",
-                axis=0,
-            )
+        for output_name_key, saliency in self.saliency_map().items():
+            if output_name is None or output_name_key == output_name:
+                df = self._saliency_to_dataframe(saliency, output_name_key)
+                shap_values = df["SHAP Value"].values[1:]
+                background_mean_feature_values = df["Mean Background Value"].values[1:]
+
+                style = df.style.background_gradient(
+                    LinearSegmentedColormap.from_list(
+                        name="rwg",
+                        colors=[
+                            ds["negative_primary_colour"],
+                            ds["neutral_primary_colour"],
+                            ds["positive_primary_colour"],
+                        ],
+                    ),
+                    subset=(slice(1, None), "SHAP Value"),
+                    vmin=-1 * max(np.abs(shap_values)),
+                    vmax=max(np.abs(shap_values)),
+                )
+                style.set_caption(f"SHAP Explanation of {output_name_key}")
+                df_dict[output_name_key] = style.apply(
+                    _color_feature_values,
+                    background_vals=background_mean_feature_values,
+                    subset="Value",
+                    axis=0,
+                )
+
+        if output_name is not None:
+            return df_dict[output_name]
         return df_dict
 
     def _matplotlib_plot(self, output_name, block=True) -> None:
diff --git a/tests/general/test_limeexplainer.py b/tests/general/test_limeexplainer.py
@@ -188,6 +188,7 @@ def test_lime_numpy():
 
     for oname in onames:
         assert oname in explanation.as_dataframe().keys()
+        assert len(explanation.as_dataframe(oname)) == 5
         for fname in fnames:
             assert fname in explanation.as_dataframe()[oname]['Feature'].values
 
diff --git a/tests/general/test_shap.py b/tests/general/test_shap.py
@@ -131,5 +131,8 @@ def test_shap_numpy():
 
     for oname in onames:
         assert oname in explanation.as_dataframe().keys()
+        assert len(explanation.as_dataframe(oname)) == 5 + 1
+
         for fname in fnames:
             assert fname in explanation.as_dataframe()[oname]['Feature'].values
+