Support TSICE explanations as plots (#169)

ruivieira · web-flow · commit cbee162e463d · 2023-07-25T12:03:11.000+01:00
* Add plots to TSICE explanation

* Remove duplicate plots and add perturbers import

* Correct subclass for TSICE explanation results

* Fix linting and formatting
diff --git a/src/trustyai/explainers/extras/tsice.py b/src/trustyai/explainers/extras/tsice.py
@@ -7,13 +7,15 @@
 
 from aix360.algorithms.tsice import TSICEExplainer as TSICEExplainerAIX
 from aix360.algorithms.tsutils.tsperturbers import TSPerturber
-import bokeh
 import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.linear_model import LinearRegression
 
-from trustyai.model import SaliencyResults
+from trustyai.explainers.explanation_results import ExplanationResults
 
 
-class TSICEResults(SaliencyResults):
+class TSICEResults(ExplanationResults):
     """Wraps TSICE results. This object is returned by the :class:`~TSICEExplainer`,
     and provides a variety of methods to visualize and interact with the explanation.
     """
@@ -23,24 +25,187 @@ def __init__(self, explanation):
 
     def as_dataframe(self) -> pd.DataFrame:
         """Returns the explanation as a pandas dataframe."""
-        return pd.DataFrame(self.explanation)
+        # Initialize an empty DataFrame
+        dataframe = pd.DataFrame()
+
+        # Loop through each feature_name and each key in data_x
+        for key in self.explanation["data_x"]:
+            for i, feature in enumerate(self.explanation["feature_names"]):
+                dataframe[f"{key}-{feature}"] = [
+                    val[0] for val in self.explanation["feature_values"][i]
+                ]
+
+        # Add "total_impact" as a column
+        dataframe["total_impact"] = self.explanation["total_impact"]
+        return dataframe
 
     def as_html(self) -> pd.io.formats.style.Styler:
         """Returns the explanation as an HTML table."""
         dataframe = self.as_dataframe()
         return dataframe.style
 
-    def saliency_map(self):
-        """
-        Returns a dictionary of feature names and their total impact.
-        """
-        dict(zip(self.explanation["feature_names"], self.explanation["total_impact"]))
+    def plot_forecast(self, variable):  # pylint: disable=too-many-locals
+        """Plots the explanation.
+        Based on https://github.com/Trusted-AI/AIX360/blob/master/examples/tsice/plots.py"""
+        forecast_horizon = self.explanation["current_forecast"].shape[0]
+        original_ts = pd.DataFrame(
+            data={variable: self.explanation["data_x"][variable]}
+        )
+        perturbations = [d for d in self.explanation["perturbations"] if variable in d]
+
+        # Generate a list of keys
+        keys = list(self.explanation["data_x"].keys())
+        # Find the index of the given key
+        key = keys.index(variable)
+        forecasts_on_perturbations = [
+            arr[:, key : key + 1]
+            for arr in self.explanation["forecasts_on_perturbations"]
+        ]
+
+        new_perturbations = []
+        new_timestamps = []
+        pred_ts = []
+
+        original_ts.index.freq = pd.infer_freq(original_ts.index)
+        for i in range(1, forecast_horizon + 1):
+            new_timestamps.append(original_ts.index[-1] + (i * original_ts.index.freq))
+
+        for perturbation in perturbations:
+            new_perturbations.append(pd.DataFrame(perturbation))
+
+        for forecast in forecasts_on_perturbations:
+            pred_ts.append(pd.DataFrame(forecast, index=new_timestamps))
+
+        current_forecast = self.explanation["current_forecast"][:, key : key + 1]
+        pred_original_ts = pd.DataFrame(current_forecast, index=new_timestamps)
+
+        _, axis = plt.subplots()
+
+        # Plot perturbed time series
+        axis = self._plot_timeseries(
+            new_perturbations,
+            color="lightgreen",
+            axis=axis,
+            name="perturbed timeseries samples",
+        )
+
+        # Plot original time series
+        axis = self._plot_timeseries(
+            original_ts, color="green", axis=axis, name="input/original timeseries"
+        )
+
+        # Plot varying forecast range
+        axis = self._plot_timeseries(
+            pred_ts, color="lightblue", axis=axis, name="forecast on perturbed samples"
+        )
+
+        # Plot original forecast
+        axis = self._plot_timeseries(
+            pred_original_ts, color="blue", axis=axis, name="original forecast"
+        )
+
+        # Set labels and title
+        axis.set_xlabel("Timestamp")
+        axis.set_ylabel(variable)
+        axis.set_title("Time-Series Individual Conditional Expectation (TSICE)")
+
+        axis.legend()
+
+        # Display the plot
+        plt.show()
+
+    def _plot_timeseries(
+        self, timeseries, color="green", axis=None, name="time series"
+    ):
+        showlegend = True
+        if isinstance(timeseries, dict):
+            data = timeseries
+            if isinstance(color, str):
+                color = {k: color for k in data}
+        elif isinstance(timeseries, list):
+            data = {}
+            for k, ts_data in enumerate(timeseries):
+                data[k] = ts_data
+            if isinstance(color, str):
+                color = {k: color for k in data}
+        else:
+            data = {}
+            data["default"] = timeseries
+            color = {"default": color}
+
+        if axis is None:
+            _, axis = plt.subplots()
+
+        first = True
+        for key, _timeseries in data.items():
+            if not first:
+                showlegend = False
+
+            self._add_timeseries(
+                axis, _timeseries, color=color[key], showlegend=showlegend, name=name
+            )
+            first = False
+
+        return axis
+
+    def _add_timeseries(
+        self, axis, timeseries, color="green", name="time series", showlegend=False
+    ):
+        timestamps = timeseries.index
+        axis.plot(
+            timestamps,
+            timeseries[timeseries.columns[0]],
+            color=color,
+            label=(name if showlegend else "_nolegend_"),
+        )
+
+    def plot_impact(self, feature_per_row=2):
+        """Plot the impact of each derived feature on the forecast.
+        Based on https://github.com/Trusted-AI/AIX360/blob/master/examples/tsice/plots.py"""
+
+        n_row = int(np.ceil(len(self.explanation["feature_names"]) / feature_per_row))
+        feat_values = np.array(self.explanation["feature_values"])
+
+        fig, axs = plt.subplots(n_row, feature_per_row, figsize=(15, 15))
+        axs = axs.ravel()  # Flatten the axs to iterate over it
+
+        for i, feat in enumerate(self.explanation["feature_names"]):
+            x_feat = feat_values[i, :, 0]
+            trend_fit = LinearRegression()
+            trend_line = trend_fit.fit(
+                x_feat.reshape(-1, 1), self.explanation["signed_impact"]
+            )
+            x_trend = np.linspace(min(x_feat), max(x_feat), 101)
+            y_trend = trend_line.predict(x_trend[..., np.newaxis])
+
+            # Scatter plot
+            axs[i].scatter(x=x_feat, y=self.explanation["signed_impact"], color="blue")
+            # Line plot
+            axs[i].plot(
+                x_trend,
+                y_trend,
+                color="green",
+                label="correlation between forecast and observed feature",
+            )
+            # Reference line
+            current_value = self.explanation["current_feature_values"][i][0]
+            axs[i].axvline(
+                x=current_value,
+                color="firebrick",
+                linestyle="--",
+                label="current value",
+            )
+
+            axs[i].set_xlabel(feat)
+            axs[i].set_ylabel("Δ forecast")
 
-    def _matplotlib_plot(self, output_name: str, block: bool, call_show: bool) -> None:
-        pass
+        # Display the legend on the first subplot
+        axs[0].legend()
 
-    def _get_bokeh_plot(self, output_name: str) -> bokeh.models.Plot:
-        pass
+        fig.suptitle("Impact of Derived Variable On The Forecast", fontsize=16)
+        plt.tight_layout()
+        plt.subplots_adjust(top=0.95)
+        plt.show()
 
 
 class TSICEExplainer(TSICEExplainerAIX):
diff --git a/src/trustyai/utils/extras/timeseries.py b/src/trustyai/utils/extras/timeseries.py
@@ -0,0 +1,3 @@
+"""Extra time series utilities."""
+from aix360.algorithms.tsutils.tsframe import tsFrame  # pylint: disable=unused-import
+from aix360.algorithms.tsutils.tsperturbers import *  # pylint: disable=wildcard-import,unused-wildcard-import
diff --git a/tests/extras/tsice/test_tsice.py b/tests/extras/tsice/test_tsice.py
@@ -79,14 +79,14 @@ def test_tsice_with_range(self):
             explanation_window_start=10,
             explanation_window_length=observation_length,
             features_to_analyze=[
-                "mean",  # analyze mean metric from recent time series of lengh <observation_length>
+                "mean", "std"  # analyze mean and std metrics from recent time series of length <observation_length>
             ],
             perturbers=[
                 BlockBootstrapPerturber(window_length=5, block_length=5, block_swap=2),
             ],
             input_length=input_length,
             forecast_lookahead=forecast_horizon,
-            n_perturbations=20,
+            n_perturbations=30,
         )
 
         # compute explanations

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+"""Extra time series utilities."""`
	`2`	`+from aix360.algorithms.tsutils.tsframe import tsFrame # pylint: disable=unused-import`
	`3`	`+from aix360.algorithms.tsutils.tsperturbers import * # pylint: disable=wildcard-import,unused-wildcard-import`