Initial support for external algorithms (#160)

ruivieira · web-flow · commit accf75eb8c9c · 2023-07-18T11:28:07.000+01:00
* Add dependencies
* Add TSICE tests
* Fix linting errors
* Update tests
* Refactored TSICE forecaster to model
* Match Pandas dependencies between ODH and XAI360
diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml
@@ -27,12 +27,14 @@ jobs:
         run: |
           pip install .
           pip install ".[dev]"
+          pip install ".[extras]"
       - name: Lint
         run: |
           pylint --ignore-imports=yes $(find src/trustyai -type f -name "*.py")
       - name: Test with pytest
         run: |
           pytest -v -s tests/general
+          pytest -v -s tests/extras
           pytest -v -s tests/initialization --forked
       - name: Style
         run: |
diff --git a/pyproject.toml b/pyproject.toml
@@ -48,6 +48,9 @@ dev = [
     "wheel~=0.38.4",
     "xgboost==1.4.2"
 ]
+extras = [
+    "aix360 [default,tsice] @ https://github.com/Trusted-AI/AIX360/archive/refs/heads/master.zip"
+]
 
 [project.urls]
 homepage = "https://github.com/trustyai-explainability/trustyai-explainability-python"
diff --git a/src/trustyai/explainers/extras/tsice.py b/src/trustyai/explainers/extras/tsice.py
@@ -0,0 +1,82 @@
+"""
+Wrapper module for TSICEExplainer from aix360.
+Original at https://github.com/Trusted-AI/AIX360/
+"""
+# pylint: disable=too-many-arguments,import-error
+from typing import Callable, List, Optional, Union
+
+from aix360.algorithms.tsice import TSICEExplainer as TSICEExplainerAIX
+from aix360.algorithms.tsutils.tsperturbers import TSPerturber
+import bokeh
+import pandas as pd
+
+from trustyai.model import SaliencyResults
+
+
+class TSICEResults(SaliencyResults):
+    """Wraps TSICE results. This object is returned by the :class:`~TSICEExplainer`,
+    and provides a variety of methods to visualize and interact with the explanation.
+    """
+
+    def __init__(self, explanation):
+        self.explanation = explanation
+
+    def as_dataframe(self) -> pd.DataFrame:
+        """Returns the explanation as a pandas dataframe."""
+        return pd.DataFrame(self.explanation)
+
+    def as_html(self) -> pd.io.formats.style.Styler:
+        """Returns the explanation as an HTML table."""
+        dataframe = self.as_dataframe()
+        return dataframe.style
+
+    def saliency_map(self):
+        """
+        Returns a dictionary of feature names and their total impact.
+        """
+        dict(zip(self.explanation["feature_names"], self.explanation["total_impact"]))
+
+    def _matplotlib_plot(self, output_name: str, block: bool, call_show: bool) -> None:
+        pass
+
+    def _get_bokeh_plot(self, output_name: str) -> bokeh.models.Plot:
+        pass
+
+
+class TSICEExplainer(TSICEExplainerAIX):
+    """
+    Wrapper for TSICEExplainer from aix360.
+    """
+
+    def __init__(
+        self,
+        model: Callable,
+        input_length: int,
+        forecast_lookahead: int,
+        n_variables: int = 1,
+        n_exogs: int = 0,
+        n_perturbations: int = 25,
+        features_to_analyze: Optional[List[str]] = None,
+        perturbers: Optional[List[Union[TSPerturber, dict]]] = None,
+        explanation_window_start: Optional[int] = None,
+        explanation_window_length: int = 10,
+    ):
+        super().__init__(
+            forecaster=model,
+            input_length=input_length,
+            forecast_lookahead=forecast_lookahead,
+            n_variables=n_variables,
+            n_exogs=n_exogs,
+            n_perturbations=n_perturbations,
+            features_to_analyze=features_to_analyze,
+            perturbers=perturbers,
+            explanation_window_start=explanation_window_start,
+            explanation_window_length=explanation_window_length,
+        )
+
+    def explain(self, inputs, outputs=None, **kwargs) -> TSICEResults:
+        """
+        Explain the model's prediction on X.
+        """
+        _explanation = super().explain_instance(inputs, y=outputs, **kwargs)
+        return TSICEResults(_explanation)
diff --git a/tests/extras/tsice/test_tsice.py b/tests/extras/tsice/test_tsice.py
@@ -0,0 +1,171 @@
+""" Tests for :py:mod:`aix360.algorithms.tsice.TSICEExplainer`.
+Original: https://github.com/Trusted-AI/AIX360/blob/master/tests/tsice/test_tsice.py
+"""
+import unittest
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestRegressor
+from aix360.algorithms.tsutils.tsframe import tsFrame
+from aix360.datasets import SunspotDataset
+from aix360.algorithms.tsutils.tsperturbers import BlockBootstrapPerturber
+from trustyai.explainers.extras.tsice import TSICEExplainer
+
+
+# transform a time series dataset into a supervised learning dataset
+# below sample forecaster is from: https://machinelearningmastery.com/random-forest-for-time-series-forecasting/
+class RandomForestUniVariateForecaster:
+    def __init__(self, n_past=4, n_future=1, RFparams={"n_estimators": 250}):
+        self.n_past = n_past
+        self.n_future = n_future
+        self.model = RandomForestRegressor(**RFparams)
+
+    def fit(self, X):
+        train = self._series_to_supervised(X, n_in=self.n_past, n_out=self.n_future)
+        trainX, trainy = train[:, : -self.n_future], train[:, -self.n_future:]
+        self.model = self.model.fit(trainX, trainy)
+        return self
+
+    def _series_to_supervised(self, data, n_in=1, n_out=1, dropnan=True):
+        1 if type(data) is list else data.shape[1]
+        df = pd.DataFrame(data)
+        cols = list()
+
+        # input sequence (t-n, ... t-1)
+        for i in range(n_in, 0, -1):
+            cols.append(df.shift(i))
+        # forecast sequence (t, t+1, ... t+n)
+        for i in range(0, n_out):
+            cols.append(df.shift(-i))
+        # put it all together
+        agg = pd.concat(cols, axis=1)
+        # drop rows with NaN values
+        if dropnan:
+            agg.dropna(inplace=True)
+        return agg.values
+
+    def predict(self, X):
+        row = X[-self.n_past:].flatten()
+        y_pred = self.model.predict(np.asarray([row]))
+        return y_pred
+
+
+class TestTSICEExplainer(unittest.TestCase):
+    def setUp(self):
+        # load data
+        df, schema = SunspotDataset().load_data()
+        ts = tsFrame(
+            df, timestamp_column=schema["timestamp"], columns=schema["targets"]
+        )
+
+        (self.ts_train, self.ts_test) = train_test_split(
+            ts, shuffle=False, stratify=None, test_size=0.15, train_size=None
+        )
+
+    def test_tsice_with_range(self):
+        # load model
+        input_length = 24
+        forecast_horizon = 4
+        forecaster = RandomForestUniVariateForecaster(
+            n_past=input_length, n_future=forecast_horizon
+        )
+
+        forecaster.fit(self.ts_train.iloc[-200:])
+
+        # initialize/fit explainer
+        observation_length = 12
+        explainer = TSICEExplainer(
+            model=forecaster.predict,
+            explanation_window_start=10,
+            explanation_window_length=observation_length,
+            features_to_analyze=[
+                "mean",  # analyze mean metric from recent time series of lengh <observation_length>
+            ],
+            perturbers=[
+                BlockBootstrapPerturber(window_length=5, block_length=5, block_swap=2),
+            ],
+            input_length=input_length,
+            forecast_lookahead=forecast_horizon,
+            n_perturbations=20,
+        )
+
+        # compute explanations
+        explanation = explainer.explain(
+            inputs=self.ts_test.iloc[:80],
+        )
+
+        # validate explanation structure
+        self.assertIn("data_x", explanation.explanation)
+        self.assertIn("feature_names", explanation.explanation)
+        self.assertIn("feature_values", explanation.explanation)
+        self.assertIn("signed_impact", explanation.explanation)
+        self.assertIn("total_impact", explanation.explanation)
+        self.assertIn("current_forecast", explanation.explanation)
+        self.assertIn("current_feature_values", explanation.explanation)
+        self.assertIn("perturbations", explanation.explanation)
+        self.assertIn("forecasts_on_perturbations", explanation.explanation)
+
+    def test_tsice_with_latest(self):
+        # load model
+        input_length = 24
+        forecast_horizon = 4
+        forecaster = RandomForestUniVariateForecaster(
+            n_past=input_length, n_future=forecast_horizon
+        )
+
+        forecaster.fit(self.ts_train.iloc[-200:])
+
+        # initialize/fit explainer
+        observation_length = 12
+        explainer = TSICEExplainer(
+            model=forecaster.predict,
+            explanation_window_start=None,
+            explanation_window_length=observation_length,
+            features_to_analyze=[
+                "mean",  # analyze mean metric from recent time series of lengh <observation_length>
+                "median",  # analyze median metric from recent time series of lengh <observation_length>
+                "std",  # analyze std metric from recent time series of lengh <observation_length>
+                "max_variation",  # analyze max_variation metric from recent time series of lengh <observation_length>
+                "min",
+                "max",
+                "range",
+                "intercept",
+                "trend",
+                "rsquared",
+            ],
+            perturbers=[
+                BlockBootstrapPerturber(window_length=5, block_length=5, block_swap=2),
+                dict(
+                    type="frequency",
+                    window_length=5,
+                    truncate_frequencies=5,
+                    block_length=4,
+                ),
+                dict(type="moving-average", window_length=5, lag=5, block_length=4),
+                dict(type="impute", block_length=4),
+                dict(type="shift", block_length=4),
+            ],
+            input_length=input_length,
+            forecast_lookahead=forecast_horizon,
+            n_perturbations=20,
+        )
+
+        # compute explanations
+        explanation = explainer.explain(
+            inputs=self.ts_test.iloc[:80],
+        )
+
+        # validate explanation structure
+        self.assertIn("data_x", explanation.explanation)
+        self.assertIn("feature_names", explanation.explanation)
+        self.assertIn("feature_values", explanation.explanation)
+        self.assertIn("signed_impact", explanation.explanation)
+        self.assertIn("total_impact", explanation.explanation)
+        self.assertIn("current_forecast", explanation.explanation)
+        self.assertIn("current_feature_values", explanation.explanation)
+        self.assertIn("perturbations", explanation.explanation)
+        self.assertIn("forecasts_on_perturbations", explanation.explanation)
+
+
+if __name__ == "__main__":
+    unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,9 @@ dev = [`
`48`	`48`	`"wheel~=0.38.4",`
`49`	`49`	`"xgboost==1.4.2"`
`50`	`50`	`]`
	`51`	`+extras = [`
	`52`	`+ "aix360 [default,tsice] @ https://github.com/Trusted-AI/AIX360/archive/refs/heads/master.zip"`
	`53`	`+]`
`51`	`54`
`52`	`55`	`[project.urls]`
`53`	`56`	`homepage = "https://github.com/trustyai-explainability/trustyai-explainability-python"`