mind-inria
diff --git a/‎src/hidimstat/__init__.py‎
Lines changed: 4 additions & 1 deletion b/‎src/hidimstat/__init__.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/hidimstat/base_perturbation.py‎
Lines changed: 20 additions & 122 deletions b/‎src/hidimstat/base_perturbation.py‎
Lines changed: 20 additions & 122 deletions
diff --git a/‎src/hidimstat/base_variable_importance.py‎
Lines changed: 146 additions & 0 deletions b/‎src/hidimstat/base_variable_importance.py‎
Lines changed: 146 additions & 0 deletions
@@ -1,4 +1,7 @@
-from .base_variable_importance import BaseVariableImportance
+from .base_variable_importance import (
+    BaseVariableImportance,
+    VariableImportanceFeatureGroup,
+)
 from .base_perturbation import BasePerturbation
 from .ensemble_clustered_inference import (
     clustered_inference,
 
@@ -1,16 +1,16 @@
 import numpy as np
-import pandas as pd
 from joblib import Parallel, delayed
 from sklearn.base import check_is_fitted
 from sklearn.metrics import root_mean_squared_error
-import warnings
 
 from hidimstat._utils.utils import _check_vim_predict_method
-from hidimstat._utils.exception import InternalError
-from hidimstat.base_variable_importance import BaseVariableImportance
+from hidimstat.base_variable_importance import (
+    BaseVariableImportance,
+    VariableImportanceGroup,
+)
 
 
-class BasePerturbation(BaseVariableImportance):
+class BasePerturbation(BaseVariableImportance, VariableImportanceGroup):
     def __init__(
         self,
         estimator,
@@ -43,6 +43,7 @@ def __init__(
             The number of parallel jobs to run. Parallelization is done over the
             variables or groups of variables.
         """
+        super().__init__()
         check_is_fitted(estimator)
         assert n_permutations > 0, "n_permutations must be positive"
         self.estimator = estimator
@@ -51,45 +52,6 @@ def __init__(
         self.method = method
         self.n_jobs = n_jobs
         self.n_permutations = n_permutations
-        self.n_groups = None
-
-    def fit(self, X, y=None, groups=None):
-        """Base fit method for perturbation-based methods. Identifies the groups.
-
-        Parameters
-        ----------
-        X: array-like of shape (n_samples, n_features)
-            The input samples.
-        y: array-like of shape (n_samples,)
-            Not used, only present for consistency with the sklearn API.
-        groups: dict, optional
-            A dictionary where the keys are the group names and the values are the
-            list of column names corresponding to each group. If None, the groups are
-            identified based on the columns of X.
-        """
-        if groups is None:
-            self.n_groups = X.shape[1]
-            self.groups = {j: [j] for j in range(self.n_groups)}
-            self._groups_ids = np.array(list(self.groups.values()), dtype=int)
-        elif isinstance(groups, dict):
-            self.n_groups = len(groups)
-            self.groups = groups
-            if isinstance(X, pd.DataFrame):
-                self._groups_ids = []
-                for group_key in self.groups.keys():
-                    self._groups_ids.append(
-                        [
-                            i
-                            for i, col in enumerate(X.columns)
-                            if col in self.groups[group_key]
-                        ]
-                    )
-            else:
-                self._groups_ids = [
-                    np.array(ids, dtype=int) for ids in list(self.groups.values())
-                ]
-        else:
-            raise ValueError("groups needs to be a dictionnary")
 
     def predict(self, X):
         """
@@ -111,8 +73,12 @@ def predict(self, X):
 
         # Parallelize the computation of the importance scores for each group
         out_list = Parallel(n_jobs=self.n_jobs)(
-            delayed(self._joblib_predict_one_group)(X_, group_id, group_key)
-            for group_id, group_key in enumerate(self.groups.keys())
+            delayed(self._joblib_predict_one_features_group)(
+                X_, features_group_id, features_group_key
+            )
+            for features_group_id, features_group_key in enumerate(
+                self.features_groups.keys()
+            )
         )
         return np.stack(out_list, axis=0)
 
@@ -160,77 +126,7 @@ def importance(self, X, y):
         )
         return out_dict
 
-    def _check_fit(self, X):
-        """
-        Check if the perturbation method has been properly fitted.
-
-        This method verifies that the perturbation method has been fitted by checking
-        if required attributes are set and if the number of features matches
-        the grouped variables.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            Input data to validate against the fitted model.
-
-        Raises
-        ------
-        ValueError
-            If the method has not been fitted (i.e., if n_groups, groups,
-            or _groups_ids attributes are missing).
-        AssertionError
-            If the number of features in X does not match the total number
-            of features in the grouped variables.
-        """
-        if (
-            self.n_groups is None
-            or not hasattr(self, "groups")
-            or not hasattr(self, "_groups_ids")
-        ):
-            raise ValueError(
-                "The class is not fitted. The fit method must be called"
-                " to set variable groups. If no grouping is needed,"
-                " call fit with groups=None"
-            )
-        if isinstance(X, pd.DataFrame):
-            names = list(X.columns)
-        elif isinstance(X, np.ndarray) and X.dtype.names is not None:
-            names = X.dtype.names
-            # transform Structured Array in pandas array for a better manipulation
-            X = pd.DataFrame(X)
-        elif isinstance(X, np.ndarray):
-            names = None
-        else:
-            raise ValueError("X should be a pandas dataframe or a numpy array.")
-        number_columns = X.shape[1]
-        for index_variables in self.groups.values():
-            if type(index_variables[0]) is int or np.issubdtype(
-                type(index_variables[0]), int
-            ):
-                assert np.all(
-                    np.array(index_variables, dtype=int) < number_columns
-                ), "X does not correspond to the fitting data."
-            elif type(index_variables[0]) is str or np.issubdtype(
-                type(index_variables[0]), str
-            ):
-                assert np.all(
-                    [name in names for name in index_variables]
-                ), f"The array is missing at least one of the following columns {index_variables}."
-            else:
-                raise InternalError(
-                    "A problem with indexing has happened during the fit."
-                )
-        number_unique_feature_in_groups = np.unique(
-            np.concatenate([values for values in self.groups.values()])
-        ).shape[0]
-        if X.shape[1] != number_unique_feature_in_groups:
-            warnings.warn(
-                f"The number of features in X: {X.shape[1]} differs from the"
-                " number of features for which importance is computed: "
-                f"{number_unique_feature_in_groups}"
-            )
-
-    def _joblib_predict_one_group(self, X, group_id, group_key):
+    def _joblib_predict_one_group(self, X, features_group_id, features_group_key):
         """
         Compute the predictions after perturbation of the data for a given
         group of variables. This function is parallelized.
@@ -244,13 +140,15 @@ def _joblib_predict_one_group(self, X, group_id, group_key):
         group_key: str, int
             The key of the group of variables. (parameter use for debugging)
         """
-        group_ids = self._groups_ids[group_id]
-        non_group_ids = np.delete(np.arange(X.shape[1]), group_ids)
+        features_group_ids = self._groups_ids[features_group_id]
+        non_features_group_ids = np.delete(np.arange(X.shape[1]), features_group_ids)
         # Create an array X_perm_j of shape (n_permutations, n_samples, n_features)
         # where the j-th group of covariates is permuted
         X_perm = np.empty((self.n_permutations, X.shape[0], X.shape[1]))
-        X_perm[:, :, non_group_ids] = np.delete(X, group_ids, axis=1)
-        X_perm[:, :, group_ids] = self._permutation(X, group_id=group_id)
+        X_perm[:, :, non_features_group_ids] = np.delete(X, features_group_ids, axis=1)
+        X_perm[:, :, features_group_ids] = self._permutation(
+            X, features_group_id=features_group_id
+        )
         # Reshape X_perm to allow for batch prediction
         X_perm_batch = X_perm.reshape(-1, X.shape[1])
         y_pred_perm = getattr(self.estimator, self.method)(X_perm_batch)
@@ -264,6 +162,6 @@ def _joblib_predict_one_group(self, X, group_id, group_key):
             )
         return y_pred_perm
 
-    def _permutation(self, X, group_id):
+    def _permutation(self, X, features_group_id):
         """Method for creating the permuted data for the j-th group of covariates."""
         raise NotImplementedError
@@ -2,6 +2,9 @@
 
 from sklearn.base import BaseEstimator
 import numpy as np
+import pandas as pd
+
+from hidimstat._utils.exception import InternalError
 
 
 class BaseVariableImportance(BaseEstimator):
@@ -131,3 +134,146 @@ def _check_importance(self):
             raise ValueError(
                 "The importances need to be called before calling this method"
             )
+
+
+class VariableImportanceFeatureGroup:
+    """
+    Mixin adding feature-group handling to variable importance methods.
+
+    This class does not inherit from `BaseVariableImportance`; it is a
+    standalone mixin meant to be combined with other base classes through
+    multiple inheritance. `fit` records how the columns of X are grouped and
+    `_check_fit` validates later inputs against that grouping.
+
+    Attributes
+    ----------
+    n_features_groups : int, default=None
+        The number of feature groups. None until `fit` is called.
+    features_groups : dict, default=None
+        A dictionary mapping group names or indices to lists of feature
+        indices or names. None until `fit` is called.
+    _features_groups_ids : ndarray or list of length n_features_groups, default=None
+        Integer column positions of the features of each group.
+        None until `fit` is called.
+
+    Methods
+    -------
+    fit(X, y=None, features_groups=None)
+        Identifies and stores feature groups based on input or provided grouping.
+    _check_fit(X)
+        Checks if the class has been fitted and validates group-feature correspondence.
+    """
+
+    def __init__(self):
+        # Cooperative call so that the other classes of the MRO are initialised too.
+        super().__init__()
+        self.n_features_groups = None
+        self.features_groups = None
+        self._features_groups_ids = None
+
+    def fit(self, X, y=None, features_groups=None):
+        """
+        Identify and store the groups of features.
+
+        Parameters
+        ----------
+        X: array-like of shape (n_samples, n_features)
+            The input samples. Only its number of columns (and its column
+            names when X is a pandas DataFrame) are used.
+        y: array-like of shape (n_samples,)
+            Not used, only present for consistency with the sklearn API.
+        features_groups: dict, optional
+            A dictionary where the keys are the group names and the values are the
+            list of column names (DataFrame input) or column indices (any other
+            input) corresponding to each group. If None, every column of X
+            forms its own group.
+
+        Raises
+        ------
+        ValueError
+            If `features_groups` is neither None nor a dictionary.
+        """
+        if features_groups is None:
+            # One singleton group per column, keyed by the column index.
+            self.n_features_groups = X.shape[1]
+            self.features_groups = {j: [j] for j in range(self.n_features_groups)}
+            self._features_groups_ids = np.array(
+                list(self.features_groups.values()), dtype=int
+            )
+        elif isinstance(features_groups, dict):
+            self.n_features_groups = len(features_groups)
+            self.features_groups = features_groups
+            if isinstance(X, pd.DataFrame):
+                # Translate column names into positions, following the column
+                # order of X.
+                self._features_groups_ids = [
+                    [i for i, col in enumerate(X.columns) if col in members]
+                    for members in self.features_groups.values()
+                ]
+            else:
+                # Without column names the group members are used as positions.
+                self._features_groups_ids = [
+                    np.array(ids, dtype=int) for ids in self.features_groups.values()
+                ]
+        else:
+            raise ValueError("features_groups needs to be a dictionnary")
+
+    def _check_fit(self, X):
+        """
+        Check that `fit` has been called and that X is compatible with the groups.
+
+        Parameters
+        ----------
+        X : pandas DataFrame or numpy array of shape (n_samples, n_features)
+            Input data to validate against the stored feature groups.
+            Structured numpy arrays are accepted; their field names are used
+            as column names.
+
+        Raises
+        ------
+        ValueError
+            If the instance has not been fitted (n_features_groups,
+            features_groups or _features_groups_ids is missing or None), or if
+            X is neither a pandas DataFrame nor a numpy array.
+        AssertionError
+            If a group refers to a column index outside of X, or to a column
+            name that X does not provide.
+        InternalError
+            If the first member of a group is neither an integer nor a string.
+
+        Warns
+        -----
+        UserWarning
+            If the number of columns of X differs from the number of distinct
+            features referenced by the groups.
+        """
+        # Imported locally so this method does not depend on a module-level
+        # `import warnings` being present.
+        import warnings
+
+        # getattr with a default also covers instances whose __init__ was
+        # never run, which then report "not fitted" instead of AttributeError.
+        if any(
+            getattr(self, attribute, None) is None
+            for attribute in (
+                "n_features_groups",
+                "features_groups",
+                "_features_groups_ids",
+            )
+        ):
+            raise ValueError(
+                "The class is not fitted. The fit method must be called"
+                " to set variable features_groups. If no grouping is needed,"
+                " call fit with features_groups=None"
+            )
+        if isinstance(X, pd.DataFrame):
+            names = list(X.columns)
+        elif isinstance(X, np.ndarray) and X.dtype.names is not None:
+            names = X.dtype.names
+            # transform Structured Array in pandas array for a better manipulation
+            X = pd.DataFrame(X)
+        elif isinstance(X, np.ndarray):
+            names = None
+        else:
+            raise ValueError("X should be a pandas dataframe or a numpy array.")
+        number_columns = X.shape[1]
+        for index_variables in self.features_groups.values():
+            # The type of the first member decides how the group is validated.
+            first_member = index_variables[0]
+            if isinstance(first_member, (int, np.integer)) and not isinstance(
+                first_member, bool
+            ):
+                # Explicit raise instead of `assert`: the check must survive
+                # `python -O`, and callers still see an AssertionError.
+                if not np.all(np.array(index_variables, dtype=int) < number_columns):
+                    raise AssertionError("X does not correspond to the fitting data.")
+            elif isinstance(first_member, str):
+                # A plain array has no column names, so every named column
+                # is reported as missing.
+                if names is None or not all(name in names for name in index_variables):
+                    raise AssertionError(
+                        f"The array is missing at least one of the following columns {index_variables}."
+                    )
+            else:
+                raise InternalError(
+                    "A problem with indexing has happened during the fit."
+                )
+        number_unique_feature_in_groups = np.unique(
+            np.concatenate(list(self.features_groups.values()))
+        ).shape[0]
+        if X.shape[1] != number_unique_feature_in_groups:
+            warnings.warn(
+                f"The number of features in X: {X.shape[1]} differs from the"
+                " number of features for which importance is computed: "
+                f"{number_unique_feature_in_groups}"
+            )