
Commit 460ce24

change update diag interface and update readme
1 parent 5307c1a commit 460ce24

File tree

- README.md
- docs/influence/influence_function_model.md
- notebooks/influence_wine.ipynb
- src/pydvl/influence/torch/influence_function_model.py
- tests/influence/torch/test_influence_model.py

5 files changed: +16 −8 lines changed

README.md

Lines changed: 2 additions & 1 deletion
@@ -318,7 +318,8 @@ We currently implement the following papers:
 - Schioppa, Andrea, Polina Zablotskaia, David Vilar, and Artem Sokolov.
   [Scaling Up Influence Functions](http://arxiv.org/abs/2112.03052).
   In Proceedings of the AAAI-22. arXiv, 2021.
-
+- James Martens, Roger Grosse, [Optimizing Neural Networks with Kronecker-factored Approximate Curvature](https://arxiv.org/abs/1503.05671), International Conference on Machine Learning (ICML), 2015.
+- George, Thomas, César Laurent, Xavier Bouthillier, Nicolas Ballas, Pascal Vincent, [Fast Approximate Natural Gradient Descent in a Kronecker-factored Eigenbasis](https://arxiv.org/abs/1806.03884), Advances in Neural Information Processing Systems 31, 2018.
 
 # License
 

docs/influence/influence_function_model.md

Lines changed: 2 additions & 2 deletions
@@ -115,14 +115,14 @@ if_model = EkfacInfluence(
 ```
 Upon initialization, the K-FAC method will parse the model and extract which layers require grad and which do not. Then it will only calculate the influence scores for the layers that require grad. The current implementation of the K-FAC method is only available for linear layers, and therefore if the model contains non-linear layers that require gradient the K-FAC method will raise a NotImplementedLayerRepresentationException.
 
-A further improvement of the K-FAC method is the Eigenvalue Corrected K-FAC (EKFAC) method [@george2018fast], which allows to further re-fit the eigenvalues of the Hessian, thus providing a more accurate approximation. On top of the K-FAC method, the EKFAC method is implemented by simply calling the update_diag method from [EkfacInfluence](pydvl/influence/torch/influence_function_model.py). The following code snippet shows how to use the EKFAC method to calculate the influence function of a model.
+A further improvement of the K-FAC method is the Eigenvalue Corrected K-FAC (EKFAC) method [@george2018fast], which allows to further re-fit the eigenvalues of the Hessian, thus providing a more accurate approximation. On top of the K-FAC method, the EKFAC method is implemented by setting `update_diagonal=True` when initialising [EkfacInfluence](pydvl/influence/torch/influence_function_model.py). The following code snippet shows how to use the EKFAC method to calculate the influence function of a model.
 
 ```python
 from pydvl.influence.torch import EkfacInfluence
 if_model = EkfacInfluence(
     model,
+    update_diagonal=True,
     hessian_regularization=0.0,
 )
 if_model.fit(train_loader)
-if_model.update_diag(train_loader)
 ```
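For orientation, the sketch below stitches the changed documentation example together with the influence computation touched elsewhere in this commit. It is not part of the diff: `model`, `train_loader`, `x_test`, `y_test`, `x_train` and `y_train` are placeholder objects the reader is assumed to provide.

```python
from pydvl.influence.torch import EkfacInfluence

# Sketch only: `model` should be a torch.nn.Module whose layers requiring grad
# are all linear, and `train_loader` a DataLoader over the training data.
if_model = EkfacInfluence(
    model,
    update_diagonal=True,          # diagonal re-fit now happens inside fit()
    hessian_regularization=0.1,
)
if_model = if_model.fit(train_loader)

# Influence of training points on test points, as in the updated wine notebook.
scores = if_model.influences(x_test, y_test, x_train, y_train, mode="up")
```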

notebooks/influence_wine.ipynb

Lines changed: 1 addition & 1 deletion
@@ -824,10 +824,10 @@
 "source": [
 "ekfac_influence_model = EkfacInfluence(\n",
 "    nn_model,\n",
+"    update_diagonal=True,\n",
 "    hessian_regularization=0.1,\n",
 ")\n",
 "ekfac_influence_model = ekfac_influence_model.fit(training_data_loader)\n",
-"ekfac_influence_model = ekfac_influence_model.update_diag(training_data_loader)\n",
 "ekfac_train_influences = ekfac_influence_model.influences(\n",
 "    *test_data, *training_data, mode=\"up\"\n",
 ")\n",

src/pydvl/influence/torch/influence_function_model.py

Lines changed: 10 additions & 3 deletions
@@ -896,6 +896,10 @@ class EkfacInfluence(TorchInfluenceFunctionModel):
 
     Args:
         model: Instance of [torch.nn.Module][torch.nn.Module].
+        update_diagonal: If True, the diagonal values in the ekfac representation are
+            refitted from the training data after calculating the KFAC blocks.
+            This provides a more accurate approximation of the Hessian, but it is
+            computationally more expensive.
         hessian_regularization: Regularization of the hessian.
     """
 
@@ -904,11 +908,13 @@ class EkfacInfluence(TorchInfluenceFunctionModel):
     def __init__(
         self,
         model: nn.Module,
+        update_diagonal: bool = False,
         hessian_regularization: float = 0.0,
     ):
 
         super().__init__(model, torch.nn.functional.cross_entropy)
         self.hessian_regularization = hessian_regularization
+        self.update_diagonal = update_diagonal
         self.active_layers = self._parse_active_layers()
 
     @property
@@ -1056,6 +1062,8 @@ def fit(self, data: DataLoader) -> EkfacInfluence:
             layers_evect_g.values(),
             layers_diags.values(),
         )
+        if self.update_diagonal:
+            self._update_diag(data)
         return self
 
     @staticmethod
@@ -1114,7 +1122,7 @@ def grad_hook(m, m_grad, m_out):
         )
         return input_hook, grad_hook
 
-    def update_diag(
+    def _update_diag(
         self,
         data: DataLoader,
     ) -> EkfacInfluence:
@@ -1125,8 +1133,7 @@ def update_diag(
         """
         if not self.is_fitted:
             raise ValueError(
-                "EkfacInfluence must be fitted before calling update_diag on it. "
-                "Please call fit first."
+                "EkfacInfluence must be fitted before updating the diagonal."
             )
         diags = {}
         last_x_kfe: Dict[str, torch.Tensor] = {}

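To make the interface change in this file concrete, here is a hedged before/after sketch; `model` and `train_loader` are placeholders, and the old call pattern is reconstructed from the lines removed in this commit.

```python
from pydvl.influence.torch import EkfacInfluence

# Before this commit: the diagonal re-fit was a separate public step.
if_model = EkfacInfluence(model, hessian_regularization=0.0)
if_model.fit(train_loader)
if_model.update_diag(train_loader)  # removed from the public interface

# After this commit: pass update_diagonal=True and fit() calls the private
# _update_diag(data) itself once the KFAC blocks are computed.
if_model = EkfacInfluence(
    model,
    update_diagonal=True,
    hessian_regularization=0.0,
)
if_model.fit(train_loader)
```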
tests/influence/torch/test_influence_model.py

Lines changed: 1 addition & 1 deletion
@@ -548,6 +548,7 @@ def test_influences_ekfac(
 
     ekfac_influence = EkfacInfluence(
         model,
+        update_diagonal=True,
         hessian_regularization=test_case.hessian_reg,
     )
 
@@ -564,7 +565,6 @@ def test_influences_ekfac(
             ekfac_influence.fit(train_dataloader)
     elif isinstance(loss, nn.CrossEntropyLoss):
         ekfac_influence = ekfac_influence.fit(train_dataloader)
-        ekfac_influence = ekfac_influence.update_diag(train_dataloader)
         ekfac_influence_values = ekfac_influence.influences(
             x_test, y_test, x_train, y_train, mode=test_case.mode
         ).numpy()
