Commit 65fc455

Add some docstrings and typings and clean unnecessary changes
1 parent 9875a38 commit 65fc455

File tree

5 files changed: +116 additions, -29 deletions

cebra/data/single_session.py

Lines changed: 0 additions & 5 deletions
@@ -371,11 +371,6 @@ def __post_init__(self):
         self._init_behavior_distribution()
         self._init_time_distribution()
 
-        if self.conditional != "time_delta":
-            raise NotImplementedError(
-                "Hybrid training is currently only implemented using the ``time_delta`` "
-                "continual distribution.")
-
     def _init_behavior_distribution(self):
         if self.conditional == "time":
             self.behavior_distribution = cebra.distributions.TimeContrastive(
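With the guard above removed, hybrid training is no longer rejected outright for conditional distributions other than ``time_delta``. A minimal sketch of what this change permits through the sklearn API (the ``conditional`` and ``hybrid`` parameters exist in CEBRA; whether every ``conditional`` value is fully supported in hybrid mode is an assumption here):

```python
import cebra

# Hybrid training with a non-"time_delta" conditional distribution:
# previously this configuration raised NotImplementedError in __post_init__.
model = cebra.CEBRA(conditional="time", hybrid=True, max_iterations=10)
```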

cebra/integrations/sklearn/cebra.py

Lines changed: 2 additions & 2 deletions
@@ -1227,7 +1227,7 @@ def transform(self,
         >>> cebra_model = cebra.CEBRA(max_iterations=10)
         >>> cebra_model.fit(dataset)
         CEBRA(max_iterations=10)
-        >>> embedding = cebra_model.transform(dataset)
+        >>> embedding = cebra_model.transform(dataset, batch_size=200)
 
         """
         sklearn_utils_validation.check_is_fitted(self, "n_features_")

@@ -1254,7 +1254,7 @@ def transform(self,
 
         return output.detach().cpu().numpy()
 
-    #NOTE: Deprecated, as transform is now handled in the solver but kept for testing.
+    #NOTE: Deprecated: transform is now handled in the solver but kept for testing.
     def transform_deprecated(self,
                              X: Union[npt.NDArray, torch.Tensor],
                              session_id: Optional[int] = None) -> npt.NDArray:
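The updated doctest advertises the ``batch_size`` argument of ``transform``. A short, self-contained sketch of that usage (the dataset shape is illustrative):

```python
import numpy as np
import cebra

# Illustrative dataset: 10,000 timesteps of 30-dimensional neural data.
dataset = np.random.uniform(0, 1, (10000, 30))

cebra_model = cebra.CEBRA(max_iterations=10)
cebra_model.fit(dataset)

# Transforming in chunks of 200 samples bounds peak memory on long
# recordings; the concatenated embedding should match a full-batch transform.
embedding = cebra_model.transform(dataset, batch_size=200)
```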

cebra/solver/base.py

Lines changed: 32 additions & 15 deletions
@@ -54,9 +54,10 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int,
                    offset: cebra.data.Offset, num_samples: int):
     """Check that indexes in a batch are in a correct range.
 
-    First and last index must be positive integers, smaller than the total length of inputs
-    in the dataset, the first index must be smaller than the last and the batch size cannot
-    be smaller than the offset of the model.
+    First and last index must be positive integers, smaller than
+    the total length of inputs in the dataset, the first index
+    must be smaller than the last and the batch size cannot be
+    smaller than the offset of the model.
 
     Args:
         batch_start_idx: Index of the first sample in the batch.
@@ -380,6 +381,16 @@ def num_parameters(self) -> int:
 
     @abc.abstractmethod
     def parameters(self, session_id: Optional[int] = None):
+        """Iterate over all parameters of the model.
+
+        Args:
+            session_id: The session ID, an :py:class:`int` between 0 and
+                the number of sessions -1 for multisession, and set to
+                ``None`` for single session.
+
+        Yields:
+            The parameters of the model.
+        """
         raise NotImplementedError
 
     def _get_loader(self, loader):
@@ -573,6 +584,13 @@ def _select_model(
         raise NotImplementedError
 
     def _check_is_fitted(self):
+        """Check if the model is fitted.
+
+        If the model is fitted, the solver should have a `n_features` attribute.
+
+        Raises:
+            ValueError: If the model is not fitted.
+        """
         if not hasattr(self, "n_features"):
             raise ValueError(
                 f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with "
@@ -581,7 +599,7 @@ def _check_is_fitted(self):
     @torch.no_grad()
     def transform(self,
                   inputs: Union[torch.Tensor, List[torch.Tensor], npt.NDArray],
-                  pad_before_transform: bool = True,
+                  pad_before_transform: Optional[bool] = True,
                   session_id: Optional[int] = None,
                   batch_size: Optional[int] = None) -> torch.Tensor:
         """Compute the embedding.
@@ -591,11 +609,12 @@ def transform(self,
 
         Args:
             inputs: The input signal
-            pad_before_transform: If ``False``, no padding is applied to the input sequence.
-                and the output sequence will be smaller than the input sequence due to the
-                receptive field of the model. If the input sequence is ``n`` steps long,
-                and a model with receptive field ``m`` is used, the output sequence would
-                only be ``n-m+1`` steps long.
+            pad_before_transform: If ``False``, no padding is applied to the input
+                sequence and the output sequence will be smaller than the input
+                sequence due to the receptive field of the model. If the
+                input sequence is ``n`` steps long, and a model with receptive
+                field ``m`` is used, the output sequence would only be
+                ``n-m+1`` steps long.
             session_id: The session ID, an :py:class:`int` between 0 and
                 the number of sessions -1 for multisession, and set to
                 ``None`` for single session.
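The ``n-m+1`` relationship in the revised docstring is the standard "valid" convolution length. A quick numpy analogy for the arithmetic (not CEBRA's implementation, just an illustration of the shape rule):

```python
import numpy as np

n, m = 1000, 10                    # input length, model receptive field
x = np.random.randn(n)
kernel = np.ones(m) / m            # stand-in for a receptive-field-m model

# Without padding ("valid" mode), each output step consumes m input steps:
out = np.convolve(x, kernel, mode="valid")
assert out.shape[0] == n - m + 1   # 991 output steps
```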
@@ -640,8 +659,6 @@ def transform(self,
     def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch:
         """Given a batch of input examples, return the model outputs.
 
-        TODO: make this a public function?
-
         Args:
             batch: The input data, not necessarily aligned across the batch
                 dimension. This means that ``batch.index`` specifies the map
@@ -654,12 +671,12 @@ def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch:
         """
         raise NotImplementedError
 
-    def load(self, logdir, filename="checkpoint.pth"):
+    def load(self, logdir: str, filename: str = "checkpoint.pth"):
         """Load the experiment from its checkpoint file.
 
         Args:
-            logdir: Log directory.
-            filename (str): Checkpoint name for loading the experiment.
+            logdir: Logging directory.
+            filename: Checkpoint name for loading the experiment.
         """
 
         savepath = os.path.join(logdir, filename)

@@ -674,7 +691,7 @@ def load(self, logdir, filename="checkpoint.pth"):
             session_n_features for session_n_features in n_features
         ] if isinstance(n_features, list) else n_features)
 
-    def save(self, logdir, filename="checkpoint_last.pth"):
+    def save(self, logdir: str, filename: str = "checkpoint_last.pth"):
         """Save the model and optimizer params.
 
         Args:
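A hedged sketch of the checkpointing round-trip these signatures describe (``solver`` stands for any fitted solver instance, which is assumed here; the filenames are the defaults from the diff):

```python
import tempfile

logdir = tempfile.mkdtemp()

# Persist the model and optimizer params, then restore them later.
solver.save(logdir, filename="checkpoint_last.pth")
solver.load(logdir, filename="checkpoint_last.pth")
```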

cebra/solver/multi_session.py

Lines changed: 27 additions & 4 deletions
@@ -40,11 +40,21 @@ class MultiSessionSolver(abc_.Solver):
     _variant_name = "multi-session"
 
     def parameters(self, session_id: Optional[int] = None):
-        """Iterate over all parameters."""
+        """Iterate over all parameters.
+
+        Args:
+            session_id: The session ID, an :py:class:`int` between 0 and
+                the number of sessions -1 for multisession, and set to
+                ``None`` for single session.
+
+        Yields:
+            The parameters of the model.
+        """
         if session_id is not None:
             for parameter in self.model[session_id].parameters():
                 yield parameter
 
+        # If session_id is None, it can still iterate over the criterion
         for parameter in self.criterion.parameters():
             yield parameter
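The yield order above (the chosen session's model first, then the criterion) is easy to mirror in a standalone generator; a small torch-only sketch of the same pattern (the names and shapes here are illustrative, not CEBRA's):

```python
import torch

models = [torch.nn.Linear(10, 3), torch.nn.Linear(12, 3)]  # one model per session
criterion = torch.nn.Linear(3, 1)  # stand-in for a learnable criterion

def parameters(session_id=None):
    # Yield the selected session's model parameters, then always the
    # criterion parameters (also when session_id is None).
    if session_id is not None:
        yield from models[session_id].parameters()
    yield from criterion.parameters()

n_params = sum(p.numel() for p in parameters(session_id=0))
```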

@@ -161,12 +171,12 @@ def _check_is_inputs_valid(self, inputs: torch.Tensor,
     def _check_is_session_id_valid(self, session_id: Optional[int]):
         """Check that the session ID provided is valid for the solver instance.
 
-        The session ID must be non-null and between 0 and the number session in the dataset.
+        The session ID must be non-null and between 0 and the number session
+        in the dataset.
 
         Args:
             session_id: The session ID to check.
         """
-
         if session_id is None:
             raise RuntimeError(
                 "No session_id provided: multisession model requires a session_id to choose the model corresponding to your data shape."
@@ -233,7 +243,20 @@ class MultiSessionAuxVariableSolver(MultiSessionSolver):
     _variant_name = "multi-session-aux"
     reference_model: torch.nn.Module
 
-    def _inference(self, batches):
+    def _inference(self, batches: List[cebra.data.Batch]) -> cebra.data.Batch:
+        """Given batches of input examples, computes the feature representations/embeddings.
+
+        Args:
+            batches: A list of input data, not necessarily aligned across the batch
+                dimension. This means that ``batch.index`` specifies the map
+                between reference/positive samples, if not equal ``None``.
+
+        Returns:
+            Processed batch of data. While the input data might not be aligned
+            across the sample dimensions, the output data should be aligned and
+            ``batch.index`` should be set to ``None``.
+
+        """
         refs = []
         poss = []
         negs = []

cebra/solver/single_session.py

Lines changed: 55 additions & 3 deletions
@@ -46,8 +46,18 @@ class SingleSessionSolver(abc_.Solver):
     _variant_name = "single-session"
 
     def parameters(self, session_id: Optional[int] = None):
-        """Iterate over all parameters."""
-        self._check_is_session_id_valid(session_id=session_id)
+        """Iterate over all parameters.
+
+        Args:
+            session_id: The session ID, an :py:class:`int` between 0 and
+                the number of sessions -1 for multisession, and set to
+                ``None`` for single session.
+
+        Yields:
+            The parameters of the model.
+        """
+        # If session_id is invalid, it doesn't matter, since we are
+        # using a single session solver.
         for parameter in self.model.parameters():
             yield parameter

@@ -196,7 +206,22 @@ def __post_init__(self):
         self.reference_model = copy.deepcopy(self.model)
         self.reference_model.to(self.model.device)
 
-    def _inference(self, batch):
+    def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch:
+        """Given a batch of input examples, computes the feature representation/embedding.
+
+        The reference samples are processed with a different model than the
+        positive and negative samples.
+
+        Args:
+            batch: The input data, not necessarily aligned across the batch
+                dimension. This means that ``batch.index`` specifies the map
+                between reference/positive samples, if not equal ``None``.
+
+        Returns:
+            Processed batch of data. While the input data might not be aligned
+            across the sample dimensions, the output data should be aligned and
+            ``batch.index`` should be set to ``None``.
+        """
         batch.to(self.device)
         ref = self.reference_model(batch.reference)
         pos = self.model(batch.positive)
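The deepcopy-based reference model this docstring describes can be sketched in a few lines of plain torch (illustrative shapes; not CEBRA's actual model classes):

```python
import copy
import torch

model = torch.nn.Linear(20, 8)
reference_model = copy.deepcopy(model)  # separate copy for reference samples

reference = torch.randn(32, 20)
positive = torch.randn(32, 20)

ref = reference_model(reference)  # reference goes through the copied model
pos = model(positive)             # positive (and negative) samples go
                                  # through the trained model
```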
@@ -212,6 +237,21 @@ class SingleSessionHybridSolver(abc_.MultiobjectiveSolver, SingleSessionSolver):
     _variant_name = "single-session-hybrid"
 
     def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch:
+        """Given a batch of input examples, computes the feature representation/embedding.
+
+        The samples are processed with both a time-contrastive module and a
+        behavior-contrastive module, that are part of the same model.
+
+        Args:
+            batch: The input data, not necessarily aligned across the batch
+                dimension. This means that ``batch.index`` specifies the map
+                between reference/positive samples, if not equal ``None``.
+
+        Returns:
+            Processed batch of data. While the input data might not be aligned
+            across the sample dimensions, the output data should be aligned and
+            ``batch.index`` should be set to ``None``.
+        """
         batch.to(self.device)
         behavior_ref = self.model(batch.reference)[0]
         behavior_pos = self.model(batch.positive[:int(len(batch.positive) //
@@ -305,6 +345,18 @@ def get_embedding(self, data):
         return self.model(data[0].T)
 
     def _inference(self, batch: cebra.data.Batch) -> cebra.data.Batch:
+        """Given a batch of input examples, computes the feature representation/embedding.
+
+        Args:
+            batch: The input data, not necessarily aligned across the batch
+                dimension. This means that ``batch.index`` specifies the map
+                between reference/positive samples, if not equal ``None``.
+
+        Returns:
+            Processed batch of data. While the input data might not be aligned
+            across the sample dimensions, the output data should be aligned and
+            ``batch.index`` should be set to ``None``.
+        """
         outputs = self.get_embedding(self.neural)
         idc = batch.positive - self.offset.left >= len(outputs)
         batch.positive[idc] = batch.reference[idc]
