
Commit c2544c7

Add review updates

1 parent: 7f58607

6 files changed: +300, -28 lines

cebra/data/base.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -207,7 +207,6 @@ def configure_for(self, model: "cebra.models.Model"):
             model: The model to configure the dataset for.
         """
         raise NotImplementedError
-        self.offset = model.get_offset()


 @dataclasses.dataclass
```
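The deleted assignment was dead code: it sat after an unconditional `raise`, so it could never execute. A minimal standalone illustration (hypothetical function, not CEBRA code):

```python
def configure_for(model):
    raise NotImplementedError
    # Unreachable: Python raises on the line above before reaching this one.
    offset = model.get_offset()
```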

cebra/integrations/sklearn/cebra.py

Lines changed: 59 additions & 1 deletion
```diff
@@ -1202,14 +1202,18 @@ def transform(self,
         sklearn_utils_validation.check_is_fitted(self, "n_features_")
         self.solver_._check_is_session_id_valid(session_id=session_id)

-        if torch.is_tensor(X) and X.device.type == "cuda":
+        if torch.is_tensor(X):
             X = X.detach().cpu()

         X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_))

         if isinstance(X, np.ndarray):
             X = torch.from_numpy(X)

+        if batch_size is not None and batch_size < 1:
+            raise ValueError(
+                f"Batch size should be at least 1, got {batch_size}")
+
         with torch.no_grad():
             output = self.solver_.transform(
                 inputs=X,
```
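A quick usage sketch of the new behavior, assuming `transform()` exposes the `batch_size` argument that the added validation guards (data shapes mirror the docstring example below):

```python
import numpy as np
import cebra

X = np.random.uniform(0, 1, (1000, 30))
cebra_model = cebra.CEBRA(max_iterations=10)
cebra_model.fit(X)

# Any torch tensor input (CPU or CUDA) is now detached and moved to CPU first;
# previously only CUDA tensors were.
embedding = cebra_model.transform(X, batch_size=100)  # batched inference
# cebra_model.transform(X, batch_size=0)  # ValueError: Batch size should be at least 1, got 0
```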
```diff
@@ -1219,6 +1223,60 @@ def transform(self,

         return output.detach().cpu().numpy()

+    # Deprecated, kept for testing.
+    def transform_deprecated(self,
+                             X: Union[npt.NDArray, torch.Tensor],
+                             session_id: Optional[int] = None) -> npt.NDArray:
+        """Transform an input sequence and return the embedding.
+
+        Args:
+            X: A numpy array or torch tensor of size ``time x dimension``.
+            session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`num_sessions` for
+                multisession, set to ``None`` for single session.
+
+        Returns:
+            A :py:func:`numpy.array` of size ``time x output_dimension``.
+
+        Example:
+
+            >>> import cebra
+            >>> import numpy as np
+            >>> dataset = np.random.uniform(0, 1, (1000, 30))
+            >>> cebra_model = cebra.CEBRA(max_iterations=10)
+            >>> cebra_model.fit(dataset)
+            CEBRA(max_iterations=10)
+            >>> embedding = cebra_model.transform(dataset)
+
+        """
+
+        sklearn_utils_validation.check_is_fitted(self, "n_features_")
+        model, offset = self._select_model(X, session_id)
+
+        # Input validation
+        X = sklearn_utils.check_input_array(X, min_samples=len(self.offset_))
+        input_dtype = X.dtype
+
+        with torch.no_grad():
+            model.eval()
+
+            if self.pad_before_transform:
+                X = np.pad(X, ((offset.left, offset.right - 1), (0, 0)),
+                           mode="edge")
+            X = torch.from_numpy(X).float().to(self.device_)
+
+            if isinstance(model, cebra.models.ConvolutionalModelMixin):
+                # Fully convolutional evaluation, switch (T, C) -> (1, C, T)
+                X = X.transpose(1, 0).unsqueeze(0)
+                output = model(X).cpu().numpy().squeeze(0).transpose(1, 0)
+            else:
+                # Standard evaluation, (T, C, dt)
+                output = model(X).cpu().numpy()
+
+            if input_dtype == "float64":
+                return output.astype(input_dtype)
+
+        return output
+
     def fit_transform(
         self,
         X: Union[npt.NDArray, torch.Tensor],
```
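The `np.pad` call in this deprecated path encodes the receptive-field arithmetic: a model with receptive field `len(offset) = offset.left + offset.right` maps `N` input samples to `N - len(offset) + 1` outputs, so edge-padding by `(offset.left, offset.right - 1)` keeps the embedding the same length as the input. A standalone check with illustrative numbers:

```python
import numpy as np

T, C = 100, 30
left, right = 4, 5  # illustrative stand-in for cebra.data.Offset(4, 5)
X_padded = np.pad(np.random.rand(T, C), ((left, right - 1), (0, 0)), mode="edge")
n_outputs = X_padded.shape[0] - (left + right) + 1  # convolutional output length
assert n_outputs == T
```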

cebra/solver/base.py

Lines changed: 21 additions & 14 deletions
```diff
@@ -81,18 +81,17 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int,
             f"batch_end_idx ({batch_end_idx}) cannot exceed the length of inputs ({num_samples})."
         )

-    batch_size_lenght = batch_end_idx - batch_start_idx
-    if batch_size_lenght <= len(offset):
+    batch_size_length = batch_end_idx - batch_start_idx
+    if batch_size_length <= len(offset):
         raise ValueError(
-            f"The batch has length {batch_size_lenght} which "
+            f"The batch has length {batch_size_length} which "
             f"is smaller or equal than the required offset length {len(offset)}."
             f"Either choose a model with smaller offset or the batch should contain more samples."
         )


 def _add_batched_zero_padding(batched_data: torch.Tensor,
-                              offset: cebra.data.Offset,
-                              batch_start_idx: int,
+                              offset: cebra.data.Offset, batch_start_idx: int,
                               batch_end_idx: int,
                               num_samples: int) -> torch.Tensor:
     """Add zero padding to the input data before inference.
```
```diff
@@ -409,6 +408,7 @@ def fit(
         TODO:
             * Refine the API here. Drop the validation entirely, and implement this via a hook?
         """
+        self._set_fitted_params(loader)
         self.to(loader.device)

         iterator = self._get_loader(loader)
@@ -436,8 +436,6 @@ def fit(
                     save_hook(num_steps, self)
                 self.save(logdir, f"checkpoint_{num_steps:#07d}.pth")

-        self._set_fitted_params(loader)
-
     def step(self, batch: cebra.data.Batch) -> dict:
         """Perform a single gradient update.

```
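Moving `_set_fitted_params(loader)` from the end of `fit()` to the top means attributes like `n_features` exist before the training loop starts, so code invoked mid-training (for example a `save_hook`) already sees a fitted solver. A hypothetical skeleton of the resulting control flow, not CEBRA code:

```python
class SketchSolver:
    def _set_fitted_params(self, loader):
        self.n_features = 30  # illustrative stand-in

    def fit(self, loader, save_hook=None):
        self._set_fitted_params(loader)  # moved: runs before the loop now
        for num_steps, batch in enumerate(loader):
            if save_hook is not None:
                save_hook(num_steps, self)  # hooks see a fitted solver

SketchSolver().fit(loader=[0, 1], save_hook=lambda i, s: print(i, s.n_features))
```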
```diff
@@ -553,6 +551,10 @@ def _select_model(
         """
         raise NotImplementedError

+    @property
+    def is_fitted(self):
+        return hasattr(self, "n_features")
+
     @torch.no_grad()
     def transform(self,
                   inputs: Union[torch.Tensor, List[torch.Tensor], npt.NDArray],
```
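The new `is_fitted` property simply reports whether `n_features` has been assigned, which is what `_set_fitted_params` (now called at the start of `fit()`) is responsible for. A self-contained demonstration of the semantics:

```python
class Demo:
    @property
    def is_fitted(self):
        return hasattr(self, "n_features")

d = Demo()
assert not d.is_fitted
d.n_features = 30  # what _set_fitted_params effectively establishes
assert d.is_fitted
```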
```diff
@@ -579,19 +581,24 @@ def transform(self,
         Returns:
             The output embedding.
         """
+        if not self.is_fitted:
+            raise ValueError(
+                f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with "
+                "appropriate arguments before using this estimator.")
+
+        if batch_size is not None and batch_size < 1:
+            raise ValueError(
+                f"Batch size should be at least 1, got {batch_size}")
+
         if isinstance(inputs, list):
-            raise NotImplementedError(
-                "Inputs to transform() should be the data for a single session."
+            raise ValueError(
+                "Inputs to transform() should be the data for a single session, but received a list."
             )

         elif not isinstance(inputs, torch.Tensor):
             raise ValueError(
                 f"Inputs should be a torch.Tensor, not {type(inputs)}.")

-        if not hasattr(self, "n_features"):
-            raise ValueError(
-                f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with "
-                "appropriate arguments before using this estimator.")
         model, offset = self._select_model(inputs, session_id)

         if len(offset) < 2 and pad_before_transform:
```
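With the reordering, `transform()` fails fast before `_select_model` runs, and list inputs now raise `ValueError` instead of `NotImplementedError`. A hedged sketch of what a caller sees, assuming `solver` is a fitted single-session solver and `X` a valid tensor:

```python
try:
    solver.transform(inputs=[X, X])  # multi-session style input
except ValueError as err:
    print(err)  # "... single session, but received a list."

try:
    solver.transform(inputs=X, batch_size=0)
except ValueError as err:
    print(err)  # "Batch size should be at least 1, got 0"
```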
```diff
@@ -647,7 +654,7 @@ def load(self, logdir, filename="checkpoint.pth"):
         checkpoint = torch.load(savepath, map_location=self.device)
         self.load_state_dict(checkpoint, strict=True)

-    def save(self, logdir, filename="checkpoint.pth"):
+    def save(self, logdir, filename="checkpoint_last.pth"):
         """Save the model and optimizer params.

         Args:
```
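One side effect worth flagging: `load()` (visible in the hunk header) still defaults to `"checkpoint.pth"`, while `save()` now defaults to `"checkpoint_last.pth"`, so a default save/load round trip needs the filename spelled out. Sketch, with `solver` any solver instance:

```python
solver.save("logs")  # writes logs/checkpoint_last.pth
solver.load("logs", filename="checkpoint_last.pth")  # load()'s default would look for checkpoint.pth
```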

cebra/solver/multi_session.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -44,9 +44,9 @@ class MultiSessionSolver(abc_.Solver):

     def parameters(self, session_id: Optional[int] = None):
         """Iterate over all parameters."""
-        self._check_is_session_id_valid(session_id=session_id)
-        for parameter in self.model[session_id].parameters():
-            yield parameter
+        if session_id is not None:
+            for parameter in self.model[session_id].parameters():
+                yield parameter

         for parameter in self.criterion.parameters():
             yield parameter
```
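After this change `parameters()` no longer validates `session_id`: with `session_id=None` only the criterion's parameters are yielded, and a session's model parameters are included only when that session is named explicitly. Usage sketch, assuming a fitted `MultiSessionSolver` named `solver`:

```python
criterion_only = list(solver.parameters())          # session_id=None
with_model = list(solver.parameters(session_id=0))  # model[0] + criterion
assert len(with_model) >= len(criterion_only)
```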
