Fix all tests except the xcebra tests

CeliaBenquet · CeliaBenquet · commit 217a8a799085 · 2025-04-24T10:02:20.000+02:00
diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py
@@ -1053,14 +1053,12 @@ def _partial_fit(
 
         # Save variables of interest as semi-private attributes
         self.model_ = model
-        self.n_features_ = ([
-            loader.dataset.get_input_dimension(session_id)
-            for session_id in range(loader.dataset.num_sessions)
-        ] if is_multisession else loader.dataset.input_dimension)
+
+        self.n_features_ = solver.n_features
+        self.num_sessions_ = solver.num_sessions
         self.solver_ = solver
         self.n_features_in_ = ([model[n].num_input for n in range(len(model))]
                                if is_multisession else model.num_input)
-        self.num_sessions_ = loader.dataset.num_sessions if is_multisession else None
 
         return self
 
@@ -1256,7 +1254,7 @@ def transform(self,
 
         return output.detach().cpu().numpy()
 
-    # Deprecated, kept for testing.
+    # NOTE: Deprecated -- transform is now handled by the solver; this method is kept only for testing.
     def transform_deprecated(self,
                              X: Union[npt.NDArray, torch.Tensor],
                              session_id: Optional[int] = None) -> npt.NDArray:
diff --git a/cebra/solver/base.py b/cebra/solver/base.py
@@ -234,6 +234,11 @@ def __getitem__(self, idx):
     index_dataset = IndexDataset(inputs)
     index_dataloader = DataLoader(index_dataset, batch_size=batch_size)
 
+    if len(index_dataloader) < 2:
+        raise ValueError(
+            f"Number of batches must be greater than 1, you can use transform without batching instead, got {len(index_dataloader)}."
+        )
+
     output = []
     for batch_idx, index_batch in enumerate(index_dataloader):
         # NOTE(celia): This is to prevent that adding the offset to the
@@ -449,6 +454,9 @@ def fit(
                 if logdir is not None:
                     self.save(logdir, f"checkpoint_{num_steps:#07d}.pth")
 
+        assert hasattr(self, "n_features")
+        assert hasattr(self, "num_sessions")
+
     def step(self, batch: cebra.data.Batch) -> dict:
         """Perform a single gradient update.
 
@@ -564,10 +572,8 @@ def _select_model(
         """
         raise NotImplementedError
 
-    @property
     def _check_is_fitted(self):
-        #NOTE(celia): instead of hasattr(model, "n_features_"), double check this!
-        if not (hasattr(self, "history") and len(self.history) > 0):
+        if not hasattr(self, "n_features"):
             raise ValueError(
                 f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with "
                 "appropriate arguments before using this estimator.")
@@ -598,15 +604,6 @@ def transform(self,
         Returns:
             The output embedding.
         """
-        if not self.is_fitted:
-            raise ValueError(
-                f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with "
-                "appropriate arguments before using this estimator.")
-
-        if batch_size is not None and batch_size < 1:
-            raise ValueError(
-                f"Batch size should be at least 1, got {batch_size}")
-
         if isinstance(inputs, list):
             raise ValueError(
                 "Inputs to transform() should be the data for a single session, but received a list."
@@ -623,7 +620,7 @@ def transform(self,
             pad_before_transform = False
 
         model.eval()
-        if batch_size is not None:
+        if batch_size is not None and inputs.shape[0] > int(batch_size * 2):
             output = _batched_transform(
                 model=model,
                 inputs=inputs,
diff --git a/cebra/solver/multi_session.py b/cebra/solver/multi_session.py
@@ -177,7 +177,7 @@ def _check_is_session_id_valid(self, session_id: Optional[int]):
             )
 
     def _select_model(self, inputs: torch.Tensor, session_id: Optional[int]):
-        """ Select the model based on the input dimension and session ID.
+        """ Select the (trained) model based on the input dimension and session ID.
 
         Args:
             inputs: Data to infer using the selected model.
@@ -189,6 +189,7 @@ def _select_model(self, inputs: torch.Tensor, session_id: Optional[int]):
             The model (first returns) and the offset of the model (second returns).
         """
         self._check_is_session_id_valid(session_id=session_id)
+        self._check_is_fitted()
         self._check_is_inputs_valid(inputs, session_id=session_id)
 
         model = self.model[session_id]
diff --git a/cebra/solver/single_session.py b/cebra/solver/single_session.py
@@ -103,7 +103,7 @@ def _select_model(
                             List[torch.Tensor]], session_id: Optional[int]
     ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module],
                cebra.data.datatypes.Offset]:
-        """ Select the model based on the input dimension and session ID.
+        """ Select the (trained) model based on the input dimension and session ID.
 
         Args:
             inputs: Data to infer using the selected model.
@@ -114,8 +114,9 @@ def _select_model(
         Returns:
             The model (first returns) and the offset of the model (second returns).
         """
-        self._check_is_inputs_valid(inputs, session_id=session_id)
         self._check_is_session_id_valid(session_id=session_id)
+        self._check_is_fitted()
+        self._check_is_inputs_valid(inputs, session_id=session_id)
 
         model = self.model
         offset = model.get_offset()
@@ -228,7 +229,7 @@ def _select_model(
                             List[torch.Tensor]], session_id: Optional[int]
     ) -> Tuple[Union[List[torch.nn.Module], torch.nn.Module],
                cebra.data.datatypes.Offset]:
-        """ Select the model based on the input dimension and session ID.
+        """ Select the (trained) model based on the input dimension and session ID.
 
         Args:
             inputs: Data to infer using the selected model.
@@ -239,8 +240,9 @@ def _select_model(
         Returns:
             The model (first returns) and the offset of the model (second returns).
         """
-        self._check_is_inputs_valid(inputs, session_id=session_id)
         self._check_is_session_id_valid(session_id=session_id)
+        self._check_is_fitted()
+        self._check_is_inputs_valid(inputs, session_id=session_id)
 
         model = self.model.module
         if hasattr(model, 'get_offset'):
diff --git a/tests/test_solver.py b/tests/test_solver.py
diff --git a/tests/test_solver_batched.py b/tests/test_solver_batched.py