
Commit 83c1669

CeliaBenquet authored and stes committed
Add tests to solver
1 parent b417a23 commit 83c1669

File tree

7 files changed: +458 -266 lines


cebra/data/base.py

Lines changed: 4 additions & 0 deletions
@@ -196,6 +196,7 @@ def load_batch(self, index: BatchIndex) -> Batch:
         """
         raise NotImplementedError()
 
+    @abc.abstractmethod
     def configure_for(self, model: "cebra.models.Model"):
         """Configure the dataset offset for the provided model.
 
@@ -205,6 +206,7 @@ def configure_for(self, model: "cebra.models.Model"):
         Args:
            model: The model to configure the dataset for.
        """
+        raise NotImplementedError
        self.offset = model.get_offset()
 
 
@@ -230,6 +232,8 @@ class Loader(abc.ABC, cebra.io.HasDevice):
        doc="""A dataset instance specifying a ``__getitem__`` function.""",
    )
 
+    time_offset: int = dataclasses.field(default=10)
+
    num_steps: int = dataclasses.field(
        default=None,
        doc=

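For orientation, here is a minimal, standalone sketch of the contract this change formalizes: `configure_for` is now an abstract method that every dataset has to implement, and the shared `time_offset` field is defined once on the base `Loader`. The `Toy*` classes below are hypothetical stand-ins, not cebra classes.

# Hypothetical stand-ins illustrating the configure_for contract; not cebra classes.
import abc
import dataclasses


@dataclasses.dataclass
class ToyOffset:
    left: int
    right: int

    def __len__(self) -> int:
        return self.left + self.right


class ToyModel:

    def get_offset(self) -> ToyOffset:
        # A model with a receptive field of 10 samples.
        return ToyOffset(left=5, right=5)


class ToyDatasetBase(abc.ABC):

    @abc.abstractmethod
    def configure_for(self, model: ToyModel) -> None:
        """Subclasses must set ``self.offset`` for the given model."""
        raise NotImplementedError


class ToyDataset(ToyDatasetBase):

    def configure_for(self, model: ToyModel) -> None:
        self.offset = model.get_offset()


@dataclasses.dataclass
class ToyLoader:
    # Shared across all loaders, mirroring the field moved to the base class.
    time_offset: int = 10


dataset = ToyDataset()
dataset.configure_for(ToyModel())
print(len(dataset.offset))  # -> 10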
cebra/data/multi_session.py

Lines changed: 12 additions & 3 deletions
@@ -111,6 +111,18 @@ def configure_for(self, model):
         for session in self.iter_sessions():
             session.configure_for(model)
 
+    def configure_for(self, model: "cebra.models.Model"):
+        """Configure the dataset offset for the provided model.
+
+        Call this function before indexing the dataset. This sets the
+        :py:attr:`offset` attribute of the dataset.
+
+        Args:
+            model: The model to configure the dataset for.
+        """
+        for i, session in enumerate(self.iter_sessions()):
+            session.configure_for(model[i])
+
 
 @dataclasses.dataclass
 class MultiSessionLoader(cebra_data.Loader):
@@ -121,8 +133,6 @@ class MultiSessionLoader(cebra_data.Loader):
     dimension, it is better to use a :py:class:`cebra.data.single_session.MixedDataLoader`.
     """
 
-    time_offset: int = dataclasses.field(default=10)
-
     def __post_init__(self):
         super().__post_init__()
         self.sampler = cebra_distr.MultisessionSampler(self.dataset,
@@ -151,7 +161,6 @@ class ContinuousMultiSessionDataLoader(MultiSessionLoader):
     """Contrastive learning conditioned on a continuous behavior variable."""
 
     conditional: str = "time_delta"
-    time_offset: int = dataclasses.field(default=10)
 
     @property
     def index(self):

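The collection-level `configure_for` added above expects one model per session and forwards the matching entry by index. A short continuation of the stand-ins from the previous sketch (the `ToyDatasetCollection` is hypothetical, not the cebra class):

# Continues the Toy* stand-ins from the sketch above; hypothetical, not cebra code.
class ToyDatasetCollection:

    def __init__(self, *sessions: ToyDataset):
        self._sessions = list(sessions)

    def iter_sessions(self):
        return iter(self._sessions)

    def configure_for(self, model) -> None:
        # ``model`` holds one model per session, e.g. a list or nn.ModuleList.
        for i, session in enumerate(self.iter_sessions()):
            session.configure_for(model[i])


collection = ToyDatasetCollection(ToyDataset(), ToyDataset())
collection.configure_for([ToyModel(), ToyModel()])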
cebra/data/single_session.py

Lines changed: 11 additions & 3 deletions
@@ -72,6 +72,17 @@ def load_batch(self, index: BatchIndex) -> Batch:
             reference=self[index.reference],
         )
 
+    def configure_for(self, model: "cebra.models.Model"):
+        """Configure the dataset offset for the provided model.
+
+        Call this function before indexing the dataset. This sets the
+        :py:attr:`offset` attribute of the dataset.
+
+        Args:
+            model: The model to configure the dataset for.
+        """
+        self.offset = model.get_offset()
+
 
 @dataclasses.dataclass
 class DiscreteDataLoader(cebra_data.Loader):
@@ -192,7 +203,6 @@ class ContinuousDataLoader(cebra_data.Loader):
             and become equivalent to time contrastive learning.
             """,
     )
-    time_offset: int = dataclasses.field(default=10)
     delta: float = dataclasses.field(default=0.1)
 
     def __post_init__(self):
@@ -274,7 +284,6 @@ class MixedDataLoader(cebra_data.Loader):
     """
 
     conditional: str = dataclasses.field(default="time_delta")
-    time_offset: int = dataclasses.field(default=10)
 
     @property
     def dindex(self):
@@ -337,7 +346,6 @@ class HybridDataLoader(cebra_data.Loader):
     """
 
     conditional: str = dataclasses.field(default="time_delta")
-    time_offset: int = dataclasses.field(default=10)
     delta: float = dataclasses.field(default=0.1)
 
     @property

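For a single-session dataset, the configured offset describes how many samples to the left and right of an index belong to one model input. A rough, standalone illustration of what an offset of (left=5, right=5) means when slicing; this is not cebra's actual indexing code:

# Rough illustration only; not cebra's actual __getitem__ logic.
import torch

neural = torch.randn(1000, 30)   # (time, channels)
left, right = 5, 5               # offset set by configure_for
t = 100
window = neural[t - left:t + right]
print(window.shape)              # torch.Size([10, 30])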
cebra/integrations/sklearn/cebra.py

Lines changed: 1 addition & 3 deletions
@@ -776,8 +776,6 @@ def _configure_for_all(
                             f"receptive fields/offsets larger than 1 via the sklearn API. "
                             f"Please use a different model, or revert to the pytorch "
                             f"API for training.")
-
-                d.configure_for(model[n])
         else:
             if not isinstance(model, cebra.models.ConvolutionalModelMixin):
                 if len(model.get_offset()) > 1:
@@ -787,7 +785,7 @@
                         f"Please use a different model, or revert to the pytorch "
                         f"API for training.")
 
-            dataset.configure_for(model)
+        dataset.configure_for(model)
 
     def _select_model(self, X: Union[npt.NDArray, torch.Tensor],
                       session_id: int):

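With the per-session dispatch living on the dataset collection, `_configure_for_all` keeps the receptive-field check per branch but ends with a single `dataset.configure_for(model)` call. A condensed, hypothetical sketch of that flow (`check_offset`, `configure_for_all` and their arguments are illustrative, not cebra's API):

# Condensed, hypothetical sketch of the resulting control flow; not cebra's code.
def check_offset(model, is_convolutional: bool) -> None:
    # The sklearn API rejects non-convolutional models with offsets larger than 1.
    if not is_convolutional and len(model.get_offset()) > 1:
        raise ValueError("Use a convolutional model, or the pytorch API.")


def configure_for_all(dataset, model, multisession: bool) -> None:
    if multisession:
        for m in model:
            check_offset(m, is_convolutional=False)
    else:
        check_offset(model, is_convolutional=False)
    # Single entry point; a dataset collection dispatches per session internally.
    dataset.configure_for(model)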
cebra/solver/base.py

Lines changed: 52 additions & 38 deletions
@@ -37,6 +37,7 @@
 
 import literate_dataclasses as dataclasses
 import numpy as np
+import numpy.typing as npt
 import torch
 import torch.nn.functional as F
 import tqdm
@@ -89,32 +90,6 @@ def _check_indices(batch_start_idx: int, batch_end_idx: int,
     )
 
 
-def _get_batch(inputs: torch.Tensor, offset: cebra.data.Offset,
-               batch_start_idx: int, batch_end_idx: int) -> torch.Tensor:
-    """Get a batch of samples between the `batch_start_idx` and `batch_end_idx`.
-
-    Args:
-        inputs: Input data.
-        offset: Model offset.
-        batch_start_idx: Index of the first sample in the batch.
-        batch_end_idx: Index of the first sample in the batch.
-
-    Returns:
-        The batch.
-    """
-
-    if batch_start_idx == 0:  # First batch
-        indices = batch_start_idx, (batch_end_idx + offset.right - 1)
-    elif batch_end_idx == len(inputs):  # Last batch
-        indices = (batch_start_idx - offset.left), batch_end_idx
-    else:
-        indices = batch_start_idx - offset.left, batch_end_idx + offset.right - 1
-
-    _check_indices(indices[0], indices[1], offset, len(inputs))
-    batched_data = inputs[slice(*indices)]
-    return batched_data
-
-
 def _add_batched_zero_padding(batched_data: torch.Tensor,
                               offset: cebra.data.Offset, batch_start_idx: int,
                               batch_end_idx: int,
@@ -145,6 +120,45 @@ def _add_batched_zero_padding(batched_data: torch.Tensor,
     return batched_data
 
 
+def _get_batch(inputs: torch.Tensor, offset: Optional[cebra.data.Offset],
+               batch_start_idx: int, batch_end_idx: int,
+               pad_before_transform: bool) -> torch.Tensor:
+    """Get a batch of samples between the `batch_start_idx` and `batch_end_idx`.
+
+    Args:
+        inputs: Input data.
+        offset: Model offset.
+        batch_start_idx: Index of the first sample in the batch.
+        batch_end_idx: Index of the last sample in the batch.
+        pad_before_transform: If True, zero-pad the batched data.
+
+    Returns:
+        The batch.
+    """
+    if offset is None:
+        raise ValueError(f"offset cannot be null.")
+
+    if batch_start_idx == 0:  # First batch
+        indices = batch_start_idx, (batch_end_idx + offset.right - 1)
+    elif batch_end_idx == len(inputs):  # Last batch
+        indices = (batch_start_idx - offset.left), batch_end_idx
+    else:
+        indices = batch_start_idx - offset.left, batch_end_idx + offset.right - 1
+
+    _check_indices(indices[0], indices[1], offset, len(inputs))
+    batched_data = inputs[slice(*indices)]
+
+    if pad_before_transform:
+        batched_data = _add_batched_zero_padding(
+            batched_data=batched_data,
+            offset=offset,
+            batch_start_idx=batch_start_idx,
+            batch_end_idx=batch_end_idx,
+            num_samples=len(inputs))
+
+    return batched_data
+
+
 def _inference_transform(model: cebra.models.Model,
                          inputs: torch.Tensor) -> torch.Tensor:
     """Compute the embedding on the inputs using the model provided.
@@ -156,9 +170,7 @@ def _inference_transform(model: cebra.models.Model,
     Returns:
         The embedding.
     """
-    #TODO(rodrigo): I am not sure what is the best way with dealing with the types and
-    # device when using batched inference. This works for now.
-    inputs = inputs.type(torch.FloatTensor).to(next(model.parameters()).device)
+    inputs = inputs.float().to(next(model.parameters()).device)
 
     if isinstance(model, cebra.models.ConvolutionalModelMixin):
         # Fully convolutional evaluation, switch (T, C) -> (1, C, T)
@@ -228,15 +240,8 @@ def __getitem__(self, idx):
         batched_data = _get_batch(inputs=inputs,
                                   offset=offset,
                                   batch_start_idx=batch_start_idx,
-                                  batch_end_idx=batch_end_idx)
-
-        if pad_before_transform:
-            batched_data = _add_batched_zero_padding(
-                batched_data=batched_data,
-                offset=offset,
-                batch_start_idx=batch_start_idx,
-                batch_end_idx=batch_end_idx,
-                num_samples=len(inputs))
+                                  batch_end_idx=batch_end_idx,
+                                  pad_before_transform=pad_before_transform)
 
         output_batch = _inference_transform(model, batched_data)
         output.append(output_batch)
@@ -549,6 +554,15 @@ def transform(self,
         Returns:
             The output embedding.
         """
+        if isinstance(inputs, list):
+            raise NotImplementedError(
+                "Inputs to transform() should be the data for a single session."
+            )
+
+        elif not isinstance(inputs, torch.Tensor):
+            raise ValueError(
+                f"Inputs should be a torch.Tensor, not {type(inputs)}.")
+
         if not hasattr(self, "n_features"):
             raise ValueError(
                 f"This {type(self).__name__} instance is not fitted yet. Call 'fit' with "

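The refactored `_get_batch` now owns both the index arithmetic and the optional zero padding, so `__getitem__` can pass `pad_before_transform` straight through. The index logic can be checked in isolation; below is a standalone sketch using a hypothetical `Offset` namedtuple in place of `cebra.data.Offset`:

# Standalone check of the batch-index arithmetic; Offset is a hypothetical stand-in.
from collections import namedtuple

import torch

Offset = namedtuple("Offset", ["left", "right"])

inputs = torch.randn(100, 3)          # (time, channels)
offset = Offset(left=2, right=3)


def batch_slice(start: int, end: int):
    if start == 0:                    # first batch: extend to the right only
        return start, end + offset.right - 1
    elif end == len(inputs):          # last batch: extend to the left only
        return start - offset.left, end
    return start - offset.left, end + offset.right - 1


print(batch_slice(0, 50))    # (0, 52)
print(batch_slice(25, 75))   # (23, 77)
print(batch_slice(50, 100))  # (48, 100)

When `pad_before_transform` is set, the same function applies `_add_batched_zero_padding` before returning. Separately, `transform()` now validates its input: a list raises `NotImplementedError` (it expects the data for a single session), and anything other than a `torch.Tensor` raises `ValueError`.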
cebra/solver/single_session.py

Lines changed: 4 additions & 1 deletion
@@ -227,7 +227,10 @@ def _select_model(
         self._check_is_session_id_valid(session_id=session_id)
 
         model = self.model.module
-        offset = model.get_offset()
+        if hasattr(model, 'get_offset'):
+            offset = model.get_offset()
+        else:
+            offset = None
         return model, offset
 
 
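A minimal illustration of the fallback introduced above, with a hypothetical model class that has no `get_offset` method:

# Hypothetical model without a get_offset method.
class PlainModule:
    pass


model = PlainModule()
offset = model.get_offset() if hasattr(model, "get_offset") else None
print(offset)  # -> None, instead of raising AttributeError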