Add num_negatives parameter to the sklearn CEBRA implementation

stes · stes · commit 6c2d55919e55 · 2025-08-02T16:25:51.000+02:00
diff --git a/cebra/data/multi_session.py b/cebra/data/multi_session.py
@@ -236,10 +236,10 @@ def __post_init__(self):
         self.sampler = cebra.distributions.UnifiedSampler(
             self.dataset, self.time_offset)
 
-        if self.batch_size < 2:
+        if self.batch_size is not None and self.batch_size < 2:
             raise ValueError("UnifiedLoader does not support batch_size < 2.")
 
-        if self.num_negatives < 2:
+        if self.num_negatives is not None and self.num_negatives < 2:
             raise ValueError(
                 "UnifiedLoader does not support num_negatives < 2.")
 
diff --git a/cebra/integrations/sklearn/cebra.py b/cebra/integrations/sklearn/cebra.py
@@ -501,6 +501,9 @@ class CEBRA(TransformerMixin, BaseEstimator):
             A Tuple of masking types and their corresponding required masking values. The keys are the
             names of the Mask instances and formatting should be ``((key, value), (key, value))``.
             |Default:| ``None``.
+        num_negatives (int):
+            The number of negative samples to use for training. If ``None``, the number of negative samples
+            will be set to the batch size. |Default:| ``None``.
 
     Example:
 
@@ -576,6 +579,7 @@ def __init__(
         ),
         masking_kwargs: Tuple[Tuple[str, Union[float, List[float],
                                                Tuple[float, ...]]], ...] = None,
+        num_negatives: int = None,
     ):
         self.__dict__.update(locals())
 
@@ -728,6 +732,7 @@ def _prepare_loader(self, dataset: cebra.data.Dataset, max_iterations: int,
                 dataset=dataset,
                 batch_size=self.batch_size,
                 num_steps=max_iterations,
+                num_negatives=self.num_negatives,
             ),
             extra_kwargs=dict(
                 time_offsets=self.time_offsets,
diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py
@@ -1544,3 +1544,20 @@ def test_last_incomplete_batch_smaller_than_offset():
     model.fit(train.neural, train.continuous)
 
     _ = model.transform(train.neural, batch_size=300)
+
+
+@pytest.mark.parametrize("batch_size,num_negatives", [
+    (None, None),  # both left unset
+    (100, None),  # num_negatives unset: the CEBRA docstring says it falls back to the batch size
+    (100, 100),  # num_negatives passed explicitly, equal to the batch size
+])
+def test_num_negatives(batch_size, num_negatives):  # smoke test: no assertions on the outputs
+    train = cebra.data.TensorDataset(neural=np.random.rand(20111, 100),
+                                     continuous=np.random.rand(20111, 2))
+
+    model = cebra.CEBRA(max_iterations=2,  # only two training iterations
+                        batch_size=batch_size,
+                        num_negatives=num_negatives,
+                        device="cpu")
+    model.fit(train.neural, train.continuous)
+    _ = model.transform(train.neural)  # result discarded; only checks that the call runs