@@ -108,6 +108,149 @@ def infonce_loss(
     return avg_loss


+def goodness_of_fit_score(cebra_model: cebra_sklearn_cebra.CEBRA,
+                          X: Union[npt.NDArray, torch.Tensor],
+                          *y,
+                          session_id: Optional[int] = None,
+                          num_batches: int = 500) -> float:
116+ """Compute the goodness of fit score on a *single session* dataset on the model.
117+
118+ This function uses the :func:`infonce_loss` function to compute the InfoNCE loss
119+ for a given `cebra_model` and the :func:`infonce_to_goodness_of_fit` function
120+ to derive the goodness of fit from the InfoNCE loss.
121+
122+ Args:
123+ cebra_model: The model to use to compute the InfoNCE loss on the samples.
124+ X: A 2D data matrix, corresponding to a *single session* recording.
125+ y: An arbitrary amount of continuous indices passed as 2D matrices, and up to one
126+ discrete index passed as a 1D array. Each index has to match the length of ``X``.
127+ session_id: The session ID, an :py:class:`int` between 0 and :py:attr:`cebra.CEBRA.num_sessions`
128+ for multisession, set to ``None`` for single session.
129+ num_batches: The number of iterations to consider to evaluate the model on the new data.
130+ Higher values will give a more accurate estimate. Set it to at least 500 iterations.
131+
132+ Returns:
133+ The average GoF score estimated over ``num_batches`` batches from the data distribution.
134+
135+ Related:
136+ :func:`infonce_to_goodness_of_fit`
137+
138+ Example:
139+
140+ >>> import cebra
141+ >>> import numpy as np
142+ >>> neural_data = np.random.uniform(0, 1, (1000, 20))
143+ >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512)
144+ >>> cebra_model.fit(neural_data)
145+ CEBRA(batch_size=512, max_iterations=10)
146+ >>> gof = cebra.sklearn.metrics.goodness_of_fit_score(cebra_model, neural_data)
147+ """
+    loss = infonce_loss(cebra_model,
+                        X,
+                        *y,
+                        session_id=session_id,
+                        num_batches=num_batches,
+                        correct_by_batchsize=False)
+    return infonce_to_goodness_of_fit(loss, cebra_model)
+
+
+def goodness_of_fit_history(model: cebra_sklearn_cebra.CEBRA) -> np.ndarray:
158+ """Return the history of the goodness of fit score.
159+
160+ Args:
161+ model: A trained CEBRA model.
162+
163+ Returns:
164+ A numpy array containing the goodness of fit values, measured in bits.
165+
166+ Related:
167+ :func:`infonce_to_goodness_of_fit`
168+
169+ Example:
170+
171+ >>> import cebra
172+ >>> import numpy as np
173+ >>> neural_data = np.random.uniform(0, 1, (1000, 20))
174+ >>> cebra_model = cebra.CEBRA(max_iterations=10, batch_size = 512)
175+ >>> cebra_model.fit(neural_data)
176+ CEBRA(batch_size=512, max_iterations=10)
177+ >>> gof_history = cebra.sklearn.metrics.goodness_of_fit_history(cebra_model)
178+ """
+    infonce = np.array(model.state_dict_["log"]["total"])
+    return infonce_to_goodness_of_fit(infonce, model)
+
+
+def infonce_to_goodness_of_fit(
+        infonce: Union[float, np.ndarray],
+        model: Optional[cebra_sklearn_cebra.CEBRA] = None,
+        batch_size: Optional[int] = None,
+        num_sessions: Optional[int] = None) -> Union[float, np.ndarray]:
188+ """Given a trained CEBRA model, return goodness of fit metric.
189+
190+ The goodness of fit ranges from 0 (lowest meaningful value)
191+ to a positive number with the unit "bits", the higher the
192+ better.
193+
194+ Values lower than 0 bits are possible, but these only occur
195+ due to numerical effects. A perfectly collapsed embedding
196+ (e.g., because the data cannot be fit with the provided
197+ auxiliary variables) will have a goodness of fit of 0.
198+
+    The conversion between the generalized InfoNCE metric that
+    CEBRA is trained with and the goodness of fit computed with this
+    function is
+
+    .. math::
+
+        S = \\log N - \\text{InfoNCE},
+
+    where :math:`N` is the effective batch size used for training,
+    i.e., ``batch_size * num_sessions``.
+
+    To use this function, either provide a trained CEBRA model or the
+    batch size and number of sessions.
+
+    Args:
+        infonce: The InfoNCE loss, either a single value or an iterable of values.
+        model: The trained CEBRA model.
+        batch_size: The batch size used to train the model.
+        num_sessions: The number of sessions used to train the model.
+
+    Returns:
+        The goodness of fit, measured in bits: a single value if ``infonce``
+        is a single value, otherwise a numpy array.
+
+    Raises:
+        RuntimeError: If the provided model is not fit to data.
+        ValueError: If both ``model`` and ``(batch_size, num_sessions)`` are provided.
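+
+    Example:
+
+        An illustrative conversion, using assumed values rather than a real
+        training run: with ``batch_size=512`` and a single session, the
+        chance level is :math:`\\log 512 \\approx 6.24` nats, so an InfoNCE
+        loss of 5.0 corresponds to roughly 1.79 bits.
+
+        >>> import cebra
+        >>> gof = cebra.sklearn.metrics.infonce_to_goodness_of_fit(
+        ...     5.0, batch_size=512, num_sessions=1)
+        >>> print(f"{gof:.2f}")
+        1.79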
222+ """
+    if model is not None:
+        if batch_size is not None or num_sessions is not None:
+            raise ValueError(
+                "batch_size and num_sessions should not be provided if model is provided."
+            )
+        if not hasattr(model, "state_dict_"):
+            raise RuntimeError("Fit the CEBRA model first.")
+        if model.batch_size is None:
+            raise ValueError(
+                "Computing the goodness of fit is not yet supported for "
+                "models trained on the full dataset (batch_size=None).")
+        batch_size = model.batch_size
+        num_sessions = model.num_sessions_
+        if num_sessions is None:
+            num_sessions = 1
+    else:
+        if batch_size is None or num_sessions is None:
+            raise ValueError(
+                f"batch_size ({batch_size}) and num_sessions ({num_sessions}) "
+                f"should be provided if model is not provided.")
+
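+    # A perfectly collapsed embedding reaches the chance-level loss of
+    # log(batch_size * num_sessions) nats; the gap between chance level and
+    # the observed loss is converted from nats to bits via log2(e).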
+    nats_to_bits = np.log2(np.e)
+    chance_level = np.log(batch_size * num_sessions)
+    return (chance_level - infonce) * nats_to_bits
+
+
 def _consistency_scores(
     embeddings: List[Union[npt.NDArray, torch.Tensor]],
     datasets: List[Union[int, str]],