diff --git a/dwave/plugins/torch/models/baselines.py b/dwave/plugins/torch/models/baselines.py
new file mode 100755
index 0000000..62bbba7
--- /dev/null
+++ b/dwave/plugins/torch/models/baselines.py
@@ -0,0 +1,191 @@
+# Copyright 2025 D-Wave
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from math import pi as PI
+
+import numpy as np
+import numpy.linalg as la
+import torch
+from torch import nn
+
+from dwave.plugins.torch.utils import bit2spin_soft, spin2bit_soft
+
+
+class SphericalBinary(nn.Module):
+
+    @staticmethod
+    def nearestPD_np(A):
+        # https://github.com/ok1zjf/LBAE/blob/master/sampler.py#L41C2-L41C3
+        def chol(B):
+            try:
+                L = la.cholesky(B)
+                return torch.tensor(L)
+            except la.LinAlgError:
+                return None
+        B = (A + A.T) / 2
+        _, s, V = la.svd(B)
+
+        H = np.dot(V.T, np.dot(np.diag(s), V))
+        A2 = (B + H) / 2
+        A3 = (A2 + A2.T) / 2
+
+        L = chol(A3)
+        if L is not None:
+            return torch.tensor(A3.tolist()), torch.tensor(L.tolist())
+
+        # Still H is not PSD, fixing it...
+        spacing = np.spacing(la.norm(A))
+        # The above is different from [1]. It appears that MATLAB's `chol` Cholesky
+        # decomposition will accept matrixes with exactly 0-eigenvalue, whereas
+        # Numpy's will not. So where [1] uses `eps(mineig)` (where `eps` is Matlab
+        # for `np.spacing`), we use the above definition. CAVEAT: our `spacing`
+        # will be much larger than [1]'s `eps(mineig)`, since `mineig` is usually on
+        # the order of 1e-16, and `eps(1e-16)` is on the order of 1e-34, whereas
+        # `spacing` will, for Gaussian random matrixes of small dimension, be on
+        # othe order of 1e-16. In practice, both ways converge, as the unit test
+        # below suggests.
+        I = np.eye(A.shape[0])
+        k = 1
+        while True:
+            L = chol(A3)
+            if L is not None:
+                break
+
+            # Increasing eigen values...
+            mineig = np.min(np.real(la.eigvals(A3)))
+            A3 += I * (-mineig * k**2 + spacing)
+            k += 1
+
+        return torch.tensor(A3.tolist()), torch.tensor(L.tolist())
+
+    @staticmethod
+    def nearestPD_torch(A):
+        # https://github.com/ok1zjf/LBAE/blob/master/sampler.py#L41C2-L41C3
+        la = torch.linalg
+
+        def chol(B):
+            try:
+                L = la.cholesky(B)
+                return torch.tensor(L)
+            except la.LinAlgError:
+                return None
+        B = (A + A.T) / 2
+        _, s, V = la.svd(B)
+
+        H = V.T@(s.diag() @ V)
+        A2 = (B + H) / 2
+        A3 = (A2 + A2.T) / 2
+
+        L = chol(A3)
+        if L is not None:
+            return A3, L
+
+        # Still H is not PSD, fixing it...
+        spacing = 1e-30
+        # spacing = np.spacing(la.norm(A)).item()
+        # The above is different from [1]. It appears that MATLAB's `chol` Cholesky
+        # decomposition will accept matrixes with exactly 0-eigenvalue, whereas
+        # Numpy's will not. So where [1] uses `eps(mineig)` (where `eps` is Matlab
+        # for `np.spacing`), we use the above definition. CAVEAT: our `spacing`
+        # will be much larger than [1]'s `eps(mineig)`, since `mineig` is usually on
+        # the order of 1e-16, and `eps(1e-16)` is on the order of 1e-34, whereas
+        # `spacing` will, for Gaussian random matrixes of small dimension, be on
+        # othe order of 1e-16. In practice, both ways converge, as the unit test
+        # below suggests.
+        I = torch.eye(A.shape[0], device=A.device)
+        k = 1
+        while True:
+            L = chol(A3)
+            if L is not None:
+                break
+
+            # Increasing eigen values...
+            mineig = torch.min(torch.real(la.eigvals(A3)))
+            A3 += I * (-mineig * k**2 + spacing)
+            k += 1
+
+        return A3, L
+
+    def __init__(self, d: int):
+        # Based on latent bernoulli autoencoder paper https://proceedings.mlr.press/v119/fajtl20a.html
+        super().__init__()
+        self.d = d
+        self.V = None
+
+    def fit(self, x):
+        if x.abs().min() != x.abs().max() != 1:
+            raise RuntimeError("`x` must be spin-valued.")
+        bs, d = x.shape
+        first_moment = x.mean(0)
+        second_moment = torch.einsum("bi, bj -> ij", x, x) / bs
+        moments_matrix = torch.eye(d + 1, device=x.device)
+        # Upper-left corner is second moment
+        moments_matrix.data[:self.d, :self.d] = second_moment
+        # Right-most column (up to last second last entry) is first moment
+        moments_matrix.data[:-1, -1] = first_moment
+        # Bottom-most row (up to last second last entry) is first moment
+        moments_matrix.data[-1, :-1] = first_moment
+        # Bottom-right entry is one (redundant code; moments matrix is initialized with `torch.eye`)
+        moments_matrix.data[-1, -1] = 1
+        _, V = self.nearestPD_torch(torch.cos(PI/2*(1 - moments_matrix)))
+        self.V = torch.nn.Parameter(V.to(x.device), requires_grad=False)
+
+    def sample(self, n):
+        if self.V is None:
+            raise RuntimeError("Model needs to be fitted before sampling.")
+        r = torch.randn((self.d+1, n), device=self.V.device)
+        spins_augmented = (self.V @ r).sign()
+        spins_augmented[spins_augmented == 0] = 1
+        spins = (spins_augmented[:-1]
+                 * spins_augmented[-1].unsqueeze(0)).mT
+        return spins
+
+
+class GaussianBinary(nn.Module):
+    def __init__(self, d: int):
+        super().__init__()
+        self.mu = torch.nn.Parameter(torch.rand(d))
+        self.cov = torch.nn.Parameter(torch.randn((d, d)))
+        self.d = d
+        self.epsilon = torch.nn.Parameter(1e-3*torch.eye(d))
+
+    def fit(self, x):
+        if x.abs().min() != x.abs().max() != 1:
+            raise RuntimeError("`x` must be spin-valued.")
+        self.mu.data[:] = x.mean(0)
+        self.cov.data[:] = x.mT.cov()
+
+    def sample(self, n):
+        c = torch.linalg.cholesky(self.cov + self.epsilon)
+        z = torch.randn((n, self.d), device=self.mu.device)
+        x = z@c.mT + self.mu
+        return bit2spin_soft((x >= 0).float())
+
+
+class IndependentBernoulli(nn.Module):
+    def __init__(self, d: int):
+        super().__init__()
+        self.p = torch.nn.Parameter(torch.rand(d))
+        self.d = d
+
+    def fit(self, x):
+        if x.abs().min() != x.abs().max() != 1:
+            raise RuntimeError("`x` must be spin-valued.")
+        self.p.data[:] = spin2bit_soft(x.mean(0))
+
+    def sample(self, n):
+        if self.p is None:
+            raise RuntimeError("Model parameters have not been defined.")
+        pp = self.p.unsqueeze(0).repeat(n, 1)
+        return bit2spin_soft(pp.bernoulli())