Skip to content

Commit c0dc20d

Browse files
committed
docstring, fix test
1 parent 3a4bfc6 commit c0dc20d

File tree

4 files changed

+69
-46
lines changed

4 files changed

+69
-46
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ _set_transformer.py
1010
__unet.py
1111
cifar10.ipynb
1212
grfs.ipynb
13-
simple.ipynb
13+
simple.ipynb
14+
mnist_clouds.py

data/utils.py

Lines changed: 36 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,42 @@ def loop(
167167
)
168168

169169

170+
def maybe_convert(a):
    """Convert a JAX array to a NumPy array; return any other input unchanged."""
    if isinstance(a, jnp.ndarray):
        return np.asarray(a)
    return a
172+
173+
174+
class TensorDataset(torch.utils.data.Dataset):
    """Dataset wrapping up to three aligned tensors, named "x", "q" and "a".

    Each entry of `tensors` may be None (it is then omitted from the tuples
    returned by `__getitem__`); non-None entries are copied into torch
    tensors (JAX arrays are converted via `maybe_convert` first). An optional
    per-name transform is applied to each element on access.
    """

    def __init__(self, tensors, x_transform=None, q_transform=None, a_transform=None):
        self.names = ["x", "q", "a"]

        # Copy each provided array into a torch tensor; keep None placeholders.
        self.data = {}
        for name, tensor in zip(self.names, tensors):
            if exists(tensor):
                self.data[name] = torch.as_tensor(np.copy(maybe_convert(tensor)))
            else:
                self.data[name] = None

        # Pair each tensor name with its (possibly absent) transform.
        self.transforms = {}
        for name, transform in zip(self.names, (x_transform, q_transform, a_transform)):
            self.transforms[name] = transform if exists(transform) else None

        # Sanity check: all non-None tensors must share the same first dimension.
        # NOTE(review): `assert` is stripped under `python -O`.
        lengths = {tensor.shape[0] for tensor in self.data.values() if tensor is not None}
        assert len(lengths) == 1, "All input tensors must have the same length."

    def __getitem__(self, index):
        """Return the index-th element of each present tensor, transformed, as a tuple."""
        items = []
        for name in self.names:
            tensor = self.data.get(name)
            if not exists(tensor):
                continue
            value = tensor[index]
            transform = self.transforms[name]
            if transform:
                value = transform(value)
            items.append(value)
        return tuple(items)

    def __len__(self):
        """Shared first-dimension length of the present tensors."""
        first_dims = (tensor.shape[0] for tensor in self.data.values() if tensor is not None)
        return next(first_dims)
204+
205+
170206
@jaxtyped(typechecker=typechecker)
171207
@dataclass
172208
class ScalerDataset:
@@ -220,43 +256,6 @@ class ScalerDataset:
220256
]
221257

222258

223-
def maybe_convert(a):
224-
return np.asarray(a) if isinstance(a, jnp.ndarray) else a
225-
226-
227-
class TensorDataset(torch.utils.data.Dataset):
228-
def __init__(self, tensors, x_transform=None, q_transform=None, a_transform=None):
229-
self.names = ["x", "q", "a"]
230-
self.data = {
231-
name: torch.as_tensor(np.copy(maybe_convert(t))) if exists(t) else None
232-
for name, t in zip(self.names, tensors)
233-
}
234-
235-
self.transforms = {
236-
name: transform if exists(transform) else None
237-
for name, transform in zip(self.names, [x_transform, q_transform, a_transform])
238-
}
239-
240-
# Sanity check: all non-None tensors must have same first dimension
241-
lengths = [v.shape[0] for v in self.data.values() if v is not None]
242-
assert len(set(lengths)) == 1, "All input tensors must have the same length."
243-
244-
def __getitem__(self, index):
245-
output = []
246-
for key in self.names:
247-
tensor = self.data.get(key)
248-
if exists(tensor):
249-
val = tensor[index]
250-
if self.transforms[key]:
251-
val = self.transforms[key](val)
252-
val = jnp.asarray(val.numpy())
253-
output.append(val)
254-
return tuple(output)
255-
256-
def __len__(self):
257-
return next(v.shape[0] for v in self.data.values() if v is not None)
258-
259-
260259
@jaxtyped(typechecker=typechecker)
261260
def dataset_from_tensors(
262261
X: Float[Array, "n ..."],

sbgm/_train.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
import jax.numpy as jnp
99
import jax.random as jr
1010
import equinox as eqx
11-
from jaxtyping import Key, Array, Float, PyTree, jaxtyped
11+
from jaxtyping import Key, Array, Float, Scalar, PyTree, jaxtyped
1212
from beartype import beartype as typechecker
13-
from ml_collections import ConfigDict
1413
import optax
14+
from ml_collections import ConfigDict
1515
from tqdm.auto import trange
1616

1717
from .sde import SDE
@@ -69,7 +69,7 @@ def accumulate_gradients_scan(
6969
n_minibatches: int,
7070
*,
7171
grad_fn: Callable
72-
) -> Tuple[Float[Array, ""], PyTree]:
72+
) -> Tuple[Scalar, PyTree]:
7373
batch_size = xqat[0].shape[0]
7474
minibatch_size = batch_size // n_minibatches
7575

@@ -124,9 +124,9 @@ def single_loss_fn(
124124
x: Float[Array, "..."],
125125
q: Optional[Float[Array, "..."]],
126126
a: Optional[Float[Array, "..."]],
127-
t: Float[Array, ""],
127+
t: Scalar,
128128
key: Key
129-
) -> Float[Array, ""]:
129+
) -> Scalar:
130130
key_noise, key_apply = jr.split(key)
131131
mean, std = sde.marginal_prob(x, t)
132132
noise = jr.normal(key_noise, x.shape)
@@ -183,7 +183,7 @@ def make_step(
183183
sharding: Optional[jax.sharding.NamedSharding] = None,
184184
replicated_sharding: Optional[jax.sharding.NamedSharding] = None
185185
) -> Tuple[
186-
Float[Array, ""], Model, Key[jnp.ndarray, "..."], optax.OptState
186+
Scalar, Model, Key[jnp.ndarray, "..."], optax.OptState
187187
]:
188188
model = eqx.nn.inference_mode(model, False)
189189

sbgm/models/__init__.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import equinox as eqx
33
from jaxtyping import Key
44
import numpy as np
5-
import ml_collections
5+
from ml_collections import ConfigDict
66

77
from ._mixer import Mixer2d
88
from ._mlp import ResidualNetwork
@@ -13,17 +13,40 @@
1313
def get_model(
1414
model_key: Key,
1515
model_type: str,
16-
config: ml_collections.ConfigDict,
16+
config: ConfigDict,
1717
data_shape: Sequence[int],
1818
context_shape: Optional[Sequence[int]] = None,
1919
parameter_dim: Optional[int] = None
2020
) -> eqx.Module:
21+
"""
22+
Get the model based on the specified type and configuration.
23+
24+
Args:
25+
model_key: JAX random key for model initialization.
26+
model_type: Type of the model to create (e.g., "Mixer", "UNet", "mlp", "DiT").
27+
config: Configuration dictionary containing model parameters.
28+
data_shape: Shape of the input data (e.g. image dimensions, channels first).
29+
context_shape: Shape of the context map, if applicable.
30+
parameter_dim: Dimension of the additional conditioning.
31+
Returns:
32+
An initialized instance of the specified model type.
33+
34+
Raises:
35+
ValueError: If the model type is not recognized.
36+
"""
37+
2138
# Grab channel assuming 'q' is a map like x
2239
if context_shape is not None:
2340
context_channels, *_ = context_shape.shape
2441
else:
2542
context_channels = None
2643

44+
if model_type not in ["Mixer", "UNet", "mlp", "DiT"]:
45+
raise ValueError(
46+
f"Model type {model_type} is not recognized. "
47+
"Choose from 'Mixer', 'UNet', 'mlp', or 'DiT'."
48+
)
49+
2750
if model_type == "Mixer":
2851
model = Mixer2d(
2952
data_shape,

0 commit comments

Comments
 (0)