1+ """
2+ This module implements the `LogLinearCDE` class using JAX and Equinox. The model is a
3+ block-diagonal Linear Controlled Differential Equation (CDE), where the output is
4+ approximated during training using the Log-ODE method.
5+
6+ Attributes of the `LogLinearCDE` model:
7+ - `init_layer`: The linear layer used to initialize the hidden state $h_0$ from the input $x_0$.
8+ - `out_layer`: The linear layer used to produce final predictions from the hidden state.
9+ - `vf_A`: Learnable parameters for the linear vector field, shaped as flattened block matrices.
10+ - `hidden_dim`: The dimension of the hidden state $h_t$.
11+ - `block_size`: Size of each square block in the block-diagonal vector field.
12+ - `num_blocks`: Number of blocks, computed as `hidden_dim // block_size`.
13+ - `parallel_steps`: Chunk size used when composing log-flow matrices in parallel via associative scan.
14+ - `logsig_depth`: The depth of the log-signature used in the Log-ODE method.
15+ - `basis_list`: The list of basis elements of the free Lie algebra up to the specified depth.
16+ - `lambd`: Regularization parameter applied to vector field scaling.
17+ - `w_init_std`: Standard deviation of the Gaussian initialization for the vector-field weights (further scaled by `1 / sqrt(block_size)`).
18+ - `classification`: Whether the model performs classification (softmax over the mean-pooled hidden states) or sequence regression (per-step `tanh` outputs).
19+
20+ The class includes:
21+ - `log_ode`: Method for computing the iterated Lie brackets of the linear vector fields.
22+ - `__call__`: Performs the forward pass, where flows are composed and applied to the hidden state
23+ either step-by-step or in parallel (using associative scan), followed by output projection.
24+ """
25+
126from __future__ import annotations
227
328from typing import List , Tuple
@@ -28,10 +53,6 @@ def depth(b):
2853
2954
3055class LogLinearCDE (eqx .Module ):
31- """
32- Block‑diagonal Linear Controlled Differential Equation layer.
33- """
34-
3556 init_layer : eqx .nn .Linear
3657 out_layer : eqx .nn .Linear
3758 vf_A : jnp .ndarray
@@ -41,10 +62,10 @@ class LogLinearCDE(eqx.Module):
4162 parallel_steps : int
4263 logsig_depth : int
4364 basis_list : List [Tuple [int , ...]]
44- stepsize : int
4565 lambd : float
66+ w_init_std : float
67+ classification : bool
4668
47- classification : bool = True
4869 lip2 : bool = True
4970 nondeterministic : bool = False
5071 stateful : bool = False
@@ -57,10 +78,10 @@ def __init__(
5778 label_dim : int ,
5879 block_size : int ,
5980 logsig_depth : int ,
60- stepsize : int ,
6181 lambd : float = 1.0 ,
6282 w_init_std : float = 0.25 ,
6383 parallel_steps : int = 128 ,
84+ classification : bool = True ,
6485 key ,
6586 ):
6687 if hidden_dim % block_size != 0 :
@@ -70,24 +91,25 @@ def __init__(
7091 self .num_blocks = hidden_dim // block_size
7192 self .parallel_steps = parallel_steps
7293 self .logsig_depth = logsig_depth
73- self .stepsize = stepsize
7494 ctx = rp .get_context (width = data_dim , depth = self .logsig_depth , coeffs = rp .DPReal )
7595 basis = ctx .lie_basis
7696 basis_list = []
7797 for i in range (basis .size (self .logsig_depth )):
7898 basis_list .append (eval (str (basis .index_to_key (i ))))
7999 self .basis_list = basis_list
80100 self .lambd = lambd
101+ self .w_init_std = w_init_std
81102
82103 k_init , k_A , k_B = jr .split (key , 3 )
83104 self .init_layer = eqx .nn .Linear (data_dim , hidden_dim , key = k_init )
84105 self .out_layer = eqx .nn .Linear (hidden_dim , label_dim , key = k_B )
85106
86107 self .vf_A = (
87108 jr .normal (k_A , (data_dim + 1 , self .num_blocks * block_size * block_size ))
88- * w_init_std
109+ * self . w_init_std
89110 / jnp .sqrt (block_size )
90111 )
112+ self .classification = classification
91113
92114 def log_ode (self , vf ):
93115
@@ -111,7 +133,7 @@ def log_ode(self, vf):
111133
112134 left_indices = []
113135 right_indices = []
114- for ( i_b , b ) in curr_elements :
136+ for i_b , b in curr_elements :
115137 u_tuple = to_tuple (b [0 ])
116138 v_tuple = to_tuple (b [1 ])
117139 i_u = basis_index [u_tuple ]
@@ -184,7 +206,12 @@ def parallel_step(y, flows):
184206 inp_rem = flows [- remainder :]
185207 _ , y_rem = jax .lax .scan (step , ys [- 1 ], inp_rem )
186208 ys = jnp .vstack ([ys , y_rem ])
187- ys = jnp .mean (ys , axis = 0 )
188- ys = self .out_layer (ys )
189- preds = jax .nn .softmax (ys )
209+
210+ if self .classification :
211+ ys = jnp .mean (ys , axis = 0 )
212+ preds = jax .nn .softmax (self .out_layer (ys ))
213+ else :
214+ ys = jax .vmap (self .out_layer )(ys )
215+ preds = jnp .tanh (ys )
216+
190217 return preds