Set up working infrastructure for batched KF

aandorra-mia · aandorra-mia · commit 60706271fa11 · 2025-06-14T18:06:54.000-04:00
diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml
@@ -18,3 +18,4 @@ dependencies:
   - pip:
       - jax
       - blackjax
+      - -e .
diff --git a/notebooks/batch-examples.ipynb b/notebooks/batch-examples.ipynb
@@ -0,0 +1,388 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0a5841d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pytensor\n",
+    "import pytensor.tensor as pt\n",
+    "from pymc_extras.statespace.filters import StandardFilter\n",
+    "from tests.statespace.utilities.test_helpers import make_test_inputs\n",
+    "from pytensor.graph.replace import vectorize_graph\n",
+    "from importlib import reload\n",
+    "import pymc_extras.statespace.filters.distributions as pmss_dist\n",
+    "from pymc_extras.statespace.filters.distributions import SequenceMvNormal\n",
+    "import pymc as pm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "14299e50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seed = sum(map(ord, \"batched-kf\"))\n",
+    "rng = np.random.default_rng(seed)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "71bc513e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_batch_inputs(batch_size, p=1, m=5, r=1, n=10, rng=rng):\n",
+    "    \"\"\"\n",
+    "    Create batched inputs for testing.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    batch_size : int\n",
+    "        Number of independent input sets to generate; they are stacked along a new leading batch axis\n",
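+    "    p : int\n",
+    "        Number of observed series, forwarded to ``make_test_inputs`` (size of the last axis of the data input)\n",
+    "    m : int\n",
+    "        Number of hidden states, forwarded to ``make_test_inputs``\n",
+    "    r : int\n",
+    "        Third size argument forwarded to ``make_test_inputs`` (presumably the number of shocks; confirm against the helper)\n",
+    "    n : int\n",
+    "        Number of time steps, forwarded to ``make_test_inputs``\n",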
+    "    rng : numpy.random.Generator\n",
+    "        Random number generator\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    list of numpy.ndarray\n",
+    "        One array per input returned by ``make_test_inputs``, each stacked along a new leading axis of length ``batch_size``\n",
+    "    \"\"\"\n",
+    "    # Create individual inputs for each batch\n",
+    "    np_batch_inputs = []\n",
+    "    for i in range(batch_size):\n",
+    "        inputs = make_test_inputs(p, m, r, n, rng)\n",
+    "        np_batch_inputs.append(inputs)\n",
+    "\n",
+    "    return [np.stack(x, axis=0) for x in zip(*np_batch_inputs)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "0c1824cf",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3, 10, 1)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create batch inputs with batch size 3\n",
+    "np_batch_inputs = create_batch_inputs(3)\n",
+    "np_batch_inputs[0].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "773d4cb4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p, m, r, n = 1, 5, 1, 10\n",
+    "inputs = [pt.as_tensor(x).type() for x in make_test_inputs(p, m, r, n, rng)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "511de29f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kf = StandardFilter()\n",
+    "kf_outputs = kf.build_graph(*inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "33006d8e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batched_inputs = [pt.tensor(shape=(None, *x.type.shape)) for x in inputs]\n",
+    "vec_subs = dict(zip(inputs, batched_inputs))\n",
+    "bacthed_kf_outputs = vectorize_graph(kf_outputs, vec_subs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "987a4647",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[filtered_states,\n",
+       " predicted_states,\n",
+       " observed_states,\n",
+       " filtered_covariances,\n",
+       " predicted_covariances,\n",
+       " observed_covariances,\n",
+       " loglike_obs]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "kf_outputs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "4b8be0f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mu = bacthed_kf_outputs[1]\n",
+    "cov = bacthed_kf_outputs[4]\n",
+    "logp = bacthed_kf_outputs[-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "1dc80f94",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(None, 10, 5)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mu.type.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "1262c7d4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pmss_dist = reload(pmss_dist)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "2dcd3958",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "mus_.type.shape: (None, 10, 5), covs_.type.shape: (None, 10, 5, 5)\n",
+      "mus.type.shape: (10, None, 5), covs.type.shape: (10, None, 5, 5)\n",
+      "mvn_seq.type.shape: (None, None, 5)\n",
+      "mvn_seq.type.shape: (None, 10, 5)\n",
+      "mvn_seq.type.shape: (None, 10, 5)\n",
+      "mvn_seq.type.shape: (None, 10, 5)\n",
+      "mus_.type.shape: (None, 10, 5), covs_.type.shape: (None, 10, 5, 5)\n",
+      "mus.type.shape: (10, None, 5), covs.type.shape: (10, None, 5, 5)\n",
+      "mvn_seq.type.shape: (None, None, 5)\n",
+      "mvn_seq.type.shape: (None, 10, 5)\n",
+      "mvn_seq.type.shape: (None, 10, 5)\n",
+      "mvn_seq.type.shape: (None, 10, 5)\n"
+     ]
+    }
+   ],
+   "source": [
+    "mv_outputs = pmss_dist.SequenceMvNormal.dist(mus=mu, covs=cov, logp=logp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "6f41344f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np_batch_inputs = create_batch_inputs(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "44905b8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np_batch_inputs[0] = rng.normal(size=(3, 10, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "34fe01b8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3, 10, 5)"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "f_test = pytensor.function(batched_inputs, mv_outputs)\n",
+    "f_test(*np_batch_inputs).shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "f37efe79",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(None, 10, 1) (None, 10, 5) (None, 10, 5, 5)\n"
+     ]
+    }
+   ],
+   "source": [
+    "f_mv = pytensor.function(batched_inputs, pm.logp(mv_outputs, batched_inputs[0]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "7b45de74",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3, 10)"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "f_mv(*np_batch_inputs).shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f14596aa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "69519822",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f = pytensor.function(batched_inputs, bacthed_kf_outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "3f745449",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "633 μs ± 18.9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
+      "1.52 ms ± 35.9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
+      "4.76 ms ± 259 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for s in [1, 3, 10]:\n",
+    "    np_batch_inputs = create_batch_inputs(s)\n",
+    "    %timeit outputs = f(*np_batch_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5fcadef",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c479ff22",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pymc-extras-test",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/pymc_extras/statespace/filters/distributions.py b/pymc_extras/statespace/filters/distributions.py

-Original file line number
+Diff line change
   - pip:
       - jax
       - blackjax
 +      - -e .