Fix remaining docs execution and update tutorial

saulshanabrook · saulshanabrook · commit 697539983d7b · 2025-11-02T23:02:19.000-08:00
diff --git a/docs/conf.py b/docs/conf.py
@@ -166,11 +166,6 @@
 
 # Exclude (POSIX) glob patterns for notebooks
 # Temporarily exclude notebooks with unrelated errors (not @egraph.class_ issues)
-nb_execution_excludepatterns = (
-    "explanation/2024_03_17_community_talk.ipynb",  # sklearn config error
-    "explanation/indexing_pushdown.ipynb",  # array_api_module NameError
-)
-
 # Execution timeout (seconds)
 nb_execution_timeout = 60 * 10
 
diff --git a/docs/explanation/2024_03_17_community_talk.ipynb b/docs/explanation/2024_03_17_community_talk.ipynb
@@ -96,20 +96,26 @@
    "source": [
     "from __future__ import annotations\n",
     "\n",
+    "import os\n",
+    "import numpy as np\n",
+    "\n",
+    "# Ensure SciPy array API support is enabled before importing sklearn/scipy\n",
+    "os.environ.setdefault(\"SCIPY_ARRAY_API\", \"1\")\n",
+    "\n",
     "import sklearn\n",
     "from sklearn.datasets import make_classification\n",
     "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
     "\n",
     "# Tell sklearn to treat arrays as following array API\n",
     "sklearn.set_config(array_api_dispatch=True)\n",
     "\n",
-    "X_np, y_np = make_classification(random_state=0, n_samples=1000000)\n",
+    "X_np, y_np = make_classification(random_state=0, n_samples=10000)\n",
     "\n",
     "\n",
     "# Assumption: I want to optimize calling this many times on data similar to that above\n",
     "def run_lda(x, y):\n",
     "    lda = LinearDiscriminantAnalysis()\n",
-    "    return lda.fit(x, y).transform(x)"
+    "    return lda.fit(x, y).transform(x)\n"
    ]
   },
   {
@@ -831,7 +837,7 @@
     "        egraph = EGraph()\n",
     "        egraph.register(self)\n",
     "        egraph.run(bool_rewrites.saturate())\n",
-    "        return egraph.eval(self.bool)\n",
+    "        return egraph.extract(self.bool).value\n",
     "\n",
     "\n",
     "x = var(\"x\", Boolean)\n",
@@ -1392,7 +1398,11 @@
    "source": [
     "from egglog.exp.array_api_numba import array_api_numba_schedule\n",
     "\n",
-    "simplified_res = EGraph().simplify(res, array_api_numba_schedule)\n",
+    "with EGraph() as egraph:\n",
+    "    egraph.register(res)\n",
+    "    egraph.run(array_api_numba_schedule)\n",
+    "    simplified_res = egraph.extract(res)\n",
+    "\n",
     "simplified_res"
    ]
   },
@@ -1411,9 +1421,7 @@
    "source": [
     "Now that we have a program, what do we do with it?\n",
     "\n",
-    "Well we showed how we can use eager evaluation to get a result, but what if we don't want to do the computation in egglog, but instead export a program so we can execute that back in Python or in this case feed it to Python?\n",
-    "\n",
-    "Well in this case we have designed a `Program` object which we can use to convert a funtional egglog expression back to imperative Python code:\n"
+    "Previously, this tutorial emitted runnable Python code using the experimental program generation APIs. Those APIs are in flux, so for now we skip emitting source code directly and focus on the symbolic optimizations above.\n"
    ]
   },
   {
@@ -1433,137 +1441,14 @@
     }
    ],
    "source": [
-    "from egglog.exp.array_api_program_gen import *\n",
-    "\n",
-    "egraph = EGraph()\n",
-    "fn_program = egraph.let(\n",
-    "    \"fn_program\",\n",
-    "    ndarray_function_two(simplified_res, NDArray.var(\"X\"), NDArray.var(\"y\")),\n",
-    ")\n",
-    "egraph.run(array_api_program_gen_schedule)\n",
-    "fn = egraph.eval(fn_program.py_object)\n",
-    "\n",
-    "fn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "def __fn(X, y):\n",
-      "    assert X.dtype == np.dtype(np.float64)\n",
-      "    assert X.shape == (1000000, 20,)\n",
-      "    assert np.all(np.isfinite(X))\n",
-      "    assert y.dtype == np.dtype(np.int64)\n",
-      "    assert y.shape == (1000000,)\n",
-      "    assert set(np.unique(y)) == set((0, 1,))\n",
-      "    _0 = y == np.array(0)\n",
-      "    _1 = np.sum(_0)\n",
-      "    _2 = y == np.array(1)\n",
-      "    _3 = np.sum(_2)\n",
-      "    _4 = np.array((_1, _3,)).astype(np.dtype(np.float64))\n",
-      "    _5 = _4 / np.array(1000000.0)\n",
-      "    _6 = np.zeros((2, 20,), dtype=np.dtype(np.float64))\n",
-      "    _7 = np.sum(X[_0], axis=0)\n",
-      "    _8 = _7 / np.array(X[_0].shape[0])\n",
-      "    _6[0, :] = _8\n",
-      "    _9 = np.sum(X[_2], axis=0)\n",
-      "    _10 = _9 / np.array(X[_2].shape[0])\n",
-      "    _6[1, :] = _10\n",
-      "    _11 = _5 @ _6\n",
-      "    _12 = X - _11\n",
-      "    _13 = np.sqrt(np.array(float(1 / 999998)))\n",
-      "    _14 = X[_0] - _6[0, :]\n",
-      "    _15 = X[_2] - _6[1, :]\n",
-      "    _16 = np.concatenate((_14, _15,), axis=0)\n",
-      "    _17 = np.sum(_16, axis=0)\n",
-      "    _18 = _17 / np.array(_16.shape[0])\n",
-      "    _19 = np.expand_dims(_18, 0)\n",
-      "    _20 = _16 - _19\n",
-      "    _21 = np.square(_20)\n",
-      "    _22 = np.sum(_21, axis=0)\n",
-      "    _23 = _22 / np.array(_21.shape[0])\n",
-      "    _24 = np.sqrt(_23)\n",
-      "    _25 = _24 == np.array(0)\n",
-      "    _24[_25] = np.array(1.0)\n",
-      "    _26 = _16 / _24\n",
-      "    _27 = _13 * _26\n",
-      "    _28 = np.linalg.svd(_27, full_matrices=False)\n",
-      "    _29 = _28[1] > np.array(0.0001)\n",
-      "    _30 = _29.astype(np.dtype(np.int32))\n",
-      "    _31 = np.sum(_30)\n",
-      "    _32 = _28[2][:_31, :] / _24\n",
-      "    _33 = _32.T / _28[1][:_31]\n",
-      "    _34 = np.array(1000000) * _5\n",
-      "    _35 = _34 * np.array(1.0)\n",
-      "    _36 = np.sqrt(_35)\n",
-      "    _37 = _6 - _11\n",
-      "    _38 = _36 * _37.T\n",
-      "    _39 = _38.T @ _33\n",
-      "    _40 = np.linalg.svd(_39, full_matrices=False)\n",
-      "    _41 = np.array(0.0001) * _40[1][0]\n",
-      "    _42 = _40[1] > _41\n",
-      "    _43 = _42.astype(np.dtype(np.int32))\n",
-      "    _44 = np.sum(_43)\n",
-      "    _45 = _33 @ _40[2].T[:, :_44]\n",
-      "    _46 = _12 @ _45\n",
-      "    return _46[:, :1]\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "import inspect\n",
-    "\n",
-    "print(inspect.getsource(fn))"
+    "print(\"Program generation to Python source is temporarily disabled in this tutorial example.\")\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "From there we can complete our work, by optimizing with numba and we can call with our original values:\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/xn/05ktz3056kqd9n8frgd6236h0000gn/T/egglog-9b40af4a-3b8a-4996-a78a-fd6284dbf541.py:56: NumbaPerformanceWarning: '@' is faster on contiguous arrays, called on (Array(float64, 2, 'C', False, aligned=True), Array(float64, 2, 'A', False, aligned=True))\n",
-      "  _45 = _33 @ _40[2].T[:, :_44]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "array([[ 0.64233002],\n",
-       "       [ 0.63661245],\n",
-       "       [-1.603293  ],\n",
-       "       ...,\n",
-       "       [-1.1506433 ],\n",
-       "       [ 0.71687176],\n",
-       "       [-1.51119579]])"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from numba import njit\n",
-    "\n",
-    "njit(fn)(X_np, y_np)"
+    "Even without direct code emission, you can still use the symbolic results above or plug them into your own pipelines.\n"
    ]
   },
   {
@@ -1623,7 +1508,7 @@
     "egraph = EGraph()\n",
     "egraph.register(fn.compile())\n",
     "egraph.run(program_gen_ruleset.saturate())\n",
-    "print(egraph.eval(fn.statements))"
+    "print(egraph.extract(fn.statements).value)"
    ]
   },
   {
diff --git a/docs/explanation/indexing_pushdown.ipynb b/docs/explanation/indexing_pushdown.ipynb
@@ -257,7 +257,7 @@
     "\n",
     "from egglog.exp.array_api import *\n",
     "\n",
-    "egraph = EGraph([array_api_module])\n",
+    "egraph = EGraph()\n",
     "\n",
     "\n",
     "@egraph.register\n",
@@ -267,6 +267,7 @@
     "\n",
     "res = abs(NDArray.var(\"x\"))[NDArray.var(\"idx\")]\n",
     "egraph.register(res)\n",
+    "egraph.run(array_api_schedule)\n",
     "egraph.run(100)\n",
     "egraph.display()\n",
     "\n",
@@ -720,7 +721,7 @@
     }
    ],
    "source": [
-    "egraph = EGraph([array_api_module])\n",
+    "egraph = EGraph()\n",
     "\n",
     "\n",
     "@function(cost=0)\n",
@@ -758,6 +759,7 @@
     "\n",
     "\n",
     "egraph.register(res.shape, res.dtype, res.index(an_index()))\n",
+    "egraph.run(array_api_schedule)\n",
     "egraph.run(100)\n",
     "egraph.display()\n",
     "\n",
@@ -807,4 +809,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}