
Commit 27f33ea

minor notebook edit, incremented python min version
1 parent e53425b commit 27f33ea

File tree

5 files changed (+49, -557 lines)

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -34,6 +34,7 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+*/_version.py

 # PyInstaller
 # Usually these files are written by a python script from a template

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 default_language_version:
-  python: "3.14"
+  python: "3.11"

 repos:
   - repo: local

notebooks/shapley_toy.py

Lines changed: 25 additions & 71 deletions

@@ -1,34 +1,24 @@
 import marimo

-__generated_with = "0.3.2"
+__generated_with = "0.17.0"
 app = marimo.App()


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        # LS-SPA Demonstration Notebook
-        """
-    )
+    mo.md(r"""# LS-SPA Demonstration Notebook""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"### In this notebook, we use the data from the toy example in Section 2.5 of the paper \"Efficient Shapley Performance Attribution for Least-Squares Regression\" to demonstrate how Shapley values can be computed directly for a linear, least-squares model. We then demonstrate how LS-SPA can be used to generate the same Shapley attribution. In this specific case, we have a very small number of features, so it is feasible to compute the exact Shapley attribution. When the number of features exceeds 15, this is no longer the case. LS-SPA is able to accurately approximate Shapley attributions for linear least-squares models even when the number of features exceeds 1000."
-    )
+    mo.md(r"""### In this notebook, we use the data from the toy example in Section 2.5 of the paper "Efficient Shapley Performance Attribution for Least-Squares Regression" to demonstrate how Shapley values can be computed directly for a linear, least-squares model. We then demonstrate how LS-SPA can be used to generate the same Shapley attribution. In this specific case, we have a very small number of features, so it is feasible to compute the exact Shapley attribution. When the number of features exceeds 15, this is no longer the case. LS-SPA is able to accurately approximate Shapley attributions for linear least-squares models even when the number of features exceeds 1000.""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Imports
-        """
-    )
+    mo.md(r"""## Imports""")
     return

@@ -41,17 +31,12 @@ def _():
     import matplotlib.pyplot as plt

     from ls_spa import ls_spa
-
-    return itertools, ls_spa, math, mo, np, plt
+    return itertools, ls_spa, math, mo, np


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Data loading
-        """
-    )
+    mo.md(r"""## Data loading""")
     return


@@ -60,45 +45,34 @@ def _():
     N = 50
     M = 50
     p = 3
-    return M, N, p
+    return (p,)


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### The rows of $X$ correspond to observations and the columns of $X$ correspond to features. We fit a least-squares model on the training data `X_train` and `y_train` and evaluate its performance on the test data `X_test` and `y_test`.
-        """
-    )
+    mo.md(r"""### The rows of $X$ correspond to observations and the columns of $X$ correspond to features. We fit a least-squares model on the training data `X_train` and `y_train` and evaluate its performance on the test data `X_test` and `y_test`.""")
     return


 @app.cell
 def _(np):
-    X_train, X_test, y_train, y_test = (
-        np.load("./data/toy_data.npz")[key]
-        for key in ["X_train", "X_test", "y_train", "y_test"]
-    )
+    with np.load("./data/toy_data.npz") as data:
+        X_train = data["X_train"]
+        X_test = data["X_test"]
+        y_train = data["y_train"]
+        y_test = data["y_test"]
     return X_test, X_train, y_test, y_train


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Direct computation of lifts and $R^2$
-        """
-    )
+    mo.md(r"""## Direct computation of lifts and $R^2$""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### We compute the out-of-sample $R^2$ for a least-squares model fitted on each subset of our features.
-        """
-    )
+    mo.md(r"""### We compute the out-of-sample $R^2$ for a least-squares model fitted on each subset of our features.""")
     return

@@ -119,16 +93,12 @@ def _(X_test, X_train, itertools, np, p, y_test, y_train):
     ) / (np.linalg.norm(y_test) ** 2)

     R2 = np.around(R2, 2)
-    return R2, X_test_sel, X_train_sel, theta
+    return (R2,)


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### For every ordering of our features, we remove one from our model and re-fit sequentially. For each feature, we consider the change in the $R^2$ of the model due to its addition/removal. For a single ordering, the vector of these performance differences due to each feature is a lift vector. The Shapley attribution of our model is the average of the lift vectors for every possible ordering of the features.
-        """
-    )
+    mo.md(r"""### For every ordering of our features, we remove one from our model and re-fit sequentially. For each feature, we consider the change in the $R^2$ of the model due to its addition/removal. For a single ordering, the vector of these performance differences due to each feature is a lift vector. The Shapley attribution of our model is the average of the lift vectors for every possible ordering of the features.""")
     return


@@ -146,16 +116,12 @@ def _(R2, itertools, math, np, p):
     perf = R2[*inds]

     attrs = np.around(np.mean(lifts, axis=0), 2)
-    return attrs, inds, lift, lifts, perf, perms
+    return attrs, lifts, perms


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### We display the $R^2$ for the model fitted with each subset of the features, and we also display the lift vectors corresponding to each permutation of the features.
-        """
-    )
+    mo.md(r"""### We display the $R^2$ for the model fitted with each subset of the features, and we also display the lift vectors corresponding to each permutation of the features.""")
     return

@@ -169,7 +135,7 @@ def _(R2, itertools, p):
         S = "{" + "".join("{},".format(idx + 1) for idx in indices)[:-1] + "}"

         print("{: ^8}| {}".format(S, R2[*_mask]))
-    return S, indices
+    return


 @app.cell
@@ -179,26 +145,18 @@ def _(lifts, perms):
     for _i, _perm in enumerate(perms):
         pi = "(" + "".join("{},".format(_p + 1) for _p in _perm)[:-1] + ")"
         print("{: ^12}| {}".format(pi, lifts[_i]))
-    return (pi,)
+    return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Comparison of true Shapley attribution and LS-SPA
-        """
-    )
+    mo.md(r"""## Comparison of true Shapley attribution and LS-SPA""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### We use LS-SPA to estimate (in one line) the Shapley attribution we computed exactly. We show both for comparison.
-        """
-    )
+    mo.md(r"""### We use LS-SPA to estimate (in one line) the Shapley attribution we computed exactly. We show both for comparison.""")
     return

@@ -208,16 +166,12 @@ def _(X_test, X_train, attrs, ls_spa, np, y_test, y_train):
     ls_spa_attrs = np.around(np.array(results.attribution), 2)
     print("Explicit Shapley attribution: {}".format(attrs))
     print("LS-SPA Shapley attribution: {}".format(ls_spa_attrs))
-    return ls_spa_attrs, results
+    return (results,)


 @app.cell
 def _(mo):
-    mo.md(
-        """
-        ### We can also print the ShapleyResults object returned by `ls_spa` to see a useful dashboard about the computed Shapley attribution.
-        """
-    )
+    mo.md("""### We can also print the ShapleyResults object returned by `ls_spa` to see a useful dashboard about the computed Shapley attribution.""")
     return
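A note on what the notebook computes, since its markdown cells describe the procedure only in prose: the attribution averages the lift vector over every feature ordering, so for feature $j$ among $p$ features,

$\phi_j = \frac{1}{p!} \sum_{\pi} \left( R^2(S_j^\pi \cup \{j\}) - R^2(S_j^\pi) \right)$,

where $\pi$ ranges over all $p!$ orderings and $S_j^\pi$ is the set of features preceding $j$ in $\pi$. (The `R2[*inds]` subscript-unpacking syntax visible in the hunks above requires Python 3.11, which matches the `requires-python` bump below.) A minimal self-contained sketch of this exact computation, assuming the notebook's convention $R^2 = (\|y_\text{test}\|^2 - \|y_\text{test} - X\theta\|^2)/\|y_\text{test}\|^2$; the function names (`subset_r2`, `exact_shapley`) are illustrative, not the notebook's own:

import itertools

import numpy as np

def subset_r2(X_train, X_test, y_train, y_test, cols):
    # Out-of-sample R^2 of a least-squares fit on the feature subset `cols`,
    # normalized by ||y_test||^2 as in the notebook's hunk above.
    if len(cols) == 0:
        return 0.0
    theta, *_ = np.linalg.lstsq(X_train[:, cols], y_train, rcond=None)
    resid = y_test - X_test[:, cols] @ theta
    norm2 = np.linalg.norm(y_test) ** 2
    return (norm2 - np.linalg.norm(resid) ** 2) / norm2

def exact_shapley(X_train, X_test, y_train, y_test):
    # Average the lift vector over all p! feature orderings.
    p = X_train.shape[1]
    lifts = []
    for perm in itertools.permutations(range(p)):
        lift = np.zeros(p)
        prev = 0.0
        for k in range(p):
            cur = subset_r2(X_train, X_test, y_train, y_test, list(perm[: k + 1]))
            lift[perm[k]] = cur - prev  # lift of the feature added at step k
            prev = cur
        lifts.append(lift)
    return np.mean(lifts, axis=0)

As for the one-line LS-SPA call: the diff confirms that `ls_spa` is imported from the `ls_spa` package and that the returned `ShapleyResults` object has an `.attribution` field, but the positional argument order below is an assumption, not something this commit shows:

# Assumed signature; check the package documentation for the exact call.
results = ls_spa(X_train, X_test, y_train, y_test)
print(results.attribution)  # estimated Shapley values, one per feature
print(results)              # prints the ShapleyResults dashboard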

pyproject.toml

Lines changed: 9 additions & 5 deletions

@@ -6,13 +6,13 @@ authors = [
     { name = "Logan Bell", email = "[email protected]" },
     { name = "Nikhil Devanathan", email = "[email protected]" },
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 readme = "README.md"
 license = "Apache-2.0"
 dependencies = [
-    "numpy>=2.1.0,<3",
-    "scipy>=1.14.1,<2",
-    "pandas>=2.2.2,<3",
+    "numpy>=2.3.4,<3",
+    "scipy>=1.16.2,<2",
+    "pandas>=2.3.3,<3",
 ]

 [dependency-groups]
@@ -35,6 +35,9 @@ build-backend = "hatchling.build"
 [tool.hatch.version]
 source = "vcs"

+[tool.hatch.build.hooks.vcs]
+version-file = "ls_spa/_version.py"
+
 [tool.hatch.build.targets.wheel]
 packages = ["ls_spa"]

@@ -66,7 +69,7 @@ skip_covered = true
 # ---------------

 [tool.ruff]
-target-version = "py313"
+target-version = "py311"
 line-length = 100
 extend-exclude = [
     ".git",
@@ -79,6 +82,7 @@ extend-exclude = [
     "__pycache__",
     "build",
     "dist",
+    "_version.py",
 ]
 fix = true
 show-fixes = true
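
The new `[tool.hatch.build.hooks.vcs]` table tells the hatch-vcs build hook to write the version it resolves from version control into `ls_spa/_version.py` at build time, which is why the same file is newly ignored in `.gitignore` and excluded from Ruff above. A minimal sketch of how the package could surface that generated version; placing it in `ls_spa/__init__.py` and the fallback string are assumptions, not shown in this commit:

try:
    # Generated by the hatch-vcs build hook; absent in a plain source checkout.
    from ls_spa._version import __version__
except ImportError:
    __version__ = "0.0.0+unknown"  # hypothetical fallback for source checkouts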
