
Commit 27f33ea

minor notebook edit, incremented python min version
1 parent e53425b commit 27f33ea

File tree

5 files changed (+49, -557 lines)

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -34,6 +34,7 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+*/_version.py

 # PyInstaller
 # Usually these files are written by a python script from a template

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 default_language_version:
-  python: "3.14"
+  python: "3.11"

 repos:
   - repo: local

notebooks/shapley_toy.py

Lines changed: 25 additions & 71 deletions

@@ -1,34 +1,24 @@
 import marimo

-__generated_with = "0.3.2"
+__generated_with = "0.17.0"
 app = marimo.App()


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        # LS-SPA Demonstration Notebook
-        """
-    )
+    mo.md(r"""# LS-SPA Demonstration Notebook""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"### In this notebook, we use the data from the toy example in Section 2.5 of the paper \"Efficient Shapley Performance Attribution for Least-Squares Regression\" to demonstrate how Shapley values can be computed directly for a linear, least-squares model. We then demonstrate how LS-SPA can be used to generate the same Shapley attribution. In this specific case, we have a very small number of features, so it is feasible to compute the exact Shapley attribution. When the number of features exceeds 15, this is no longer the case. LS-SPA is able to accurately approximate Shapley attributions for linear least-squares models even when the number of features exceeds 1000."
-    )
+    mo.md(r"""### In this notebook, we use the data from the toy example in Section 2.5 of the paper "Efficient Shapley Performance Attribution for Least-Squares Regression" to demonstrate how Shapley values can be computed directly for a linear, least-squares model. We then demonstrate how LS-SPA can be used to generate the same Shapley attribution. In this specific case, we have a very small number of features, so it is feasible to compute the exact Shapley attribution. When the number of features exceeds 15, this is no longer the case. LS-SPA is able to accurately approximate Shapley attributions for linear least-squares models even when the number of features exceeds 1000.""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Imports
-        """
-    )
+    mo.md(r"""## Imports""")
     return

@@ -41,17 +31,12 @@ def _():
     import matplotlib.pyplot as plt

     from ls_spa import ls_spa
-
-    return itertools, ls_spa, math, mo, np, plt
+    return itertools, ls_spa, math, mo, np


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Data loading
-        """
-    )
+    mo.md(r"""## Data loading""")
     return


@@ -60,45 +45,34 @@ def _():
     N = 50
     M = 50
     p = 3
-    return M, N, p
+    return (p,)


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### The rows of $X$ correspond to observations and the columns of $X$ correspond to features. We fit a least-squares model on the training data `X_train` and `y_train` and evaluate its performance on the test data `X_test` and `y_test`.
-        """
-    )
+    mo.md(r"""### The rows of $X$ correspond to observations and the columns of $X$ correspond to features. We fit a least-squares model on the training data `X_train` and `y_train` and evaluate its performance on the test data `X_test` and `y_test`.""")
     return


 @app.cell
 def _(np):
-    X_train, X_test, y_train, y_test = (
-        np.load("./data/toy_data.npz")[key]
-        for key in ["X_train", "X_test", "y_train", "y_test"]
-    )
+    with np.load("./data/toy_data.npz") as data:
+        X_train = data["X_train"]
+        X_test = data["X_test"]
+        y_train = data["y_train"]
+        y_test = data["y_test"]
     return X_test, X_train, y_test, y_train


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Direct computation of lifts and $R^2$
-        """
-    )
+    mo.md(r"""## Direct computation of lifts and $R^2$""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### We compute the out-of-sample $R^2$ for a least-squares model fitted on each subset of our features.
-        """
-    )
+    mo.md(r"""### We compute the out-of-sample $R^2$ for a least-squares model fitted on each subset of our features.""")
     return

@@ -119,16 +93,12 @@ def _(X_test, X_train, itertools, np, p, y_test, y_train):
     ) / (np.linalg.norm(y_test) ** 2)

     R2 = np.around(R2, 2)
-    return R2, X_test_sel, X_train_sel, theta
+    return (R2,)


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### For every ordering of our features, we remove one from our model and re-fit sequentially. For each feature, we consider the change in the $R^2$ of the model due to its addition/removal. For a single ordering, the vector of these performance differences due to each feature is a lift vector. The Shapley attribution of our model is the average of the lift vectors for every possible ordering of the features.
-        """
-    )
+    mo.md(r"""### For every ordering of our features, we remove one from our model and re-fit sequentially. For each feature, we consider the change in the $R^2$ of the model due to its addition/removal. For a single ordering, the vector of these performance differences due to each feature is a lift vector. The Shapley attribution of our model is the average of the lift vectors for every possible ordering of the features.""")
     return


@@ -146,16 +116,12 @@ def _(R2, itertools, math, np, p):
     perf = R2[*inds]

     attrs = np.around(np.mean(lifts, axis=0), 2)
-    return attrs, inds, lift, lifts, perf, perms
+    return attrs, lifts, perms


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### We display the $R^2$ for the model fitted with each subset of the features, and we also display the lift vectors corresponding to each permutation of the features.
-        """
-    )
+    mo.md(r"""### We display the $R^2$ for the model fitted with each subset of the features, and we also display the lift vectors corresponding to each permutation of the features.""")
     return

@@ -169,7 +135,7 @@ def _(R2, itertools, p):
         S = "{" + "".join("{},".format(idx + 1) for idx in indices)[:-1] + "}"

         print("{: ^8}| {}".format(S, R2[*_mask]))
-    return S, indices
+    return


 @app.cell
@@ -179,26 +145,18 @@ def _(lifts, perms):
     for _i, _perm in enumerate(perms):
         pi = "(" + "".join("{},".format(_p + 1) for _p in _perm)[:-1] + ")"
         print("{: ^12}| {}".format(pi, lifts[_i]))
-    return (pi,)
+    return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ## Comparison of true Shapley attribution and LS-SPA
-        """
-    )
+    mo.md(r"""## Comparison of true Shapley attribution and LS-SPA""")
     return


 @app.cell
 def _(mo):
-    mo.md(
-        r"""
-        ### We use LS-SPA to estimate (in one line) the Shapley attribution we computed exactly. We show both for comparison.
-        """
-    )
+    mo.md(r"""### We use LS-SPA to estimate (in one line) the Shapley attribution we computed exactly. We show both for comparison.""")
     return

@@ -208,16 +166,12 @@ def _(X_test, X_train, attrs, ls_spa, np, y_test, y_train):
     ls_spa_attrs = np.around(np.array(results.attribution), 2)
     print("Explicit Shapley attribution: {}".format(attrs))
     print("LS-SPA Shapley attribution: {}".format(ls_spa_attrs))
-    return ls_spa_attrs, results
+    return (results,)


 @app.cell
 def _(mo):
-    mo.md(
-        """
-        ### We can also print the ShapleyResults object returned by `ls_spa` to see a useful dashboard about the computed Shapley attribution.
-        """
-    )
+    mo.md("""### We can also print the ShapleyResults object returned by `ls_spa` to see a useful dashboard about the computed Shapley attribution.""")
     return
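A note on what the notebook computes, since its markdown cells describe the procedure only in prose: the attribution averages the lift vector over every feature ordering, so for feature $j$ among $p$ features,

$\phi_j = \frac{1}{p!} \sum_{\pi} \left( R^2(S_j^\pi \cup \{j\}) - R^2(S_j^\pi) \right)$,

where $\pi$ ranges over all $p!$ orderings and $S_j^\pi$ is the set of features preceding $j$ in $\pi$. (The `R2[*inds]` subscript-unpacking syntax visible in the hunks above requires Python 3.11, which matches the `requires-python` bump below.) A minimal self-contained sketch of this exact computation, assuming the notebook's convention $R^2 = (\|y_\text{test}\|^2 - \|y_\text{test} - X\theta\|^2)/\|y_\text{test}\|^2$; the function names (`subset_r2`, `exact_shapley`) are illustrative, not the notebook's own:

import itertools

import numpy as np

def subset_r2(X_train, X_test, y_train, y_test, cols):
    # Out-of-sample R^2 of a least-squares fit on the feature subset `cols`,
    # normalized by ||y_test||^2 as in the notebook's hunk above.
    if len(cols) == 0:
        return 0.0
    theta, *_ = np.linalg.lstsq(X_train[:, cols], y_train, rcond=None)
    resid = y_test - X_test[:, cols] @ theta
    norm2 = np.linalg.norm(y_test) ** 2
    return (norm2 - np.linalg.norm(resid) ** 2) / norm2

def exact_shapley(X_train, X_test, y_train, y_test):
    # Average the lift vector over all p! feature orderings.
    p = X_train.shape[1]
    lifts = []
    for perm in itertools.permutations(range(p)):
        lift = np.zeros(p)
        prev = 0.0
        for k in range(p):
            cur = subset_r2(X_train, X_test, y_train, y_test, list(perm[: k + 1]))
            lift[perm[k]] = cur - prev  # lift of the feature added at step k
            prev = cur
        lifts.append(lift)
    return np.mean(lifts, axis=0)

As for the one-line LS-SPA call: the diff confirms that `ls_spa` is imported from the `ls_spa` package and that the returned `ShapleyResults` object has an `.attribution` field, but the positional argument order below is an assumption, not something this commit shows:

# Assumed signature; check the package documentation for the exact call.
results = ls_spa(X_train, X_test, y_train, y_test)
print(results.attribution)  # estimated Shapley values, one per feature
print(results)              # prints the ShapleyResults dashboard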

pyproject.toml

Lines changed: 9 additions & 5 deletions

@@ -6,13 +6,13 @@ authors = [
     { name = "Logan Bell", email = "[email protected]" },
     { name = "Nikhil Devanathan", email = "[email protected]" },
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 readme = "README.md"
 license = "Apache-2.0"
 dependencies = [
-    "numpy>=2.1.0,<3",
-    "scipy>=1.14.1,<2",
-    "pandas>=2.2.2,<3",
+    "numpy>=2.3.4,<3",
+    "scipy>=1.16.2,<2",
+    "pandas>=2.3.3,<3",
 ]

 [dependency-groups]
@@ -35,6 +35,9 @@ build-backend = "hatchling.build"
 [tool.hatch.version]
 source = "vcs"

+[tool.hatch.build.hooks.vcs]
+version-file = "ls_spa/_version.py"
+
 [tool.hatch.build.targets.wheel]
 packages = ["ls_spa"]

@@ -66,7 +69,7 @@ skip_covered = true
 # ---------------

 [tool.ruff]
-target-version = "py313"
+target-version = "py311"
 line-length = 100
 extend-exclude = [
     ".git",
@@ -79,6 +82,7 @@ extend-exclude = [
     "__pycache__",
     "build",
     "dist",
+    "_version.py",
 ]
 fix = true
 show-fixes = true
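
The new `[tool.hatch.build.hooks.vcs]` table tells the hatch-vcs build hook to write the version it resolves from version control into `ls_spa/_version.py` at build time, which is why the same file is newly ignored in `.gitignore` and excluded from Ruff above. A minimal sketch of how the package could surface that generated version; placing it in `ls_spa/__init__.py` and the fallback string are assumptions, not shown in this commit:

try:
    # Generated by the hatch-vcs build hook; absent in a plain source checkout.
    from ls_spa._version import __version__
except ImportError:
    __version__ = "0.0.0+unknown"  # hypothetical fallback for source checkouts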
