
Commit 15c7691

[DOC] clean up examples (#427)

* print intermediate results
* add horizontal spaces
* clean
* Update examples/plot_conditional_vs_marginal_xor_data.py
* Update examples/plot_importance_classification_iris.py

Co-authored-by: lionel kusch <[email protected]>

1 parent eb6f855 · commit 15c7691

5 files changed: +37 additions, -15 deletions

examples/plot_conditional_vs_marginal_xor_data.py

Lines changed: 13 additions & 5 deletions
@@ -19,8 +19,8 @@
 from hidimstat import CFI
 
 # %%
-# To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel. The decision function of
-# the fitted model shows that the model is able to separate the two classes.
+# To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel.
+#
 rng = np.random.RandomState(0)
 X = rng.randn(400, 2)
 Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype(int)
@@ -38,12 +38,16 @@
 )
 model = SVC(kernel="rbf", random_state=0)
 model.fit(X_train, y_train)
-Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
 
 
 # %%
 # Visualizing the decision function of the SVC
 # --------------------------------------------
+# Let's plot the decision function of the fitted model
+# to confirm that the model is able to separate the two classes.
+
+Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
+
 fig, ax = plt.subplots()
 ax.imshow(
     Z.reshape(xx.shape),
@@ -86,7 +90,9 @@
 # :math:`Y \perp\!\!\!\perp X^1 | X^2`).
 cv = KFold(n_splits=5, shuffle=True, random_state=0)
 clf = SVC(kernel="rbf", random_state=0)
-# Compute marginal importance using univariate models
+
+# %%
+# Compute marginal importance using univariate models.
 marginal_scores = []
 for i in range(X.shape[1]):
     feat_scores = []
@@ -101,10 +107,11 @@
         univariate_model.fit(X_train_univariate, y_train)
 
         feat_scores.append(univariate_model.score(X_test_univariate, y_test))
+
     marginal_scores.append(feat_scores)
 
 # %%
-
+# Compute the conditional importance using the CFI class.
 importances = []
 for i, (train_index, test_index) in enumerate(cv.split(X)):
     X_train, X_test = X[train_index], X[test_index]
@@ -132,6 +139,7 @@
 # ---------------------------------
 # We will use boxplots to visualize the distribution of the importance scores.
 fig, axes = plt.subplots(1, 2, sharey=True, figsize=(6, 2.5))
+
 # Marginal scores boxplot
 sns.boxplot(
     data=np.array(marginal_scores).T,
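
For readers skimming the diff, the pipeline this example reorganizes boils down to the sketch below: XOR-labelled Gaussian data, an RBF-kernel SVC, and its decision function evaluated on a grid. The grid bounds and resolution are illustrative assumptions; the data generation and model calls mirror lines visible in the hunks above.

# Minimal sketch of the XOR / SVC setup shown in this diff.
# The grid bounds (-3, 3) and resolution (100) are assumptions, not taken from the example.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(400, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=0)
model = SVC(kernel="rbf", random_state=0)
model.fit(X_train, y_train)

# Evaluate the decision function on a dense grid, as the example does before plotting.
xx, yy = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))
Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
print(Z.reshape(xx.shape).shape)  # (100, 100), ready for imshow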

examples/plot_diabetes_variable_importance_example.py

Lines changed: 5 additions & 4 deletions
@@ -59,6 +59,7 @@
 # -------------------------
 diabetes = load_diabetes()
 X, y = diabetes.data, diabetes.target
+
 # Encode sex as binary
 X[:, 1] = (X[:, 1] > 0.0).astype(int)
 
@@ -81,8 +82,8 @@
         y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
     )
 
-    print(f"Fold {i}: {score}")
-    print(f"Fold {i}: {mse}")
+    print(f"Fold {i}: {score=}")
+    print(f"Fold {i}: {mse=}")
 
 # %%
 # Fit a baselien model on the diabetes dataset
@@ -103,8 +104,8 @@
         y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
     )
 
-    print(f"Fold {i}: {score}")
-    print(f"Fold {i}: {mse}")
+    print(f"Fold {i}: {score=}")
+    print(f"Fold {i}: {mse=}")
 
 # %%
 # Measure the importance of variables using the CFI method
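
The only functional change in this file is switching to Python 3.8's self-documenting f-string specifier, so each fold prints the variable name alongside its value instead of a bare number. A quick illustration with made-up values:

# Illustration of the f-string "=" specifier this diff switches to (requires Python >= 3.8).
# The numbers below are fabricated for demonstration.
i = 0
score, mse = 0.42, 3350.7
print(f"Fold {i}: {score}")   # old form  -> "Fold 0: 0.42"
print(f"Fold {i}: {score=}")  # new form  -> "Fold 0: score=0.42"
print(f"Fold {i}: {mse=}")    #           -> "Fold 0: mse=3350.7"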

examples/plot_fmri_data_example.py

Lines changed: 2 additions & 0 deletions
@@ -128,10 +128,12 @@ def preprocess_haxby(subject=2, memory=None):
 # For fMRI data taking 500 clusters is generally a good default choice.
 
 n_clusters = 500
+
 # Deriving voxels connectivity.
 shape = mask.shape
 n_x, n_y, n_z = shape[0], shape[1], shape[2]
 connectivity = image.grid_to_graph(n_x=n_x, n_y=n_y, n_z=n_z, mask=mask)
+
 # Initializing FeatureAgglomeration object.
 ward = FeatureAgglomeration(n_clusters=n_clusters, connectivity=connectivity)
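
The added blank lines separate the three steps of this clustering block: choosing n_clusters, deriving the voxel connectivity graph, and building the FeatureAgglomeration object. A minimal sketch of those same scikit-learn calls on a toy 3D mask (the mask, the data, and n_clusters=10 are assumptions; the real example uses the Haxby mask and 500 clusters):

# Sketch: derive grid connectivity from a 3D mask, then agglomerate voxel features.
import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction import image

mask = np.ones((8, 8, 8), dtype=bool)          # toy "brain" mask, 512 voxels
n_x, n_y, n_z = mask.shape
connectivity = image.grid_to_graph(n_x=n_x, n_y=n_y, n_z=n_z, mask=mask)

ward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity)
X = np.random.RandomState(0).randn(20, mask.sum())  # 20 samples x n_voxels
X_reduced = ward.fit_transform(X)
print(X_reduced.shape)  # (20, 10): one aggregated signal per cluster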

examples/plot_importance_classification_iris.py

Lines changed: 7 additions & 6 deletions
@@ -11,13 +11,13 @@
 
 To briefly summarize the two methods:
 
-- PFI (Permutation Feature Importance) shuffles the values of a feature and measures
-the increase in the loss when predicting (using om the same full model) on the
-shuffled data.
+- PFI (Permutation Feature Importance) shuffles the values of a feature and measures
+  the increase in the loss when predicting (using om the same full model) on the
+  shuffled data.
 
-- CFI (Conditional Feature Importance) is a conditional version of PFI that
-preserves the conditional distribution of the feature. It introduces a second model to
-estimate this conditional distribution.
+- CFI (Conditional Feature Importance) is a conditional version of PFI that
+  preserves the conditional distribution of the feature. It introduces a second model to
+  estimate this conditional distribution.
 
 """
 
@@ -43,6 +43,7 @@
 # the petal length, width amd some noise but not related to the target. The spurious feature
 # allows to illustrate that `PFI` is not robust to spurious features,
 # contrarily to `CFI`.
+
 dataset = load_iris()
 rng = np.random.RandomState(0)
 X, y = dataset.data, dataset.target
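
The reflowed docstring describes PFI: shuffle one feature, re-score the already-fitted model on the shuffled data, and read the score drop as importance. A compact stand-in using scikit-learn's permutation_importance on the same iris data (a hypothetical substitution for illustration only; the example itself uses hidimstat's classes):

# PFI idea in a few lines, via scikit-learn's permutation_importance.
from sklearn.datasets import load_iris
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Shuffle each feature on held-out data and measure the drop in score.
result = permutation_importance(clf, X_test, y_test, n_repeats=10, random_state=0)
print(result.importances_mean)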

examples/plot_model_agnostic_importance.py

Lines changed: 10 additions & 0 deletions
@@ -73,13 +73,15 @@
 d0crt_linear = D0CRT(estimator=clone(linear_model), screening_threshold=None)
 d0crt_linear.fit_importance(X, y)
 pval_dcrt_linear = d0crt_linear.pvalues_
+print(f"{pval_dcrt_linear=}")
 
 d0crt_non_linear = D0CRT(
     estimator=clone(non_linear_model),
     screening_threshold=None,
 )
 d0crt_non_linear.fit_importance(X, y)
 pval_dcrt_non_linear = d0crt_non_linear.pvalues_
+print(f"{pval_dcrt_non_linear=}")
 
 # %%
 # Compute p-values using LOCO
@@ -136,6 +138,14 @@
     importances_non_linear, 0, axis=0, alternative="greater"
 )
 
+print(f"{pval_linear=}")
+print(f"{pval_non_linear=}")
+
+
+#################################################################################
+# Plot the :math:`-log_{10}(pval)` for each method and variable
+# -------------------------------------------------------------
+
 df_pval = pd.DataFrame(
     {
         "pval": np.hstack(
