
Commit 15c7691

[DOC] clean up examples (#427)

* print intermediate results
* add horizontal spaces
* clean
* Update examples/plot_conditional_vs_marginal_xor_data.py
* Update examples/plot_importance_classification_iris.py

Co-authored-by: lionel kusch <[email protected]>

1 parent eb6f855 · commit 15c7691

5 files changed: +37 additions, -15 deletions

examples/plot_conditional_vs_marginal_xor_data.py

Lines changed: 13 additions & 5 deletions
@@ -19,8 +19,8 @@
 from hidimstat import CFI
 
 # %%
-# To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel. The decision function of
-# the fitted model shows that the model is able to separate the two classes.
+# To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel.
+#
 rng = np.random.RandomState(0)
 X = rng.randn(400, 2)
 Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype(int)
@@ -38,12 +38,16 @@
 )
 model = SVC(kernel="rbf", random_state=0)
 model.fit(X_train, y_train)
-Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
 
 
 # %%
 # Visualizing the decision function of the SVC
 # --------------------------------------------
+# Let's plot the decision function of the fitted model
+# to confirm that the model is able to separate the two classes.
+
+Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
+
 fig, ax = plt.subplots()
 ax.imshow(
     Z.reshape(xx.shape),
@@ -86,7 +90,9 @@
 # :math:`Y \perp\!\!\!\perp X^1 | X^2`).
 cv = KFold(n_splits=5, shuffle=True, random_state=0)
 clf = SVC(kernel="rbf", random_state=0)
-# Compute marginal importance using univariate models
+
+# %%
+# Compute marginal importance using univariate models.
 marginal_scores = []
 for i in range(X.shape[1]):
     feat_scores = []
@@ -101,10 +107,11 @@
         univariate_model.fit(X_train_univariate, y_train)
 
         feat_scores.append(univariate_model.score(X_test_univariate, y_test))
+
     marginal_scores.append(feat_scores)
 
 # %%
-
+# Compute the conditional importance using the CFI class.
 importances = []
 for i, (train_index, test_index) in enumerate(cv.split(X)):
     X_train, X_test = X[train_index], X[test_index]
@@ -132,6 +139,7 @@
 # ---------------------------------
 # We will use boxplots to visualize the distribution of the importance scores.
 fig, axes = plt.subplots(1, 2, sharey=True, figsize=(6, 2.5))
+
 # Marginal scores boxplot
 sns.boxplot(
     data=np.array(marginal_scores).T,
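
For readers skimming the diff, the pipeline this example reorganizes boils down to the sketch below: XOR-labelled Gaussian data, an RBF-kernel SVC, and its decision function evaluated on a grid. The grid bounds and resolution are illustrative assumptions; the data generation and model calls mirror lines visible in the hunks above.

# Minimal sketch of the XOR / SVC setup shown in this diff.
# The grid bounds (-3, 3) and resolution (100) are assumptions, not taken from the example.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(400, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=0)
model = SVC(kernel="rbf", random_state=0)
model.fit(X_train, y_train)

# Evaluate the decision function on a dense grid, as the example does before plotting.
xx, yy = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))
Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
print(Z.reshape(xx.shape).shape)  # (100, 100), ready for imshow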

examples/plot_diabetes_variable_importance_example.py

Lines changed: 5 additions & 4 deletions
@@ -59,6 +59,7 @@
 # -------------------------
 diabetes = load_diabetes()
 X, y = diabetes.data, diabetes.target
+
 # Encode sex as binary
 X[:, 1] = (X[:, 1] > 0.0).astype(int)
 
@@ -81,8 +82,8 @@
         y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
     )
 
-    print(f"Fold {i}: {score}")
-    print(f"Fold {i}: {mse}")
+    print(f"Fold {i}: {score=}")
+    print(f"Fold {i}: {mse=}")
 
 # %%
 # Fit a baselien model on the diabetes dataset
@@ -103,8 +104,8 @@
         y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
     )
 
-    print(f"Fold {i}: {score}")
-    print(f"Fold {i}: {mse}")
+    print(f"Fold {i}: {score=}")
+    print(f"Fold {i}: {mse=}")
 
 # %%
 # Measure the importance of variables using the CFI method
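
The only functional change in this file is switching to Python 3.8's self-documenting f-string specifier, so each fold prints the variable name alongside its value instead of a bare number. A quick illustration with made-up values:

# Illustration of the f-string "=" specifier this diff switches to (requires Python >= 3.8).
# The numbers below are fabricated for demonstration.
i = 0
score, mse = 0.42, 3350.7
print(f"Fold {i}: {score}")   # old form  -> "Fold 0: 0.42"
print(f"Fold {i}: {score=}")  # new form  -> "Fold 0: score=0.42"
print(f"Fold {i}: {mse=}")    #           -> "Fold 0: mse=3350.7"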

examples/plot_fmri_data_example.py

Lines changed: 2 additions & 0 deletions
@@ -128,10 +128,12 @@ def preprocess_haxby(subject=2, memory=None):
 # For fMRI data taking 500 clusters is generally a good default choice.
 
 n_clusters = 500
+
 # Deriving voxels connectivity.
 shape = mask.shape
 n_x, n_y, n_z = shape[0], shape[1], shape[2]
 connectivity = image.grid_to_graph(n_x=n_x, n_y=n_y, n_z=n_z, mask=mask)
+
 # Initializing FeatureAgglomeration object.
 ward = FeatureAgglomeration(n_clusters=n_clusters, connectivity=connectivity)
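
The added blank lines separate the three steps of this clustering block: choosing n_clusters, deriving the voxel connectivity graph, and building the FeatureAgglomeration object. A minimal sketch of those same scikit-learn calls on a toy 3D mask (the mask, the data, and n_clusters=10 are assumptions; the real example uses the Haxby mask and 500 clusters):

# Sketch: derive grid connectivity from a 3D mask, then agglomerate voxel features.
import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction import image

mask = np.ones((8, 8, 8), dtype=bool)          # toy "brain" mask, 512 voxels
n_x, n_y, n_z = mask.shape
connectivity = image.grid_to_graph(n_x=n_x, n_y=n_y, n_z=n_z, mask=mask)

ward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity)
X = np.random.RandomState(0).randn(20, mask.sum())  # 20 samples x n_voxels
X_reduced = ward.fit_transform(X)
print(X_reduced.shape)  # (20, 10): one aggregated signal per cluster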

examples/plot_importance_classification_iris.py

Lines changed: 7 additions & 6 deletions
@@ -11,13 +11,13 @@
 
 To briefly summarize the two methods:
 
-- PFI (Permutation Feature Importance) shuffles the values of a feature and measures
-the increase in the loss when predicting (using om the same full model) on the
-shuffled data.
+- PFI (Permutation Feature Importance) shuffles the values of a feature and measures
+  the increase in the loss when predicting (using om the same full model) on the
+  shuffled data.
 
-- CFI (Conditional Feature Importance) is a conditional version of PFI that
-preserves the conditional distribution of the feature. It introduces a second model to
-estimate this conditional distribution.
+- CFI (Conditional Feature Importance) is a conditional version of PFI that
+  preserves the conditional distribution of the feature. It introduces a second model to
+  estimate this conditional distribution.
 
 """
 
@@ -43,6 +43,7 @@
 # the petal length, width amd some noise but not related to the target. The spurious feature
 # allows to illustrate that `PFI` is not robust to spurious features,
 # contrarily to `CFI`.
+
 dataset = load_iris()
 rng = np.random.RandomState(0)
 X, y = dataset.data, dataset.target
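
The reflowed docstring describes PFI: shuffle one feature, re-score the already-fitted model on the shuffled data, and read the score drop as importance. A compact stand-in using scikit-learn's permutation_importance on the same iris data (a hypothetical substitution for illustration only; the example itself uses hidimstat's classes):

# PFI idea in a few lines, via scikit-learn's permutation_importance.
from sklearn.datasets import load_iris
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Shuffle each feature on held-out data and measure the drop in score.
result = permutation_importance(clf, X_test, y_test, n_repeats=10, random_state=0)
print(result.importances_mean)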

examples/plot_model_agnostic_importance.py

Lines changed: 10 additions & 0 deletions
@@ -73,13 +73,15 @@
 d0crt_linear = D0CRT(estimator=clone(linear_model), screening_threshold=None)
 d0crt_linear.fit_importance(X, y)
 pval_dcrt_linear = d0crt_linear.pvalues_
+print(f"{pval_dcrt_linear=}")
 
 d0crt_non_linear = D0CRT(
     estimator=clone(non_linear_model),
     screening_threshold=None,
 )
 d0crt_non_linear.fit_importance(X, y)
 pval_dcrt_non_linear = d0crt_non_linear.pvalues_
+print(f"{pval_dcrt_non_linear=}")
 
 # %%
 # Compute p-values using LOCO
@@ -136,6 +138,14 @@
     importances_non_linear, 0, axis=0, alternative="greater"
 )
 
+print(f"{pval_linear=}")
+print(f"{pval_non_linear=}")
+
+
+#################################################################################
+# Plot the :math:`-log_{10}(pval)` for each method and variable
+# -------------------------------------------------------------
+
 df_pval = pd.DataFrame(
     {
         "pval": np.hstack(
