"""
=================
Affine Invariance
=================

.. currentmodule:: fastcan

In this example, we compare the robustness of three feature selection methods
on affine-transformed features.
"""

# Authors: Sikai Zhang
# SPDX-License-Identifier: MIT

# %%
# Initialize test
# ---------------
# The three feature selection methods, i.e., OMP, OLS, and :class:`FastCan`,
# will select three features from the 10 features of the ``diabetes`` dataset.
# It can be seen that the three methods select the same features.

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import OrthogonalMatchingPursuit

from fastcan import FastCan, ols

X, y = load_diabetes(return_X_y=True)

n_selected = 3
omp_selector = OrthogonalMatchingPursuit(n_nonzero_coefs=n_selected)
fastcan_selector = FastCan(n_features_to_select=n_selected, verbose=0)
(ids_omp,) = omp_selector.fit(X, y).coef_.nonzero()
ids_ols, _ = ols(X, y, n_selected)
ids_fastcan = fastcan_selector.fit(X, y).indices_

print("Indices of features selected by:")
print("OMP:     ", np.sort(ids_omp))
print("OLS:     ", np.sort(ids_ols))
print("FastCan: ", np.sort(ids_fastcan))

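# As a quick check, the three selections on the clean data can be compared
# directly; they pick exactly the same feature subset.
print(
    "Same features selected on clean data:",
    set(ids_omp) == set(ids_ols) == set(ids_fastcan),
)
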
# %%
# Affine transformation
# ---------------------
# In this test, the 10 features of the ``diabetes`` dataset will be randomly
# polluted by an affine transformation, i.e., each feature is scaled by a random
# factor and shifted by a random offset. The three feature selection methods will
# then select three features from the polluted features. The more stable the
# result, the better.


n_features = X.shape[1]
rng = np.random.default_rng()

ids_omp_all = []
ids_ols_all = []
ids_fastcan_all = []
for _ in range(10):
    X_affine = X @ np.diag(rng.random(n_features)) + rng.random(n_features)

    (ids_omp,) = omp_selector.fit(X_affine, y).coef_.nonzero()
    ids_ols, _ = ols(X_affine, y, n_selected)
    ids_fastcan = fastcan_selector.fit(X_affine, y).indices_
    ids_omp_all += ids_omp.tolist()
    ids_ols_all += ids_ols.tolist()
    ids_fastcan_all += ids_fastcan.tolist()

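# %%
# As a quick sanity check on the last polluted copy: with the random scaling and
# shifting used above, the Pearson correlation between each feature and the target
# is unchanged, which is consistent with the affine invariance observed for
# :class:`FastCan` below. This check is only illustrative; it is not the scoring
# used internally by any of the selectors.

corr_raw = np.array([np.corrcoef(X[:, j], y)[0, 1] for j in range(n_features)])
corr_polluted = np.array(
    [np.corrcoef(X_affine[:, j], y)[0, 1] for j in range(n_features)]
)
print(
    "Max |correlation change| after affine pollution:",
    np.abs(corr_raw - corr_polluted).max(),
)
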
# %%
# Plot results
# ------------
# It can be seen that only :class:`FastCan` gives robust results when the
# features are polluted by the affine transformation.

import matplotlib.pyplot as plt

bin_lims = np.arange(n_features + 1)
counts_omp, _ = np.histogram(ids_omp_all, bins=bin_lims)
counts_ols, _ = np.histogram(ids_ols_all, bins=bin_lims)
counts_fastcan, _ = np.histogram(ids_fastcan_all, bins=bin_lims)

fig, axs = plt.subplots(1, 3, figsize=(8, 3))

axs[0].bar(bin_lims[:-1], counts_omp)
axs[0].set_xticks(bin_lims[:-1])
axs[0].set_ylim((0, 11))
axs[0].set_title("OMP")
axs[0].set_xlabel("Feature Index")
axs[0].set_ylabel("Count of Selected Times")

axs[1].bar(bin_lims[:-1], counts_ols)
axs[1].set_xticks(bin_lims[:-1])
axs[1].set_ylim((0, 11))
axs[1].set_title("OLS")
axs[1].set_xlabel("Feature Index")

axs[2].bar(bin_lims[:-1], counts_fastcan)
axs[2].set_xticks(bin_lims[:-1])
axs[2].set_ylim((0, 11))
axs[2].set_title("FastCan")
axs[2].set_xlabel("Feature Index")

plt.tight_layout()
plt.show()
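# %%
# The bar charts can also be summarized with a rough number: how many distinct
# features each method picked across the 10 polluted runs. A perfectly stable
# selector would pick only ``n_selected`` distinct features.

print("Distinct features selected across the polluted runs:")
print("OMP:     ", len(set(ids_omp_all)))
print("OLS:     ", len(set(ids_ols_all)))
print("FastCan: ", len(set(ids_fastcan_all)))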