|
"""
==================================
Repeated Edited nearest-neighbours
==================================

An illustration of the repeated edited nearest-neighbours method.

"""

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA

from unbalanced_dataset.under_sampling import (
    EditedNearestNeighbours,
    RepeatedEditedNearestNeighbours,
)

print(__doc__)

# Apply the seaborn defaults before querying the palette so that the colors
# picked below come from the seaborn color cycle.
sns.set()

# Define some colors for the plotting
almost_black = '#262626'
palette = sns.color_palette()


def _scatter_classes(ax, X_2d, labels, title):
    """Draw the samples of class 0 and class 1 on ``ax`` and set its title.

    Parameters
    ----------
    ax : matplotlib axes
        The axes to draw on.
    X_2d : array
        Samples already projected to two columns; column 0 is plotted on
        the x axis and column 1 on the y axis.
    labels : array
        Class label (0 or 1) of each row of ``X_2d``.
    title : str
        Title given to ``ax``.

    Notes
    -----
    Each scatter carries a ``label`` ("Class #0" / "Class #1"), so a legend
    can be shown by calling ``ax.legend()`` afterwards; none is drawn here.
    """
    # Class 0 uses the first palette color, class 1 the third one.
    for class_id, color in ((0, palette[0]), (1, palette[2])):
        mask = labels == class_id
        ax.scatter(X_2d[mask, 0], X_2d[mask, 1],
                   label="Class #{}".format(class_id), alpha=.5,
                   edgecolor=almost_black, facecolor=color, linewidth=0.15)
    ax.set_title(title)


# Generate the dataset: 5000 samples, 5 features, two classes weighted
# 0.3 / 0.7
X, y = make_classification(n_classes=2, class_sep=1.25, weights=[0.3, 0.7],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=5, n_clusters_per_class=1,
                           n_samples=5000, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space. The PCA is
# fitted on the original data only; the resampled sets below are projected
# with this same fitted transform.
X_vis = pca.fit_transform(X)

# Three subplots, unpack the axes array immediately
f, (ax1, ax2, ax3) = plt.subplots(1, 3)

_scatter_classes(ax1, X_vis, y, 'Original set')

# Apply the ENN
print('ENN')
enn = EditedNearestNeighbours()
X_resampled, y_resampled = enn.fit_transform(X, y)
X_res_vis = pca.transform(X_resampled)

_scatter_classes(ax2, X_res_vis, y_resampled, 'Edited nearest neighbours')

# Apply the RENN (on the original data, not on the ENN output)
print('RENN')
renn = RepeatedEditedNearestNeighbours()
X_resampled, y_resampled = renn.fit_transform(X, y)
X_res_vis = pca.transform(X_resampled)

_scatter_classes(ax3, X_res_vis, y_resampled,
                 'Repeated Edited nearest neighbours')

plt.show()
0 commit comments