|
"""
==================================
Repeated Edited nearest-neighbours
==================================

An illustration of the edited nearest-neighbours and repeated
edited nearest-neighbours methods combined in a pipeline object.

"""

# Echo the module docstring when the example is run as a script.
print(__doc__)

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA

from unbalanced_dataset.pipeline import make_pipeline
from unbalanced_dataset.under_sampling import (
    EditedNearestNeighbours,
    RepeatedEditedNearestNeighbours,
)

sns.set()

# Colours shared by every scatter plot below
almost_black = '#262626'
palette = sns.color_palette()


def _plot_classes(ax, X_2d, labels, title):
    """Scatter both classes of a 2D projection onto ``ax``.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    X_2d : ndarray
        Samples projected in 2D; columns 0 and 1 are used as x and y.
    labels : ndarray
        Class label (0 or 1) of each row of ``X_2d``.
    title : str
        Title given to the axes.
    """
    # Class 0 and class 1 use the first and third palette colours.
    for class_value, colour in ((0, palette[0]), (1, palette[2])):
        mask = labels == class_value
        ax.scatter(X_2d[mask, 0], X_2d[mask, 1],
                   label="Class #{}".format(class_value), alpha=.5,
                   edgecolor=almost_black, facecolor=colour, linewidth=0.15)
    ax.set_title(title)
    # The scatter labels are only visible once a legend is drawn. A fixed
    # location avoids the "best" placement search over thousands of points.
    ax.legend(loc='upper right')


# Generate the dataset: two classes weighted 0.3 / 0.7
X, y = make_classification(n_classes=2, class_sep=1.25, weights=[0.3, 0.7],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=5, n_clusters_per_class=1,
                           n_samples=5000, random_state=10)

# Fit and transform X to visualise it inside a 2D feature space
pca = PCA(n_components=2)
X_vis = pca.fit_transform(X)

# Two subplots side by side: the original set and the resampled set
fig, (ax_original, ax_resampled) = plt.subplots(1, 2)

_plot_classes(ax_original, X_vis, y, 'Original set')

# Create the samplers
enn = EditedNearestNeighbours()
renn = RepeatedEditedNearestNeighbours()

# Chain the samplers in a pipeline: ENN is applied first, then RENN
pipeline = make_pipeline(enn, renn)
X_resampled, y_resampled = pipeline.fit_sample(X, y)

# Reuse the PCA fitted on the original data so both panels share one
# projection.
X_res_vis = pca.transform(X_resampled)

# The title follows the order of the steps in the pipeline.
_plot_classes(ax_resampled, X_res_vis, y_resampled, 'ENN + RENN')

plt.show()
0 commit comments