#!/usr/bin/env python
# coding: utf-8

"""
Comparing KPCovC with KPCA
==========================
"""
# %%
#

import numpy as np

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import ListedColormap

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA, KernelPCA
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.linear_model import (
    LogisticRegressionCV,
    RidgeClassifierCV,
    SGDClassifier,
)

from skmatter.decomposition import PCovC, KernelPCovC

plt.rcParams["scatter.edgecolors"] = "k"
cm_bright = ListedColormap(["#d7191c", "#fdae61", "#a6d96a", "#3a7cdf"])

random_state = 0
n_components = 2

# %%
#
# To showcase the two methods, we combine two datasets generated with
# :func:`sklearn.datasets.make_moons`, shifting and rotating the second pair of
# moons so that we obtain four classes in total.

X1, y1 = datasets.make_moons(n_samples=750, noise=0.10, random_state=random_state)
X2, y2 = datasets.make_moons(n_samples=750, noise=0.10, random_state=random_state)

X2, y2 = X2 + 2, y2 + 2
R = np.array(
    [
        [np.cos(np.pi / 2), -np.sin(np.pi / 2)],
        [np.sin(np.pi / 2), np.cos(np.pi / 2)],
    ]
)
# rotate second pair of moons
X2 = X2 @ R.T

X = np.vstack([X1, X2])
y = np.concatenate([y1, y2])

# %%
#
# Original Data
# -------------

fig, ax = plt.subplots(figsize=(5.5, 5))
ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
ax.set_title("Original Data")


# %%
#
# Split and Scale Data

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=random_state
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# %%
#
# PCA and PCovC
# -------------
#
# Both PCA and PCovC fail to produce a linearly separable latent space for the
# four moon classes, as the quick check after the plots below confirms. We will
# need a kernel method to separate them effectively.

mixing = 0.10
alpha_d = 0.5
alpha_p = 0.4

models = {
    PCA(n_components=n_components): "PCA",
    PCovC(
        n_components=n_components,
        random_state=random_state,
        mixing=mixing,
        classifier=LinearSVC(),
    ): "PCovC",
}

fig, axs = plt.subplots(1, 2, figsize=(10, 4))

for ax, model in zip(axs, models):
    t_train = model.fit_transform(X_train_scaled, y_train)
    t_test = model.transform(X_test_scaled)

    ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_d, cmap=cm_bright, c=y_test)
    ax.scatter(t_train[:, 0], t_train[:, 1], cmap=cm_bright, c=y_train)

    ax.set_title(models[model])

plt.tight_layout()

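# %%
#
# As a quick sanity check on the claim above, we can fit a plain ``LinearSVC``
# directly on each latent space and look at its test accuracy. This is only a
# minimal sketch (the exact numbers depend on the split and the settings chosen
# here), but the scores are expected to stay well below those of the kernel
# methods in the next section.

for model, label in models.items():
    # both models were already fit above, so we can reuse their projections
    t_train = model.transform(X_train_scaled)
    t_test = model.transform(X_test_scaled)
    latent_clf = LinearSVC(random_state=random_state).fit(t_train, y_train)
    acc = latent_clf.score(t_test, y_test)
    print(f"{label}: LinearSVC accuracy on the latent space = {acc:.3f}")
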
# %%
#
# Kernel PCA and Kernel PCovC
# ---------------------------
#
# Below, we compare the latent spaces produced by Kernel PCA and Kernel PCovC.
# Since Kernel PCA is unsupervised, a separate ``LinearSVC`` is trained on its
# latent space, whereas Kernel PCovC uses the classifier it fits internally
# (``classifier_``). We then compare the resulting decision boundaries and test
# accuracy scores.

fig, axs = plt.subplots(1, 2, figsize=(13, 6))

center = True
resolution = 1000

kernel_params = {"kernel": "rbf", "gamma": 2}

models = {
    KernelPCA(n_components=n_components, **kernel_params): {
        "title": "Kernel PCA",
        "eps": 0.1,
    },
    KernelPCovC(
        n_components=n_components,
        random_state=random_state,
        mixing=mixing,
        center=center,
        **kernel_params,
    ): {"title": "Kernel PCovC", "eps": 2},
}

for ax, model in zip(axs, models):
    t_train = model.fit_transform(X_train_scaled, y_train)
    t_test = model.transform(X_test_scaled)

    if isinstance(model, KernelPCA):
        t_classifier = LinearSVC(random_state=random_state).fit(t_train, y_train)
        score = t_classifier.score(t_test, y_test)
    else:
        t_classifier = model.classifier_
        score = model.score(X_test_scaled, y_test)

    DecisionBoundaryDisplay.from_estimator(
        estimator=t_classifier,
        X=t_test,
        ax=ax,
        response_method="predict",
        cmap=cm_bright,
        alpha=alpha_d,
        eps=models[model]["eps"],
        grid_resolution=resolution,
    )
    ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_p, cmap=cm_bright, c=y_test)
    ax.scatter(t_train[:, 0], t_train[:, 1], cmap=cm_bright, c=y_train)
    ax.set_title(models[model]["title"])

    ax.text(
        0.82,
        0.03,
        f"Score: {round(score, 3)}",
        fontsize=mpl.rcParams["axes.titlesize"],
        transform=ax.transAxes,
    )
    ax.set_xticks([])
    ax.set_yticks([])

fig.subplots_adjust(wspace=0.04)
plt.tight_layout()


# %%
#
# Effect of KPCovC Classifier on KPCovC Maps and Decision Boundaries
# ----------------------------------------------------------------------
#
# Kernel PCovC builds its latent space from the evidence :math:`\mathbf{Z}`
# generated by the underlying classifier, which is fit on a computed kernel
# :math:`\mathbf{K}` and the labels :math:`\mathbf{Y}`. Different underlying
# classifiers therefore produce different latent space maps :math:`\mathbf{T}`,
# and the decision boundaries of the linear classifier refit between
# :math:`\mathbf{T}` and :math:`\mathbf{Y}` to make predictions vary as well.
# The short sketch below illustrates the idea before we compare several
# classifiers.

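# %%
#
# As a rough, hand-rolled illustration of that evidence (only a sketch of the
# idea, not skmatter's exact implementation, and the ``gamma`` value here is
# arbitrary), we can fit one of the classifiers on an RBF kernel of the training
# data and inspect its decision function, which plays the role of
# :math:`\mathbf{Z}` above.

from sklearn.metrics.pairwise import rbf_kernel

# kernel matrix K between all training samples
K_train = rbf_kernel(X_train_scaled, gamma=2)
# classifier fit on K and Y; its decision function acts as the evidence Z
evidence_clf = LogisticRegressionCV(random_state=random_state).fit(K_train, y_train)
Z_train = evidence_clf.decision_function(K_train)
print("Shape of the evidence matrix Z:", Z_train.shape)

# %%
#
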
names = ["Logistic Regression", "Ridge Classifier", "Linear SVC", "SGD Classifier"]

models = {
    LogisticRegressionCV(random_state=random_state): {
        "kernel_params": {"kernel": "rbf", "gamma": 12},
        "title": "Logistic Regression",
    },
    RidgeClassifierCV(): {
        "kernel_params": {"kernel": "rbf", "gamma": 1},
        "title": "Ridge Classifier",
        "eps": 0.40,
    },
    LinearSVC(random_state=random_state): {
        "kernel_params": {"kernel": "rbf", "gamma": 15},
        "title": "Support Vector Classification",
    },
    SGDClassifier(random_state=random_state): {
        "kernel_params": {"kernel": "rbf", "gamma": 15},
        "title": "SGD Classifier",
        "eps": 10,
    },
}

fig, axs = plt.subplots(1, len(models), figsize=(4 * len(models), 4))

for ax, name, model in zip(axs.flat, names, models):
    kpcovc = KernelPCovC(
        n_components=n_components,
        random_state=random_state,
        mixing=mixing,
        classifier=model,
        center=center,
        **models[model]["kernel_params"],
    )
    t_kpcovc_train = kpcovc.fit_transform(X_train_scaled, y_train)
    t_kpcovc_test = kpcovc.transform(X_test_scaled)
    kpcovc_score = kpcovc.score(X_test_scaled, y_test)

    DecisionBoundaryDisplay.from_estimator(
        estimator=kpcovc.classifier_,
        X=t_kpcovc_test,
        ax=ax,
        response_method="predict",
        cmap=cm_bright,
        alpha=alpha_d,
        eps=models[model].get("eps", 1),
        grid_resolution=resolution,
    )

    ax.scatter(
        t_kpcovc_test[:, 0],
        t_kpcovc_test[:, 1],
        cmap=cm_bright,
        alpha=alpha_p,
        c=y_test,
    )
    ax.scatter(t_kpcovc_train[:, 0], t_kpcovc_train[:, 1], cmap=cm_bright, c=y_train)
    ax.text(
        0.70,
        0.03,
        f"Score: {round(kpcovc_score, 3)}",
        fontsize=mpl.rcParams["axes.titlesize"],
        transform=ax.transAxes,
    )

    ax.set_title(name)
    ax.set_xticks([])
    ax.set_yticks([])

fig.subplots_adjust(wspace=0.04)
plt.tight_layout()