
Commit 004287c

Adding Kernel PCovC (#254)
1 parent f7a1ae6 commit 004287c

20 files changed: +1822 −333 lines

CHANGELOG

Lines changed: 4 additions & 1 deletion
@@ -13,13 +13,16 @@ The rules for CHANGELOG file:
 
 Unreleased
 ----------
+- Fix typos and make minor edits to ``PCovC`` and ``KPCovR`` docs (#254)
+- Fix assertTrue typos in ``KPCovR`` testing suite (#254)
+- Add ``KernelPCovC`` class and examples (#254)
 
 0.3.0 (2025/06/12)
 ------------------
 - Add ``_BasePCov`` class (#248)
 - Add ``PCovC`` class that inherits shared functionality from ``_BasePCov`` (#248)
 - Add ``PCovC`` testing suite and examples (#248)
-- Modify ``PCovR`` to inherit shared functionality from ``_BasePCov_`` (#248)
+- Modify ``PCovR`` to inherit shared functionality from ``_BasePCov`` (#248)
 - Update to sklearn >= 1.7.0 and scipy >= 1.15.0 (#239, #257)
 - Fixed moved function import from scipy and bump scipy dependency to 1.15.0 (#236)
 - Fix rendering issues for `SparseKDE` and `QuickShift` (#236)

docs/src/references/decomposition.rst

Lines changed: 16 additions & 0 deletions
@@ -54,3 +54,19 @@ Kernel PCovR
    .. automethod:: predict
    .. automethod:: inverse_transform
    .. automethod:: score
+
+.. _KPCovC-api:
+
+Kernel PCovC
+------------
+
+.. autoclass:: skmatter.decomposition.KernelPCovC
+    :show-inheritance:
+    :special-members:
+
+    .. automethod:: fit
+    .. automethod:: transform
+    .. automethod:: predict
+    .. automethod:: inverse_transform
+    .. automethod:: decision_function
+    .. automethod:: score
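
A minimal usage sketch of the ``KernelPCovC`` API documented above (an
editorial illustration; the hyperparameter values are placeholders, not
taken from this commit):

    from sklearn.datasets import make_moons
    from skmatter.decomposition import KernelPCovC

    X, y = make_moons(n_samples=200, noise=0.1, random_state=0)

    kpcovc = KernelPCovC(n_components=2, mixing=0.5, kernel="rbf", gamma=2)
    T = kpcovc.fit_transform(X, y)  # latent projection of the training data
    y_pred = kpcovc.predict(X)      # class predictions
    acc = kpcovc.score(X, y)        # classification accuracy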

examples/pcovc/KPCovC_Comparison.py

Lines changed: 265 additions & 0 deletions
@@ -0,0 +1,265 @@
#!/usr/bin/env python
# coding: utf-8

"""
Comparing KPCovC with KPCA
==========================
"""
# %%
#

import numpy as np

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import ListedColormap

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA, KernelPCA
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.linear_model import (
    LogisticRegressionCV,
    RidgeClassifierCV,
    SGDClassifier,
)

from skmatter.decomposition import PCovC, KernelPCovC

plt.rcParams["scatter.edgecolors"] = "k"
cm_bright = ListedColormap(["#d7191c", "#fdae61", "#a6d96a", "#3a7cdf"])

random_state = 0
n_components = 2
# %%
#
# To build a four-class problem, we combine two copies of the two-moons
# dataset from :func:`sklearn.datasets.make_moons`, shifting and rotating
# the second copy.

X1, y1 = datasets.make_moons(n_samples=750, noise=0.10, random_state=random_state)
X2, y2 = datasets.make_moons(n_samples=750, noise=0.10, random_state=random_state)

X2, y2 = X2 + 2, y2 + 2
R = np.array(
    [
        [np.cos(np.pi / 2), -np.sin(np.pi / 2)],
        [np.sin(np.pi / 2), np.cos(np.pi / 2)],
    ]
)
# rotate the second pair of moons by 90 degrees
X2 = X2 @ R.T

X = np.vstack([X1, X2])
y = np.concatenate([y1, y2])
# %%
#
# Original Data
# -------------

fig, ax = plt.subplots(figsize=(5.5, 5))
ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
ax.set_title("Original Data")
# %%
#
# Scale Data
# ----------

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=random_state
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# %%
#
# PCA and PCovC
# -------------
#
# Both PCA and PCovC fail to produce linearly separable latent space
# maps. We will need a kernel method to effectively separate the moon classes.

mixing = 0.10
alpha_d = 0.5
alpha_p = 0.4
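# Editorial note (not in the original example): in skmatter's PCov* models,
# ``mixing`` interpolates between pure PCA (mixing = 1) and a projection
# driven entirely by the supervised part (mixing = 0). ``alpha_d`` and
# ``alpha_p`` are only matplotlib transparency values, used below for the
# decision-boundary fill and the test-point scatter respectively.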
models = {
    PCA(n_components=n_components): "PCA",
    PCovC(
        n_components=n_components,
        random_state=random_state,
        mixing=mixing,
        classifier=LinearSVC(),
    ): "PCovC",
}

fig, axs = plt.subplots(1, 2, figsize=(10, 4))

for ax, model in zip(axs, models):
    t_train = model.fit_transform(X_train_scaled, y_train)
    t_test = model.transform(X_test_scaled)

    ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_d, cmap=cm_bright, c=y_test)
    ax.scatter(t_train[:, 0], t_train[:, 1], cmap=cm_bright, c=y_train)

    ax.set_title(models[model])
plt.tight_layout()
# %%
#
# Kernel PCA and Kernel PCovC
# ---------------------------
#
# Here we compare the latent spaces produced by KPCA and KPCovC. Since
# KPCA is unsupervised, a linear SVC is trained on its latent space,
# while KPCovC fits its own classifier internally; we then compare the
# resulting decision boundaries and test-set accuracy scores.
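# Editorial note (not in the original example): ``eps`` pads the range of
# the grid that DecisionBoundaryDisplay evaluates and ``grid_resolution``
# sets its density; ``center`` is assumed to toggle kernel centering in
# KernelPCovC, mirroring the same option in KernelPCovR.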
fig, axs = plt.subplots(1, 2, figsize=(13, 6))

center = True
resolution = 1000

kernel_params = {"kernel": "rbf", "gamma": 2}

models = {
    KernelPCA(n_components=n_components, **kernel_params): {
        "title": "Kernel PCA",
        "eps": 0.1,
    },
    KernelPCovC(
        n_components=n_components,
        random_state=random_state,
        mixing=mixing,
        center=center,
        **kernel_params,
    ): {"title": "Kernel PCovC", "eps": 2},
}

for ax, model in zip(axs, models):
    t_train = model.fit_transform(X_train_scaled, y_train)
    t_test = model.transform(X_test_scaled)

    if isinstance(model, KernelPCA):
        t_classifier = LinearSVC(random_state=random_state).fit(t_train, y_train)
        score = t_classifier.score(t_test, y_test)
    else:
        t_classifier = model.classifier_
        score = model.score(X_test_scaled, y_test)

    DecisionBoundaryDisplay.from_estimator(
        estimator=t_classifier,
        X=t_test,
        ax=ax,
        response_method="predict",
        cmap=cm_bright,
        alpha=alpha_d,
        eps=models[model]["eps"],
        grid_resolution=resolution,
    )
    ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_p, cmap=cm_bright, c=y_test)
    ax.scatter(t_train[:, 0], t_train[:, 1], cmap=cm_bright, c=y_train)
    ax.set_title(models[model]["title"])

    ax.text(
        0.82,
        0.03,
        f"Score: {round(score, 3)}",
        fontsize=mpl.rcParams["axes.titlesize"],
        transform=ax.transAxes,
    )
    ax.set_xticks([])
    ax.set_yticks([])

fig.subplots_adjust(wspace=0.04)
plt.tight_layout()
# %%
#
# Effect of the KPCovC Classifier on Latent Maps and Decision Boundaries
# -----------------------------------------------------------------------
#
# Kernel PCovC builds its latent space from the evidence :math:`\mathbf{Z}`
# generated by the underlying classifier fit on a computed kernel
# :math:`\mathbf{K}` and the labels :math:`\mathbf{Y}`, so each choice of
# classifier produces a different latent space map. Hence, the decision
# boundaries produced by the linear classifier fit between
# :math:`\mathbf{T}` and :math:`\mathbf{Y}` to make predictions will also
# vary.
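# %%
#
# Editorial aside (the formula below is assumed from the PCovC construction
# and is not stated in this example): the latent map is taken from the top
# eigenvectors of a modified kernel that blends :math:`\mathbf{K}` with the
# classifier evidence :math:`\mathbf{Z}`,
#
#   K_tilde = mixing * K + (1 - mixing) * Z @ Z.T
#
# so swapping the classifier changes Z, and with it the latent map T and the
# decision boundaries drawn below. A commented numpy sketch:
#
#   from sklearn.metrics.pairwise import rbf_kernel
#   K = rbf_kernel(X_train_scaled, gamma=12)
#   Z = LogisticRegressionCV().fit(K, y_train).decision_function(K)
#   K_tilde = mixing * K + (1 - mixing) * Z @ Z.T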
names = ["Logistic Regression", "Ridge Classifier", "Linear SVC", "SGD Classifier"]

models = {
    LogisticRegressionCV(random_state=random_state): {
        "kernel_params": {"kernel": "rbf", "gamma": 12},
        "title": "Logistic Regression",
    },
    RidgeClassifierCV(): {
        "kernel_params": {"kernel": "rbf", "gamma": 1},
        "title": "Ridge Classifier",
        "eps": 0.40,
    },
    LinearSVC(random_state=random_state): {
        "kernel_params": {"kernel": "rbf", "gamma": 15},
        "title": "Support Vector Classification",
    },
    SGDClassifier(random_state=random_state): {
        "kernel_params": {"kernel": "rbf", "gamma": 15},
        "title": "SGD Classifier",
        "eps": 10,
    },
}

fig, axs = plt.subplots(1, len(models), figsize=(4 * len(models), 4))

for ax, name, model in zip(axs.flat, names, models):
    kpcovc = KernelPCovC(
        n_components=n_components,
        random_state=random_state,
        mixing=mixing,
        classifier=model,
        center=center,
        **models[model]["kernel_params"],
    )
    t_kpcovc_train = kpcovc.fit_transform(X_train_scaled, y_train)
    t_kpcovc_test = kpcovc.transform(X_test_scaled)
    kpcovc_score = kpcovc.score(X_test_scaled, y_test)

    DecisionBoundaryDisplay.from_estimator(
        estimator=kpcovc.classifier_,
        X=t_kpcovc_test,
        ax=ax,
        response_method="predict",
        cmap=cm_bright,
        alpha=alpha_d,
        eps=models[model].get("eps", 1),
        grid_resolution=resolution,
    )

    ax.scatter(
        t_kpcovc_test[:, 0],
        t_kpcovc_test[:, 1],
        cmap=cm_bright,
        alpha=alpha_p,
        c=y_test,
    )
    ax.scatter(t_kpcovc_train[:, 0], t_kpcovc_train[:, 1], cmap=cm_bright, c=y_train)
    ax.text(
        0.70,
        0.03,
        f"Score: {round(kpcovc_score, 3)}",
        fontsize=mpl.rcParams["axes.titlesize"],
        transform=ax.transAxes,
    )

    ax.set_title(name)
    ax.set_xticks([])
    ax.set_yticks([])
fig.subplots_adjust(wspace=0.04)

plt.tight_layout()
