Skip to content

Commit 72a951e

Browse files
committed
Moving files around
1 parent 9226b7a commit 72a951e

File tree

11 files changed

+153
-95
lines changed

11 files changed

+153
-95
lines changed

examples/pcovc/PCovC-BreastCancerDataset.ipynb

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
2424
"from sklearn.linear_model import LogisticRegressionCV\n",
2525
"\n",
26-
"from pcovc import PCovC\n",
26+
"import sys\n",
27+
"sys.path.append('../../')\n",
28+
"from src.skmatter.decomposition._pcovc import PCovC\n",
2729
"\n",
2830
"plt.rcParams[\"image.cmap\"] = \"tab10\"\n",
2931
"plt.rcParams['scatter.edgecolors'] = \"k\"\n",
@@ -40,7 +42,7 @@
4042
},
4143
{
4244
"cell_type": "code",
43-
"execution_count": 2,
45+
"execution_count": null,
4446
"metadata": {},
4547
"outputs": [
4648
{
@@ -188,7 +190,7 @@
188190
},
189191
{
190192
"cell_type": "code",
191-
"execution_count": 3,
193+
"execution_count": null,
192194
"metadata": {},
193195
"outputs": [],
194196
"source": [
@@ -208,16 +210,16 @@
208210
},
209211
{
210212
"cell_type": "code",
211-
"execution_count": 4,
213+
"execution_count": null,
212214
"metadata": {},
213215
"outputs": [
214216
{
215217
"data": {
216218
"text/plain": [
217-
"<matplotlib.legend.Legend at 0x117e29160>"
219+
"<matplotlib.legend.Legend at 0x11a62f610>"
218220
]
219221
},
220-
"execution_count": 4,
222+
"execution_count": 46,
221223
"metadata": {},
222224
"output_type": "execute_result"
223225
},
@@ -256,16 +258,16 @@
256258
},
257259
{
258260
"cell_type": "code",
259-
"execution_count": 5,
261+
"execution_count": null,
260262
"metadata": {},
261263
"outputs": [
262264
{
263265
"data": {
264266
"text/plain": [
265-
"<matplotlib.collections.PathCollection at 0x1180c9a90>"
267+
"<matplotlib.collections.PathCollection at 0x11a6d3390>"
266268
]
267269
},
268-
"execution_count": 5,
270+
"execution_count": 47,
269271
"metadata": {},
270272
"output_type": "execute_result"
271273
},
@@ -300,7 +302,7 @@
300302
},
301303
{
302304
"cell_type": "code",
303-
"execution_count": 6,
305+
"execution_count": null,
304306
"metadata": {},
305307
"outputs": [
306308
{

examples/pcovc/PCovC-IrisDataset.ipynb

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 2,
12+
"execution_count": 1,
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
@@ -22,7 +22,9 @@
2222
"from sklearn.linear_model import LogisticRegressionCV, RidgeClassifierCV, SGDClassifier\n",
2323
"from sklearn.inspection import DecisionBoundaryDisplay\n",
2424
"\n",
25-
"from pcovc import PCovC\n",
25+
"import sys\n",
26+
"sys.path.append('../../')\n",
27+
"from src.skmatter.decomposition._pcovc import PCovC\n",
2628
"\n",
2729
"plt.rcParams[\"image.cmap\"] = \"tab10\"\n",
2830
"plt.rcParams['scatter.edgecolors'] = \"k\"\n",
@@ -40,7 +42,7 @@
4042
},
4143
{
4244
"cell_type": "code",
43-
"execution_count": 3,
45+
"execution_count": 2,
4446
"metadata": {},
4547
"outputs": [
4648
{
@@ -94,22 +96,26 @@
9496
"type of iris plant. One class is linearly separable from the other 2; the\n",
9597
"latter are NOT linearly separable from each other.\n",
9698
"\n",
97-
".. dropdown:: References\n",
99+
"|details-start|\n",
100+
"**References**\n",
101+
"|details-split|\n",
98102
"\n",
99-
" - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n",
100-
" Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n",
101-
" Mathematical Statistics\" (John Wiley, NY, 1950).\n",
102-
" - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n",
103-
" (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n",
104-
" - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n",
105-
" Structure and Classification Rule for Recognition in Partially Exposed\n",
106-
" Environments\". IEEE Transactions on Pattern Analysis and Machine\n",
107-
" Intelligence, Vol. PAMI-2, No. 1, 67-71.\n",
108-
" - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\n",
109-
" on Information Theory, May 1972, 431-433.\n",
110-
" - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al\"s AUTOCLASS II\n",
111-
" conceptual clustering system finds 3 classes in the data.\n",
112-
" - Many, many more ...\n",
103+
"- Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n",
104+
" Annals of Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n",
105+
" Mathematical Statistics\" (John Wiley, NY, 1950).\n",
106+
"- Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n",
107+
" (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n",
108+
"- Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n",
109+
" Structure and Classification Rule for Recognition in Partially Exposed\n",
110+
" Environments\". IEEE Transactions on Pattern Analysis and Machine\n",
111+
" Intelligence, Vol. PAMI-2, No. 1, 67-71.\n",
112+
"- Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\n",
113+
" on Information Theory, May 1972, 431-433.\n",
114+
"- See also: 1988 MLC Proceedings, 54-64. Cheeseman et al.'s AUTOCLASS II\n",
115+
" conceptual clustering system finds 3 classes in the data.\n",
116+
"- Many, many more ...\n",
117+
"\n",
118+
"|details-end|\n",
113119
"\n"
114120
]
115121
}
@@ -129,7 +135,7 @@
129135
},
130136
{
131137
"cell_type": "code",
132-
"execution_count": 4,
138+
"execution_count": 3,
133139
"metadata": {},
134140
"outputs": [],
135141
"source": [
@@ -149,16 +155,16 @@
149155
},
150156
{
151157
"cell_type": "code",
152-
"execution_count": 5,
158+
"execution_count": 4,
153159
"metadata": {},
154160
"outputs": [
155161
{
156162
"data": {
157163
"text/plain": [
158-
"<matplotlib.legend.Legend at 0x117392f90>"
164+
"<matplotlib.legend.Legend at 0x118a1de80>"
159165
]
160166
},
161-
"execution_count": 5,
167+
"execution_count": 4,
162168
"metadata": {},
163169
"output_type": "execute_result"
164170
},
@@ -197,7 +203,7 @@
197203
},
198204
{
199205
"cell_type": "code",
200-
"execution_count": 6,
206+
"execution_count": 5,
201207
"metadata": {},
202208
"outputs": [
203209
{
@@ -249,7 +255,7 @@
249255
},
250256
{
251257
"cell_type": "code",
252-
"execution_count": 11,
258+
"execution_count": 6,
253259
"metadata": {},
254260
"outputs": [
255261
{
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+

tests/kernel_pcovc.py renamed to src/skmatter/decomposition/_kernel_pcovc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
from sklearn.utils._array_api import get_namespace, indexing_dtype
1818
from sklearn.svm import SVC
1919

20-
from skmatter.preprocessing import KernelNormalizer
21-
from skmatter.utils import check_krr_fit, pcovr_kernel
20+
from ..preprocessing import KernelNormalizer
21+
from ..utils import check_krr_fit, pcovr_kernel
2222

2323

2424
class KernelPCovC(_BasePCA, LinearModel):

tests/pcovc.py renamed to src/skmatter/decomposition/_pcovc.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,18 @@ def check_cl_fit(classifier, X, y):
9191
fitted_classifier._validate_data(X, y, reset=False, multi_output=True)
9292

9393
# Check compatibility with y
94-
if fitted_classifier.coef_.ndim != y.ndim:
94+
95+
# Previously: `if fitted_classifier.coef_.ndim != y.ndim:`.
96+
# The classifier coefficients are always 2-D, so comparing their ndim with
97+
# that of y is not meaningful; compare the feature dimension with X instead.
98+
if fitted_classifier.coef_.shape[1] != X.shape[1]:
9599
raise ValueError(
96-
"The classifier coefficients have a dimension incompatible "
97-
"with the supplied target space. "
98-
"The coefficients have dimension %d and the targets "
99-
"have dimension %d" % (fitted_classifier.coef_.ndim, y.ndim)
100+
"The classifier coefficients have a shape incompatible "
101+
"with the supplied feature space. "
102+
"The coefficients have shape %d and the features "
103+
"have shape %d" % (fitted_classifier.coef_.shape, X.shape)
100104
)
105+
# LogisticRegression does not support multioutput, but RidgeClassifier does
101106
elif y.ndim == 2:
102107
if fitted_classifier.coef_.shape[0] != y.shape[1]:
103108
raise ValueError(

tests/kernel_pcovr.py renamed to src/skmatter/decomposition/kernel_pcovr_comments.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
1515
from sklearn.utils.validation import check_is_fitted, check_X_y
1616

17-
from skmatter.preprocessing import KernelNormalizer
18-
from skmatter.utils import check_krr_fit, pcovr_kernel
17+
from ..preprocessing import KernelNormalizer
18+
from ..utils import check_krr_fit, pcovr_kernel
1919

2020

2121
class KernelPCovR(_BasePCA, LinearModel):
File renamed without changes.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
2+
from sklearn.discriminant_analysis import StandardScaler
3+
from sklearn.kernel_ridge import KernelRidge
4+
from sklearn.linear_model import LogisticRegression, LinearRegression
5+
from sklearn.svm import SVC
6+
from _kernel_pcovc import KernelPCovC
7+
from _kernel_pcovr import KernelPCovR
8+
from _pcovc import PCovC
9+
from sklearn.datasets import load_breast_cancer as get_dataset
10+
from sklearn.datasets import load_diabetes as get_dataset2
11+
from sklearn.metrics import accuracy_score
12+
from _pcovr import PCovR
13+
14+
X, Y = get_dataset(return_X_y=True)
15+
16+
scaler = StandardScaler()
17+
X = scaler.fit_transform(X)
18+
print(X.shape)
19+
print(Y.shape)
20+
21+
# classifier = LogisticRegression()
22+
# classifier.fit(X, Y)
23+
24+
# print(classifier.coef_.ndim)
25+
26+
# pcovc = PCovC(mixing=0.5, classifier=LogisticRegression())
27+
# print(pcovc.classifier.coef_.ndim)
28+
29+
# pcovc.fit(X, Y)
30+
X = [[1, 2, 3, 4, 5],
31+
[2, 3, 4, 5, 6]]
32+
Y = [[0, 1, 0, 1, 0],
33+
[0, 1, 0, 1, 0]]
34+
35+
classifier = LogisticRegression()
36+
classifier.fit(X, Y)
37+
model = PCovC(classifier=classifier)
38+
39+
#model2 = PCovC(classifier=LogisticRegression())
40+
#model2.fit(X, Y)
41+
42+
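# Problem: coef_.shape is (1, n_features=30), which is not the same as the shape of the targets (the original note is cut off here; presumably it refers to Y's dimensionality).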
43+
print(model.classifier.coef_.shape)
44+
#print(model2.classifier.coef_.ndim)
45+
46+
model.fit(X, Y)
47+
y_pred = model.predict(X)
48+
print(accuracy_score(y_pred, Y))
49+
50+
X_new, Y_new = get_dataset2(return_X_y=True)
51+
print(X_new.shape)
52+
print(Y_new.shape)
53+
54+
55+
'''
56+
Problem is this: check_lr_fit and check_cl_fit do different things because the coefficients for Logistic/Linear regression are different.
57+
So we need to change check_cl_fit
58+
'''
59+
scaler = StandardScaler()
60+
X_new = scaler.fit_transform(X_new)
61+
regressor = LinearRegression()
62+
63+
regressor.fit(X_new, Y_new)
64+
model2 = PCovR(regressor = regressor)
65+
print(model2.regressor.coef_)
66+
67+
68+
69+
70+
# model = KernelPCovC(
71+
# mixing=0.5,
72+
# classifier=SVC(),
73+
# n_components=4
74+
# )
75+
76+
# model2 = KernelPCovR(
77+
# mixing=0.5,
78+
# regressor=KernelRidge(gamma="scale"),
79+
# n_components=4
80+
# )
81+
# model3 = SVC()
82+
# model3.fit(X, Y)
83+
# print(model3.dual_coef_.shape)
84+
# # print(model2.gamma, model2.regressor.gamma)
85+
# # model2.fit(X, Y)

tests/playground.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

0 commit comments

Comments
 (0)