scikit-learn-contrib
diff --git a/‎examples/pcovc/PCovC-BreastCancerDataset.ipynb‎
Lines changed: 12 additions & 10 deletions b/‎examples/pcovc/PCovC-BreastCancerDataset.ipynb‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎examples/pcovc/PCovC-IrisDataset.ipynb‎
Lines changed: 30 additions & 24 deletions b/‎examples/pcovc/PCovC-IrisDataset.ipynb‎
Lines changed: 30 additions & 24 deletions
diff --git a/‎src/skmatter/decomposition/_base_pcov.py‎
Lines changed: 2 additions & 0 deletions b/‎src/skmatter/decomposition/_base_pcov.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎tests/kernel_pcovc.py‎ renamed to ‎src/skmatter/decomposition/_kernel_pcovc.py‎
Lines changed: 2 additions & 2 deletions b/‎tests/kernel_pcovc.py‎ renamed to ‎src/skmatter/decomposition/_kernel_pcovc.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/pcovc.py‎ renamed to ‎src/skmatter/decomposition/_pcovc.py‎
Lines changed: 10 additions & 5 deletions b/‎tests/pcovc.py‎ renamed to ‎src/skmatter/decomposition/_pcovc.py‎
Lines changed: 10 additions & 5 deletions
diff --git a/‎tests/kernel_pcovr.py‎ renamed to ‎src/skmatter/decomposition/kernel_pcovr_comments.py‎
Lines changed: 2 additions & 2 deletions b/‎tests/kernel_pcovr.py‎ renamed to ‎src/skmatter/decomposition/kernel_pcovr_comments.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/pcovr.py‎ renamed to ‎src/skmatter/decomposition/pcovr_comments.py‎ b/‎tests/pcovr.py‎ renamed to ‎src/skmatter/decomposition/pcovr_comments.py‎
diff --git a/‎src/skmatter/decomposition/playground.py‎
Lines changed: 85 additions & 0 deletions b/‎src/skmatter/decomposition/playground.py‎
Lines changed: 85 additions & 0 deletions
diff --git a/‎tests/playground.py‎
Lines changed: 0 additions & 47 deletions b/‎tests/playground.py‎
Lines changed: 0 additions & 47 deletions
@@ -23,7 +23,9 @@
     "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
     "from sklearn.linear_model import LogisticRegressionCV\n",
     "\n",
-    "from pcovc import PCovC\n",
+    "import sys\n",
+    "sys.path.append('../../')\n",
+    "from src.skmatter.decomposition._pcovc import PCovC\n",
     "\n",
     "plt.rcParams[\"image.cmap\"] = \"tab10\"\n",
     "plt.rcParams['scatter.edgecolors'] = \"k\"\n",
@@ -40,7 +42,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -188,7 +190,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -208,16 +210,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.legend.Legend at 0x117e29160>"
+       "<matplotlib.legend.Legend at 0x11a62f610>"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 46,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -256,16 +258,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1180c9a90>"
+       "<matplotlib.collections.PathCollection at 0x11a6d3390>"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 47,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -300,7 +302,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
 
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -22,7 +22,9 @@
     "from sklearn.linear_model import LogisticRegressionCV, RidgeClassifierCV, SGDClassifier\n",
     "from sklearn.inspection import DecisionBoundaryDisplay\n",
     "\n",
-    "from pcovc import PCovC\n",
+    "import sys\n",
+    "sys.path.append('../../')\n",
+    "from src.skmatter.decomposition._pcovc import PCovC\n",
     "\n",
     "plt.rcParams[\"image.cmap\"] = \"tab10\"\n",
     "plt.rcParams['scatter.edgecolors'] = \"k\"\n",
@@ -40,7 +42,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -94,22 +96,26 @@
       "type of iris plant.  One class is linearly separable from the other 2; the\n",
       "latter are NOT linearly separable from each other.\n",
       "\n",
-      ".. dropdown:: References\n",
+      "|details-start|\n",
+      "**References**\n",
+      "|details-split|\n",
       "\n",
-      "  - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n",
-      "    Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n",
-      "    Mathematical Statistics\" (John Wiley, NY, 1950).\n",
-      "  - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n",
-      "    (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n",
-      "  - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n",
-      "    Structure and Classification Rule for Recognition in Partially Exposed\n",
-      "    Environments\".  IEEE Transactions on Pattern Analysis and Machine\n",
-      "    Intelligence, Vol. PAMI-2, No. 1, 67-71.\n",
-      "  - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\n",
-      "    on Information Theory, May 1972, 431-433.\n",
-      "  - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\n",
-      "    conceptual clustering system finds 3 classes in the data.\n",
-      "  - Many, many more ...\n",
+      "- Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n",
+      "  Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n",
+      "  Mathematical Statistics\" (John Wiley, NY, 1950).\n",
+      "- Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n",
+      "  (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n",
+      "- Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n",
+      "  Structure and Classification Rule for Recognition in Partially Exposed\n",
+      "  Environments\".  IEEE Transactions on Pattern Analysis and Machine\n",
+      "  Intelligence, Vol. PAMI-2, No. 1, 67-71.\n",
+      "- Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\n",
+      "  on Information Theory, May 1972, 431-433.\n",
+      "- See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\n",
+      "  conceptual clustering system finds 3 classes in the data.\n",
+      "- Many, many more ...\n",
+      "\n",
+      "|details-end|\n",
       "\n"
      ]
     }
@@ -129,7 +135,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -149,16 +155,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.legend.Legend at 0x117392f90>"
+       "<matplotlib.legend.Legend at 0x118a1de80>"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -197,7 +203,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -249,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
 
@@ -0,0 +1,2 @@
+
+
@@ -17,8 +17,8 @@
 from sklearn.utils._array_api import get_namespace, indexing_dtype
 from sklearn.svm import SVC
 
-from skmatter.preprocessing import KernelNormalizer
-from skmatter.utils import check_krr_fit, pcovr_kernel
+from ..preprocessing import KernelNormalizer
+from ..utils import check_krr_fit, pcovr_kernel
 
 
 class KernelPCovC(_BasePCA, LinearModel):
 
@@ -91,13 +91,18 @@ def check_cl_fit(classifier, X, y):
         fitted_classifier._validate_data(X, y, reset=False, multi_output=True)
 
         # Check compatibility with y
-        if fitted_classifier.coef_.ndim != y.ndim:
+
+        # The previous check compared coef_.ndim with y.ndim. Classifier
+        # coefficients are always 2-D, so that comparison is not meaningful;
+        # compare the coefficient columns with the feature columns of X instead.
+        if fitted_classifier.coef_.shape[1] != X.shape[1]:
             raise ValueError(
-                "The classifier coefficients have a dimension incompatible "
-                "with the supplied target space. "
-                "The coefficients have dimension %d and the targets "
-                "have dimension %d" % (fitted_classifier.coef_.ndim, y.ndim)
+                "The classifier coefficients have a shape incompatible "
+                "with the supplied feature space. "
+                "The coefficients have shape %s and the features "
+                "have shape %s" % (fitted_classifier.coef_.shape, X.shape)
             )
+        # LogisticRegression does not support multioutput, but RidgeClassifier does
         elif y.ndim == 2:
             if fitted_classifier.coef_.shape[0] != y.shape[1]:
                 raise ValueError(
 
@@ -14,8 +14,8 @@
 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
 from sklearn.utils.validation import check_is_fitted, check_X_y
 
-from skmatter.preprocessing import KernelNormalizer
-from skmatter.utils import check_krr_fit, pcovr_kernel
+from ..preprocessing import KernelNormalizer
+from ..utils import check_krr_fit, pcovr_kernel
 
 
 class KernelPCovR(_BasePCA, LinearModel):
 
@@ -0,0 +1,85 @@
+ 
+from sklearn.discriminant_analysis import StandardScaler
+from sklearn.kernel_ridge import KernelRidge
+from sklearn.linear_model import LogisticRegression, LinearRegression
+from sklearn.svm import SVC
+from _kernel_pcovc import KernelPCovC
+from _kernel_pcovr import KernelPCovR
+from _pcovc import PCovC
+from sklearn.datasets import load_breast_cancer as get_dataset
+from sklearn.datasets import load_diabetes as get_dataset2
+from sklearn.metrics import accuracy_score
+from _pcovr import PCovR
+
+X, Y = get_dataset(return_X_y=True)
+
+scaler = StandardScaler()
+X = scaler.fit_transform(X)
+print(X.shape)
+print(Y.shape)
+
+# classifier = LogisticRegression()
+# classifier.fit(X, Y)
+
+# print(classifier.coef_.ndim)
+
+# pcovc = PCovC(mixing=0.5, classifier=LogisticRegression())
+# print(pcovc.classifier.coef_.ndim)
+
+# pcovc.fit(X, Y)
+X = [[1, 2, 3, 4, 5],
+     [2, 3, 4, 5, 6]]
+Y = [[0, 1, 0, 1, 0],
+     [0, 1, 0, 1, 0]]
+
+classifier = LogisticRegression()
+classifier.fit(X, Y)
+model = PCovC(classifier=classifier)
+
+#model2 = PCovC(classifier=LogisticRegression())
+#model2.fit(X, Y)
+
+# Problem: coef_ is 2-D, e.g. shape (1, n_features=30), so its ndim presumably never matches a 1-D Y (original note was left unfinished).
+print(model.classifier.coef_.shape)
+#print(model2.classifier.coef_.ndim)
+
+model.fit(X, Y)
+y_pred = model.predict(X)
+print(accuracy_score(y_pred, Y))
+
+X_new, Y_new = get_dataset2(return_X_y=True)
+print(X_new.shape)
+print(Y_new.shape)
+
+
+'''
+Problem: check_lr_fit and check_cl_fit must do different things because the coef_ arrays of LogisticRegression and LinearRegression are shaped differently.
+So we need to change check_cl_fit.
+'''
+scaler = StandardScaler()
+X_new = scaler.fit_transform(X_new)
+regressor = LinearRegression()
+
+regressor.fit(X_new, Y_new)
+model2 = PCovR(regressor = regressor)
+print(model2.regressor.coef_)
+
+
+
+
+# model = KernelPCovC(
+#             mixing=0.5,
+#             classifier=SVC(),
+#             n_components=4
+# )
+
+# model2 = KernelPCovR(
+#             mixing=0.5,
+#             regressor=KernelRidge(gamma="scale"),
+#             n_components=4
+# )
+# model3 = SVC()
+# model3.fit(X, Y)
+# print(model3.dual_coef_.shape)
+# # print(model2.gamma, model2.regressor.gamma)
+# # model2.fit(X, Y)