Skip to content

Commit 5e619ae

Browse files
committed
Docstring and other skmatter/decomposition suggestions
1 parent 4962500 commit 5e619ae

File tree

6 files changed

+69
-35
lines changed

6 files changed

+69
-35
lines changed

src/skmatter/decomposition/_kernel_pcovc.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222

2323

2424
class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
25-
r"""Kernel Principal Covariates Classification is a modification on the Principal
26-
Covariates Classification proposed in [Jorgensen2025]_. It determines a latent-space
27-
projection :math:`\mathbf{T}` which minimizes a combined loss in supervised and unsupervised
25+
r"""Kernel Principal Covariates Classification (KPCovC).
26+
27+
KPCovC is a modification on the Principal Covariates Classification
28+
proposed in [Jorgensen2025]_. It determines a latent-space projection
29+
:math:`\mathbf{T}` which minimizes a combined loss in supervised and unsupervised
2830
tasks in the reproducing kernel Hilbert space (RKHS).
2931
3032
This projection is determined by the eigendecomposition of a modified gram matrix
@@ -69,16 +71,22 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
6971
run randomized SVD by the method of Halko et al.
7072
7173
classifier: `estimator object` or `precomputed`, default=None
72-
classifier for computing :math:`{\mathbf{Z}}`. The classifier should be one of
73-
`sklearn.linear_model.LogisticRegression`, `sklearn.linear_model.LogisticRegressionCV`,
74-
`sklearn.svm.LinearSVC`, `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`,
75-
`sklearn.linear_model.RidgeClassifier`, `sklearn.linear_model.RidgeClassifierCV`,
76-
`sklearn.linear_model.SGDClassifier`, or `Perceptron`.
74+
classifier for computing :math:`{\mathbf{Z}}`. The classifier should be
75+
one of the following:
76+
77+
- ``sklearn.linear_model.LogisticRegression()``
78+
- ``sklearn.linear_model.LogisticRegressionCV()``
79+
- ``sklearn.svm.LinearSVC()``
80+
- ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis()``
81+
- ``sklearn.linear_model.RidgeClassifier()``
82+
- ``sklearn.linear_model.RidgeClassifierCV()``
83+
- ``sklearn.linear_model.Perceptron()``
84+
7785
If a pre-fitted classifier is provided, it is used to compute :math:`{\mathbf{Z}}`.
7886
If None, ``sklearn.linear_model.LogisticRegression()``
7987
is used as the classifier.
8088
81-
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='linear'
89+
kernel : {"linear", "poly", "rbf", "sigmoid", "precomputed"} or callable, default="linear"
8290
Kernel.
8391
8492
gamma : {'scale', 'auto'} or float, default=None
@@ -223,8 +231,9 @@ def __init__(
223231
self.classifier = classifier
224232

225233
def fit(self, X, Y, W=None):
226-
r"""Fit the model with X and Y. A computed kernel K is
227-
derived from X, and W is taken from the
234+
r"""Fit the model with X and Y.
235+
236+
A computed kernel K is derived from X, and W is taken from the
228237
coefficients of a linear classifier fit between K and Y to compute
229238
Z:
230239
@@ -265,7 +274,7 @@ def fit(self, X, Y, W=None):
265274

266275
super().fit(X)
267276

268-
K = super()._get_kernel(X)
277+
K = self._get_kernel(X)
269278

270279
if self.center:
271280
self.centerer_ = KernelNormalizer()

src/skmatter/decomposition/_kernel_pcovr.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@
1010

1111

1212
class KernelPCovR(_BaseKPCov):
13-
r"""Kernel Principal Covariates Regression, as described in [Helfrecht2020]_,
14-
determines a latent-space projection :math:`\mathbf{T}` which minimizes a combined
15-
loss in supervised and unsupervised tasks in the reproducing kernel Hilbert space
16-
(RKHS).
13+
r"""Kernel Principal Covariates Regression (KPCovR).
14+
15+
As described in [Helfrecht2020]_, KPCovR determines a latent-space projection
16+
:math:`\mathbf{T}` which minimizes a combined loss in supervised and unsupervised
17+
tasks in the reproducing kernel Hilbert space (RKHS).
1718
1819
This projection is determined by the eigendecomposition of a modified gram matrix
1920
:math:`\mathbf{\tilde{K}}`
@@ -243,7 +244,7 @@ def fit(self, X, Y, W=None):
243244

244245
super().fit(X)
245246

246-
K = super()._get_kernel(X)
247+
K = self._get_kernel(X)
247248

248249
if self.center:
249250
self.centerer_ = KernelNormalizer()
@@ -382,7 +383,9 @@ def inverse_transform(self, T):
382383

383384
def score(self, X, y):
384385
r"""Computes the (negative) loss values for KernelPCovR on the given predictor
385-
and response variables. The loss in :math:`\mathbf{K}`, as explained in
386+
and response variables.
387+
388+
The loss in :math:`\mathbf{K}`, as explained in
386389
[Helfrecht2020]_ does not correspond to a traditional Gram loss
387390
:math:`\mathbf{K} - \mathbf{TT}^T`. Indicating the kernel between set A and B as
388391
:math:`\mathbf{K}_{AB}`, the projection of set A as :math:`\mathbf{T}_A`, and

src/skmatter/decomposition/_kpcov.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@
2121

2222

2323
class _BaseKPCov(_BasePCA, LinearModel, metaclass=ABCMeta):
24+
"""Base class for KernelPCovR and KernelPCovC methods.
25+
26+
Warning: This class should not be used directly.
27+
Use derived classes instead.
28+
"""
29+
2430
@abstractmethod
2531
def __init__(
2632
self,
@@ -68,7 +74,6 @@ def _get_kernel(self, X, Y=None):
6874
X, Y, metric=self.kernel, filter_params=True, n_jobs=self.n_jobs, **params
6975
)
7076

71-
@abstractmethod
7277
def fit(self, X):
7378
"""Contains the common functionality for the KPCovR and KPCovC fit methods,
7479
but leaves the rest of the functionality to the subclass.

src/skmatter/decomposition/_pcov.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@
2121

2222

2323
class _BasePCov(_BasePCA, LinearModel, metaclass=ABCMeta):
24+
"""Base class for PCovR and PCovC methods.
25+
26+
Warning: This class should not be used directly.
27+
Use derived classes instead.
28+
"""
29+
2430
@abstractmethod
2531
def __init__(
2632
self,
@@ -42,7 +48,6 @@ def __init__(
4248
self.random_state = random_state
4349
self.whiten = whiten
4450

45-
@abstractmethod
4651
def fit(self, X):
4752
"""Contains the common functionality for the PCovR and PCovC fit methods,
4853
but leaves the rest of the functionality to the subclass.

src/skmatter/decomposition/_pcovc.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919

2020

2121
class PCovC(LinearClassifierMixin, _BasePCov):
22-
r"""Principal Covariates Classification, as described in [Jorgensen2025]_,
23-
determines a latent-space projection :math:`\mathbf{T}`
24-
which minimizes a combined loss in supervised and unsupervised tasks.
22+
r"""Principal Covariates Classification (PCovC).
23+
24+
As described in [Jorgensen2025]_, PCovC determines a latent-space projection
25+
:math:`\mathbf{T}` which minimizes a combined loss in supervised and
26+
unsupervised tasks.
2527
2628
This projection is determined by the eigendecomposition of a modified gram
2729
matrix :math:`\mathbf{\tilde{K}}`
@@ -99,12 +101,19 @@ class PCovC(LinearClassifierMixin, _BasePCov):
99101
default=`sample` when :math:`{n_{samples} < n_{features}}` and
100102
`feature` when :math:`{n_{features} < n_{samples}}`
101103
102-
classifier: `estimator object` or `precomputed`, default=None
103-
classifier for computing :math:`{\mathbf{Z}}`. The classifier should be one of
104-
`sklearn.linear_model.LogisticRegression`, `sklearn.linear_model.LogisticRegressionCV`,
105-
`sklearn.svm.LinearSVC`, `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`,
106-
`sklearn.linear_model.RidgeClassifier`, `sklearn.linear_model.RidgeClassifierCV`,
107-
`sklearn.linear_model.SGDClassifier`, or `Perceptron`. If a pre-fitted classifier
104+
classifier: `estimator object` or `precomputed`, default=None
105+
classifier for computing :math:`{\mathbf{Z}}`. The classifier should be
106+
one of the following:
107+
108+
- ``sklearn.linear_model.LogisticRegression()``
109+
- ``sklearn.linear_model.LogisticRegressionCV()``
110+
- ``sklearn.svm.LinearSVC()``
111+
- ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis()``
112+
- ``sklearn.linear_model.RidgeClassifier()``
113+
- ``sklearn.linear_model.RidgeClassifierCV()``
114+
- ``sklearn.linear_model.Perceptron()``
115+
116+
If a pre-fitted classifier
108117
is provided, it is used to compute :math:`{\mathbf{Z}}`.
109118
Note that any pre-fitting of the classifier will be lost if `PCovC` is
110119
within a composite estimator that enforces cloning, e.g.,
@@ -218,9 +227,10 @@ def __init__(
218227
self.classifier = classifier
219228

220229
def fit(self, X, Y, W=None):
221-
r"""Fit the model with X and Y. Note that W is taken from the
222-
coefficients of a linear classifier fit between X and Y to compute
223-
Z:
230+
r"""Fit the model with X and Y.
231+
232+
Note that W is taken from the coefficients of a linear classifier fit
233+
between X and Y to compute Z:
224234
225235
.. math::
226236
\mathbf{Z} = \mathbf{X} \mathbf{W}

src/skmatter/decomposition/_pcovr.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99

1010

1111
class PCovR(RegressorMixin, MultiOutputMixin, _BasePCov):
12-
r"""Principal Covariates Regression, as described in [deJong1992]_,
13-
determines a latent-space projection :math:`\mathbf{T}` which
14-
minimizes a combined loss in supervised and unsupervised tasks.
12+
r"""Principal Covariates Regression (PCovR).
13+
14+
As described in [deJong1992]_, PCovR determines a latent-space projection
15+
:math:`\mathbf{T}` which minimizes a combined loss in supervised and
16+
unsupervised tasks.
1517
1618
This projection is determined by the eigendecomposition of a modified gram
1719
matrix :math:`\mathbf{\tilde{K}}`

0 commit comments

Comments
 (0)