Merge pull request #6 from bystrogenomics/feature/standardize2

austinTalbot7241993 · web-flow · commit 55c06377beec · 2025-11-25T22:37:07.000-05:00
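Standardize test inputs: rename data arguments (X_in, x, y) to X, Y and validate data matrices with validate_data_matrix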
diff --git a/covtest/methods/hypothesis_proportionality.py b/covtest/methods/hypothesis_proportionality.py
@@ -12,6 +12,7 @@
 from . import _flurry_1986 as flurry1986
 from . import _liu_2014 as liu2014
 from . import _tsukuda_2019 as tsukuda2019
+from .utils import validate_data_matrix
 
 ArrayLike = np.ndarray
 
@@ -21,7 +22,7 @@
 
 
 def flury_proportionality_test(
-    X_in,
+    X,
     Y,
     max_iter: int = 1000,
     tol: float = 1e-9,
@@ -40,7 +41,9 @@ def flury_proportionality_test(
     out : dict
         See flury_proportionality_test_from_cov.
     """
-    X_list = [X_in, Y]
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
+    X_list = [X, Y]
     S_list = []
     n_list = []
     for X in X_list:
@@ -120,6 +123,8 @@ def bartlett_adjusted_proportionality_test(
         - 'pvalue_adj'  : Bartlett-adjusted p-value    (χ²_df right-tail)
         - 'Sigma_hat', 'c_hat', 'converged', 'iterations', 'n_list', 'S_list'
     """
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     X_groups = [X, Y]
     rng = np.random.default_rng(random_state)
 
@@ -212,8 +217,8 @@ def proportionality_test_LZ(X, Y, regularize=0.0):
     -----
     Requires p < n2 = N2 - 1.
     """
-    X = np.asarray(X, float)
-    Y = np.asarray(Y, float)
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     N1, p = X.shape
     N2, p2 = Y.shape
     assert p == p2, "X and Y must have the same number of columns (p)."
@@ -327,8 +332,8 @@ def proportionality_test_signs(
     Use asymptotic calibrations when permutation is too costly or when exchangeability
     is not appropriate, but expect some approximation error in small samples.
     """
-    X = np.asarray(X, dtype=float)
-    Y = np.asarray(Y, dtype=float)
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     n1, p1 = X.shape
     n2, p2 = Y.shape
     if p1 != p2:
@@ -427,6 +432,8 @@ def proportionality_plrt(X, Y, dist_moments="gaussian"):
         raise NotImplementedError("Only Gaussian case implemented.")
 
     # T1 statistic
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     A = S1 @ inv(S2)
     tr_term = np.trace(A) / p
     sign, logdet = slogdet(A)
@@ -553,8 +560,8 @@ def proportional_cov_test_tsukuda(
     - Since Sx and Sy are independent unbiased estimators of Sigma_x and
       Sigma_y, tr(Sx * Sy) is an unbiased estimator of tr(Sigma_x * Sigma_y).
     """
-    X = np.asarray(X, dtype=float)
-    Y = np.asarray(Y, dtype=float)
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     if X.ndim != 2 or Y.ndim != 2:
         raise ValueError("X and Y must be 2D arrays (observations × features).")
     m, pX = X.shape
diff --git a/covtest/methods/hypothesis_two_sample.py b/covtest/methods/hypothesis_two_sample.py
@@ -8,6 +8,7 @@
 from . import _ding as ding2023
 from . import _ishii2015 as ishii2015
 from . import _tylers as tyler
+from .utils import validate_data_matrix
 
 
 def ahmad_2015_two_sample(X, Y):
@@ -29,6 +30,8 @@ def ahmad_2015_two_sample(X, Y):
         - 'stat': test statistic
         - 'p_value': p-value based on asymptotic normality
     """
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     n1, p = X.shape
     n2 = Y.shape[0]
 
@@ -54,7 +57,7 @@ def ahmad_2015_two_sample(X, Y):
     }
 
 
-def boxm_test(x, y, type="chi.squared"):
+def boxm_test(X, Y, type="chi.squared"):
     """
     Test equality of two covariance matrices via Box's M test.
 
@@ -66,9 +69,9 @@ def boxm_test(x, y, type="chi.squared"):
 
     Parameters
     ----------
-    x : array-like of shape (n_samples_x, n_features)
+    X : array-like of shape (n_samples_x, n_features)
         First data matrix. Rows are samples and columns are features.
-    y : array-like of shape (n_samples_y, n_features)
+    Y : array-like of shape (n_samples_y, n_features)
         Second data matrix. Rows are samples and columns are features.
     type : {"chi.squared", "F"}, default="chi.squared"
         Reference distribution used to compute the p-value.
@@ -91,7 +94,7 @@ def boxm_test(x, y, type="chi.squared"):
     Raises
     ------
     ValueError
-        If ``x`` and ``y`` do not have the same number of columns
+        If ``X`` and ``Y`` do not have the same number of columns
         (features).
     ValueError
         If ``n_features >= n_samples_x`` or ``n_features >= n_samples_y``.
@@ -140,20 +143,20 @@ def boxm_test(x, y, type="chi.squared"):
     >>> res_alt["p_value"] < 0.05
     True
     """
-    x = np.asarray(x)
-    y = np.asarray(y)
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
 
-    if x.shape[1] != y.shape[1]:
+    if X.shape[1] != Y.shape[1]:
         raise ValueError("Dimensions do not match")
-    if x.shape[1] >= x.shape[0] or y.shape[1] >= y.shape[0]:
+    if X.shape[1] >= X.shape[0] or Y.shape[1] >= Y.shape[0]:
         raise ValueError("This is not a high dimensional test")
 
-    n, p = x.shape
-    m = y.shape[0]
+    n, p = X.shape
+    m = Y.shape[0]
 
     # Sample covariance matrices
-    s1 = np.cov(x, rowvar=False, bias=False)
-    s2 = np.cov(y, rowvar=False, bias=False)
+    s1 = np.cov(X, rowvar=False, bias=False)
+    s2 = np.cov(Y, rowvar=False, bias=False)
 
     # Pooled covariance
     s_pooled = ((n - 1) * s1 + (m - 1) * s2) / (n + m - 2)
@@ -220,6 +223,8 @@ def ishii_two_sample(X1, X2, test="full"):
     -------
     result : dict
     """
+    X1 = validate_data_matrix(X1)
+    X2 = validate_data_matrix(X2)
     if X1.shape[1] < 1000:
         raise Warning(
             "Ishii et al (2015) known to be unreliable when d is small"
@@ -321,6 +326,8 @@ def schott_2001(X, Y):
     Schott (2001) homogeneity of covariance matrices test.
     Uses asymptotic normal distribution under H0.
     """
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     # k = len(x)
     k = 2
     p = X.shape[1]
@@ -427,6 +434,8 @@ def _srivastava_yanagihara_stat(x):
 
 
 def srivastava_yanagihara_two_sample(X, Y):
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     matrix_ls = [X, Y]
 
     # Compute the statistic
@@ -519,6 +528,8 @@ def _srivastava_2007_stat(x):
 
 
 def srivastava_two_sample_2007(X, Y):
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
     matrix_ls = [X, Y]
 
     # Compute the statistic
@@ -535,7 +546,7 @@ def srivastava_two_sample_2007(X, Y):
     return results
 
 
-def wald_two_sample(x, y):
+def wald_two_sample(X, Y):
     """
     Two-sample Wald test for equality of covariance matrices.
 
@@ -547,10 +558,10 @@ def wald_two_sample(x, y):
 
     Parameters
     ----------
-    x : array-like, shape (n, p)
+    X : array-like, shape (n, p)
         Data matrix for group 1 with rows as samples and columns as
         variables.
-    y : array-like, shape (m, p)
+    Y : array-like, shape (m, p)
         Data matrix for group 2 with rows as samples and columns as
         variables.
 
@@ -561,20 +572,20 @@ def wald_two_sample(x, y):
         - "test_statistic": the Wald test statistic
         - "p_value": the corresponding chi-squared p-value
     """
-    x = np.asarray(x)
-    y = np.asarray(y)
+    X = validate_data_matrix(X)
+    Y = validate_data_matrix(Y)
 
-    if x.shape[1] != y.shape[1]:
+    if X.shape[1] != Y.shape[1]:
         raise ValueError("Dimensions do not match")
-    if x.shape[1] >= x.shape[0] or y.shape[1] >= y.shape[0]:
+    if X.shape[1] >= X.shape[0] or Y.shape[1] >= Y.shape[0]:
         raise ValueError("This is not a high dimensional test")
 
-    n, p = x.shape
-    m = y.shape[0]
+    n, p = X.shape
+    m = Y.shape[0]
 
     # Sample covariances
-    s1 = np.cov(x, rowvar=False, bias=False)
-    s2 = np.cov(y, rowvar=False, bias=False)
+    s1 = np.cov(X, rowvar=False, bias=False)
+    s2 = np.cov(Y, rowvar=False, bias=False)
 
     # Pooled covariance
     s_pooled = ((n - 1) * s1 + (m - 1) * s2) / (n + m - 2)
@@ -608,6 +619,8 @@ def wald_two_sample(x, y):
 
 
 def tyler_two_sample(X1, X2, unknown_mean=False):
+    X1 = validate_data_matrix(X1)
+    X2 = validate_data_matrix(X2)
     n1, p = X1.shape
     n2, _ = X2.shape