Skip to content

Commit 55c0637

Browse files
Merge pull request #6 from bystrogenomics/feature/standardize2
Standardized
2 parents b8d086f + 60e0248 commit 55c0637

File tree

2 files changed: 51 additions (+51) and 31 deletions (-31).

covtest/methods/hypothesis_proportionality.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from . import _flurry_1986 as flurry1986
1313
from . import _liu_2014 as liu2014
1414
from . import _tsukuda_2019 as tsukuda2019
15+
from .utils import validate_data_matrix
1516

1617
ArrayLike = np.ndarray
1718

@@ -21,7 +22,7 @@
2122

2223

2324
def flury_proportionality_test(
24-
X_in,
25+
X,
2526
Y,
2627
max_iter: int = 1000,
2728
tol: float = 1e-9,
@@ -40,7 +41,9 @@ def flury_proportionality_test(
4041
out : dict
4142
See flury_proportionality_test_from_cov.
4243
"""
43-
X_list = [X_in, Y]
44+
X = validate_data_matrix(X)
45+
Y = validate_data_matrix(Y)
46+
X_list = [X, Y]
4447
S_list = []
4548
n_list = []
4649
for X in X_list:
@@ -120,6 +123,8 @@ def bartlett_adjusted_proportionality_test(
120123
- 'pvalue_adj' : Bartlett-adjusted p-value (χ²_df right-tail)
121124
- 'Sigma_hat', 'c_hat', 'converged', 'iterations', 'n_list', 'S_list'
122125
"""
126+
X = validate_data_matrix(X)
127+
Y = validate_data_matrix(Y)
123128
X_groups = [X, Y]
124129
rng = np.random.default_rng(random_state)
125130

@@ -212,8 +217,8 @@ def proportionality_test_LZ(X, Y, regularize=0.0):
212217
-----
213218
Requires p < n2 = N2 - 1.
214219
"""
215-
X = np.asarray(X, float)
216-
Y = np.asarray(Y, float)
220+
X = validate_data_matrix(X)
221+
Y = validate_data_matrix(Y)
217222
N1, p = X.shape
218223
N2, p2 = Y.shape
219224
assert p == p2, "X and Y must have the same number of columns (p)."
@@ -327,8 +332,8 @@ def proportionality_test_signs(
327332
Use asymptotic calibrations when permutation is too costly or when exchangeability
328333
is not appropriate, but expect some approximation error in small samples.
329334
"""
330-
X = np.asarray(X, dtype=float)
331-
Y = np.asarray(Y, dtype=float)
335+
X = validate_data_matrix(X)
336+
Y = validate_data_matrix(Y)
332337
n1, p1 = X.shape
333338
n2, p2 = Y.shape
334339
if p1 != p2:
@@ -427,6 +432,8 @@ def proportionality_plrt(X, Y, dist_moments="gaussian"):
427432
raise NotImplementedError("Only Gaussian case implemented.")
428433

429434
# T1 statistic
435+
X = validate_data_matrix(X)
436+
Y = validate_data_matrix(Y)
430437
A = S1 @ inv(S2)
431438
tr_term = np.trace(A) / p
432439
sign, logdet = slogdet(A)
@@ -553,8 +560,8 @@ def proportional_cov_test_tsukuda(
553560
- Since Sx and Sy are independent unbiased estimators of Sigma_x and
554561
Sigma_y, tr(Sx * Sy) is an unbiased estimator of tr(Sigma_x * Sigma_y).
555562
"""
556-
X = np.asarray(X, dtype=float)
557-
Y = np.asarray(Y, dtype=float)
563+
X = validate_data_matrix(X)
564+
Y = validate_data_matrix(Y)
558565
if X.ndim != 2 or Y.ndim != 2:
559566
raise ValueError("X and Y must be 2D arrays (observations × features).")
560567
m, pX = X.shape

covtest/methods/hypothesis_two_sample.py

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from . import _ding as ding2023
99
from . import _ishii2015 as ishii2015
1010
from . import _tylers as tyler
11+
from .utils import validate_data_matrix
1112

1213

1314
def ahmad_2015_two_sample(X, Y):
@@ -29,6 +30,8 @@ def ahmad_2015_two_sample(X, Y):
2930
- 'stat': test statistic
3031
- 'p_value': p-value based on asymptotic normality
3132
"""
33+
X = validate_data_matrix(X)
34+
Y = validate_data_matrix(Y)
3235
n1, p = X.shape
3336
n2 = Y.shape[0]
3437

@@ -54,7 +57,7 @@ def ahmad_2015_two_sample(X, Y):
5457
}
5558

5659

57-
def boxm_test(x, y, type="chi.squared"):
60+
def boxm_test(X, Y, type="chi.squared"):
5861
"""
5962
Test equality of two covariance matrices via Box's M test.
6063
@@ -66,9 +69,9 @@ def boxm_test(x, y, type="chi.squared"):
6669
6770
Parameters
6871
----------
69-
x : array-like of shape (n_samples_x, n_features)
72+
X : array-like of shape (n_samples_x, n_features)
7073
First data matrix. Rows are samples and columns are features.
71-
y : array-like of shape (n_samples_y, n_features)
74+
Y : array-like of shape (n_samples_y, n_features)
7275
Second data matrix. Rows are samples and columns are features.
7376
type : {"chi.squared", "F"}, default="chi.squared"
7477
Reference distribution used to compute the p-value.
@@ -91,7 +94,7 @@ def boxm_test(x, y, type="chi.squared"):
9194
Raises
9295
------
9396
ValueError
94-
If ``x`` and ``y`` do not have the same number of columns
97+
If ``X`` and ``Y`` do not have the same number of columns
9598
(features).
9699
ValueError
97100
If ``n_features >= n_samples_x`` or ``n_features >= n_samples_y``.
@@ -140,20 +143,20 @@ def boxm_test(x, y, type="chi.squared"):
140143
>>> res_alt["p_value"] < 0.05
141144
True
142145
"""
143-
x = np.asarray(x)
144-
y = np.asarray(y)
146+
X = validate_data_matrix(X)
147+
Y = validate_data_matrix(Y)
145148

146-
if x.shape[1] != y.shape[1]:
149+
if X.shape[1] != Y.shape[1]:
147150
raise ValueError("Dimensions do not match")
148-
if x.shape[1] >= x.shape[0] or y.shape[1] >= y.shape[0]:
151+
if X.shape[1] >= X.shape[0] or Y.shape[1] >= Y.shape[0]:
149152
raise ValueError("This is not a high dimensional test")
150153

151-
n, p = x.shape
152-
m = y.shape[0]
154+
n, p = X.shape
155+
m = Y.shape[0]
153156

154157
# Sample covariance matrices
155-
s1 = np.cov(x, rowvar=False, bias=False)
156-
s2 = np.cov(y, rowvar=False, bias=False)
158+
s1 = np.cov(X, rowvar=False, bias=False)
159+
s2 = np.cov(Y, rowvar=False, bias=False)
157160

158161
# Pooled covariance
159162
s_pooled = ((n - 1) * s1 + (m - 1) * s2) / (n + m - 2)
@@ -220,6 +223,8 @@ def ishii_two_sample(X1, X2, test="full"):
220223
-------
221224
result : dict
222225
"""
226+
X1 = validate_data_matrix(X1)
227+
X2 = validate_data_matrix(X2)
223228
if X1.shape[1] < 1000:
224229
raise Warning(
225230
"Ishii et al (2015) known to be unreliable when d is small"
@@ -321,6 +326,8 @@ def schott_2001(X, Y):
321326
Schott (2001) homogeneity of covariance matrices test.
322327
Uses asymptotic normal distribution under H0.
323328
"""
329+
X = validate_data_matrix(X)
330+
Y = validate_data_matrix(Y)
324331
# k = len(x)
325332
k = 2
326333
p = X.shape[1]
@@ -427,6 +434,8 @@ def _srivastava_yanagihara_stat(x):
427434

428435

429436
def srivastava_yanagihara_two_sample(X, Y):
437+
X = validate_data_matrix(X)
438+
Y = validate_data_matrix(Y)
430439
matrix_ls = [X, Y]
431440

432441
# Compute the statistic
@@ -519,6 +528,8 @@ def _srivastava_2007_stat(x):
519528

520529

521530
def srivastava_two_sample_2007(X, Y):
531+
X = validate_data_matrix(X)
532+
Y = validate_data_matrix(Y)
522533
matrix_ls = [X, Y]
523534

524535
# Compute the statistic
@@ -535,7 +546,7 @@ def srivastava_two_sample_2007(X, Y):
535546
return results
536547

537548

538-
def wald_two_sample(x, y):
549+
def wald_two_sample(X, Y):
539550
"""
540551
Two-sample Wald test for equality of covariance matrices.
541552
@@ -547,10 +558,10 @@ def wald_two_sample(x, y):
547558
548559
Parameters
549560
----------
550-
x : array-like, shape (n, p)
561+
X : array-like, shape (n, p)
551562
Data matrix for group 1 with rows as samples and columns as
552563
variables.
553-
y : array-like, shape (m, p)
564+
Y : array-like, shape (m, p)
554565
Data matrix for group 2 with rows as samples and columns as
555566
variables.
556567
@@ -561,20 +572,20 @@ def wald_two_sample(x, y):
561572
- "test_statistic": the Wald test statistic
562573
- "p_value": the corresponding chi-squared p-value
563574
"""
564-
x = np.asarray(x)
565-
y = np.asarray(y)
575+
X = validate_data_matrix(X)
576+
Y = validate_data_matrix(Y)
566577

567-
if x.shape[1] != y.shape[1]:
578+
if X.shape[1] != Y.shape[1]:
568579
raise ValueError("Dimensions do not match")
569-
if x.shape[1] >= x.shape[0] or y.shape[1] >= y.shape[0]:
580+
if X.shape[1] >= X.shape[0] or Y.shape[1] >= Y.shape[0]:
570581
raise ValueError("This is not a high dimensional test")
571582

572-
n, p = x.shape
573-
m = y.shape[0]
583+
n, p = X.shape
584+
m = Y.shape[0]
574585

575586
# Sample covariances
576-
s1 = np.cov(x, rowvar=False, bias=False)
577-
s2 = np.cov(y, rowvar=False, bias=False)
587+
s1 = np.cov(X, rowvar=False, bias=False)
588+
s2 = np.cov(Y, rowvar=False, bias=False)
578589

579590
# Pooled covariance
580591
s_pooled = ((n - 1) * s1 + (m - 1) * s2) / (n + m - 2)
@@ -608,6 +619,8 @@ def wald_two_sample(x, y):
608619

609620

610621
def tyler_two_sample(X1, X2, unknown_mean=False):
622+
X1 = validate_data_matrix(X1)
623+
X2 = validate_data_matrix(X2)
611624
n1, p = X1.shape
612625
n2, _ = X2.shape
613626

0 commit comments

Comments
 (0)