88from . import _ding as ding2023
99from . import _ishii2015 as ishii2015
1010from . import _tylers as tyler
11+ from .utils import validate_data_matrix
1112
1213
1314def ahmad_2015_two_sample (X , Y ):
@@ -29,6 +30,8 @@ def ahmad_2015_two_sample(X, Y):
2930 - 'stat': test statistic
3031 - 'p_value': p-value based on asymptotic normality
3132 """
33+ X = validate_data_matrix (X )
34+ Y = validate_data_matrix (Y )
3235 n1 , p = X .shape
3336 n2 = Y .shape [0 ]
3437
@@ -54,7 +57,7 @@ def ahmad_2015_two_sample(X, Y):
5457 }
5558
5659
57- def boxm_test (x , y , type = "chi.squared" ):
60+ def boxm_test (X , Y , type = "chi.squared" ):
5861 """
5962 Test equality of two covariance matrices via Box's M test.
6063
@@ -66,9 +69,9 @@ def boxm_test(x, y, type="chi.squared"):
6669
6770 Parameters
6871 ----------
69- x : array-like of shape (n_samples_x, n_features)
72+ X : array-like of shape (n_samples_x, n_features)
7073 First data matrix. Rows are samples and columns are features.
71- y : array-like of shape (n_samples_y, n_features)
74+ Y : array-like of shape (n_samples_y, n_features)
7275 Second data matrix. Rows are samples and columns are features.
7376 type : {"chi.squared", "F"}, default="chi.squared"
7477 Reference distribution used to compute the p-value.
@@ -91,7 +94,7 @@ def boxm_test(x, y, type="chi.squared"):
9194 Raises
9295 ------
9396 ValueError
94- If ``x `` and ``y `` do not have the same number of columns
97+ If ``X `` and ``Y `` do not have the same number of columns
9598 (features).
9699 ValueError
97100 If ``n_features >= n_samples_x`` or ``n_features >= n_samples_y``.
@@ -140,20 +143,20 @@ def boxm_test(x, y, type="chi.squared"):
140143 >>> res_alt["p_value"] < 0.05
141144 True
142145 """
143- x = np . asarray ( x )
144- y = np . asarray ( y )
146+ X = validate_data_matrix ( X )
147+ Y = validate_data_matrix ( Y )
145148
146- if x .shape [1 ] != y .shape [1 ]:
149+ if X .shape [1 ] != Y .shape [1 ]:
147150 raise ValueError ("Dimensions do not match" )
148- if x .shape [1 ] >= x .shape [0 ] or y .shape [1 ] >= y .shape [0 ]:
151+ if X .shape [1 ] >= X .shape [0 ] or Y .shape [1 ] >= Y .shape [0 ]:
149152 raise ValueError ("This is not a high dimensional test" )
150153
151- n , p = x .shape
152- m = y .shape [0 ]
154+ n , p = X .shape
155+ m = Y .shape [0 ]
153156
154157 # Sample covariance matrices
155- s1 = np .cov (x , rowvar = False , bias = False )
156- s2 = np .cov (y , rowvar = False , bias = False )
158+ s1 = np .cov (X , rowvar = False , bias = False )
159+ s2 = np .cov (Y , rowvar = False , bias = False )
157160
158161 # Pooled covariance
159162 s_pooled = ((n - 1 ) * s1 + (m - 1 ) * s2 ) / (n + m - 2 )
@@ -220,6 +223,8 @@ def ishii_two_sample(X1, X2, test="full"):
220223 -------
221224 result : dict
222225 """
226+ X1 = validate_data_matrix (X1 )
227+ X2 = validate_data_matrix (X2 )
223228 if X1 .shape [1 ] < 1000 :
224229 raise Warning (
225230 "Ishii et al (2015) known to be unreliable when d is small"
@@ -321,6 +326,8 @@ def schott_2001(X, Y):
321326 Schott (2001) homogeneity of covariance matrices test.
322327 Uses asymptotic normal distribution under H0.
323328 """
329+ X = validate_data_matrix (X )
330+ Y = validate_data_matrix (Y )
324331 # k = len(x)
325332 k = 2
326333 p = X .shape [1 ]
@@ -427,6 +434,8 @@ def _srivastava_yanagihara_stat(x):
427434
428435
429436def srivastava_yanagihara_two_sample (X , Y ):
437+ X = validate_data_matrix (X )
438+ Y = validate_data_matrix (Y )
430439 matrix_ls = [X , Y ]
431440
432441 # Compute the statistic
@@ -519,6 +528,8 @@ def _srivastava_2007_stat(x):
519528
520529
521530def srivastava_two_sample_2007 (X , Y ):
531+ X = validate_data_matrix (X )
532+ Y = validate_data_matrix (Y )
522533 matrix_ls = [X , Y ]
523534
524535 # Compute the statistic
@@ -535,7 +546,7 @@ def srivastava_two_sample_2007(X, Y):
535546 return results
536547
537548
538- def wald_two_sample (x , y ):
549+ def wald_two_sample (X , Y ):
539550 """
540551 Two-sample Wald test for equality of covariance matrices.
541552
@@ -547,10 +558,10 @@ def wald_two_sample(x, y):
547558
548559 Parameters
549560 ----------
550- x : array-like, shape (n, p)
561+ X : array-like, shape (n, p)
551562 Data matrix for group 1 with rows as samples and columns as
552563 variables.
553- y : array-like, shape (m, p)
564+ Y : array-like, shape (m, p)
554565 Data matrix for group 2 with rows as samples and columns as
555566 variables.
556567
@@ -561,20 +572,20 @@ def wald_two_sample(x, y):
561572 - "test_statistic": the Wald test statistic
562573 - "p_value": the corresponding chi-squared p-value
563574 """
564- x = np . asarray ( x )
565- y = np . asarray ( y )
575+ X = validate_data_matrix ( X )
576+ Y = validate_data_matrix ( Y )
566577
567- if x .shape [1 ] != y .shape [1 ]:
578+ if X .shape [1 ] != Y .shape [1 ]:
568579 raise ValueError ("Dimensions do not match" )
569- if x .shape [1 ] >= x .shape [0 ] or y .shape [1 ] >= y .shape [0 ]:
580+ if X .shape [1 ] >= X .shape [0 ] or Y .shape [1 ] >= Y .shape [0 ]:
570581 raise ValueError ("This is not a high dimensional test" )
571582
572- n , p = x .shape
573- m = y .shape [0 ]
583+ n , p = X .shape
584+ m = Y .shape [0 ]
574585
575586 # Sample covariances
576- s1 = np .cov (x , rowvar = False , bias = False )
577- s2 = np .cov (y , rowvar = False , bias = False )
587+ s1 = np .cov (X , rowvar = False , bias = False )
588+ s2 = np .cov (Y , rowvar = False , bias = False )
578589
579590 # Pooled covariance
580591 s_pooled = ((n - 1 ) * s1 + (m - 1 ) * s2 ) / (n + m - 2 )
@@ -608,6 +619,8 @@ def wald_two_sample(x, y):
608619
609620
610621def tyler_two_sample (X1 , X2 , unknown_mean = False ):
622+ X1 = validate_data_matrix (X1 )
623+ X2 = validate_data_matrix (X2 )
611624 n1 , p = X1 .shape
612625 n2 , _ = X2 .shape
613626
0 commit comments