Skip to content

Commit 5becb99

Browse files
author
Jaime Céspedes Sisniega
authored
Merge pull request #220 from IFCA/feature-mann-whitney-u-test
Add data drift method Mann-Whitney U test
2 parents ee84956 + f2a44b3 commit 5becb99

File tree

7 files changed

+74
-3
lines changed

7 files changed

+74
-3
lines changed

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,8 @@ The currently implemented detectors are listed in the following table.
284284
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.1007/978-3-540-75488-6_27">Nishida and Yamauchi (2007)</a></td>
285285
</tr>
286286
<tr>
287-
<td rowspan="14" style="text-align: center; border: 1px solid grey; padding: 8px;">Data drift</td>
288-
<td rowspan="12" style="text-align: center; border: 1px solid grey; padding: 8px;">Batch</td>
287+
<td rowspan="15" style="text-align: center; border: 1px solid grey; padding: 8px;">Data drift</td>
288+
<td rowspan="13" style="text-align: center; border: 1px solid grey; padding: 8px;">Batch</td>
289289
<td rowspan="8" style="text-align: center; border: 1px solid grey; padding: 8px;">Distance based</td>
290290
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
291291
<td style="text-align: center; border: 1px solid grey; padding: 8px;">N</td>
@@ -335,7 +335,7 @@ The currently implemented detectors are listed in the following table.
335335
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.1057/jors.2008.144">Wu and Olson (2010)</a></td>
336336
</tr>
337337
<tr>
338-
<td rowspan="4" style="text-align: center; border: 1px solid grey; padding: 8px;">Statistical test</td>
338+
<td rowspan="5" style="text-align: center; border: 1px solid grey; padding: 8px;">Statistical test</td>
339339
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
340340
<td style="text-align: center; border: 1px solid grey; padding: 8px;">C</td>
341341
<td style="text-align: center; border: 1px solid grey; padding: 8px;">Chi-square test</td>
@@ -353,6 +353,12 @@ The currently implemented detectors are listed in the following table.
353353
<td style="text-align: center; border: 1px solid grey; padding: 8px;">Kolmogorov-Smirnov test</td>
354354
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.2307/2280095">Massey Jr (1951)</a></td>
355355
</tr>
356+
<tr>
357+
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
358+
<td style="text-align: center; border: 1px solid grey; padding: 8px;">N</td>
359+
<td style="text-align: center; border: 1px solid grey; padding: 8px;">Mann-Whitney U test</td>
360+
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.1214/aoms/1177730491">Mann and Whitney (1947)</a></td>
361+
</tr>
356362
<tr>
357363
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
358364
<td style="text-align: center; border: 1px solid grey; padding: 8px;">N</td>

docs/source/api_reference/detectors/data_drift/batch.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,6 @@ The {mod}`frouros.detectors.data_drift.batch` module contains batch data drift d
5050
ChiSquareTest
5151
CVMTest
5252
KSTest
53+
MannWhitneyUTest
5354
WelchTTest
5455
```

frouros/detectors/data_drift/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
KL,
1212
KSTest,
1313
PSI,
14+
MannWhitneyUTest,
1415
MMD,
1516
WelchTTest,
1617
)
@@ -29,6 +30,7 @@
2930
"KL",
3031
"KSTest",
3132
"PSI",
33+
"MannWhitneyUTest",
3234
"MMDStreaming",
3335
"WelchTTest",
3436
]

frouros/detectors/data_drift/batch/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
ChiSquareTest,
1515
CVMTest,
1616
KSTest,
17+
MannWhitneyUTest,
1718
WelchTTest,
1819
)
1920

@@ -28,6 +29,7 @@
2829
"KL",
2930
"KSTest",
3031
"PSI",
32+
"MannWhitneyUTest",
3133
"MMD",
3234
"WelchTTest",
3335
]

frouros/detectors/data_drift/batch/statistical_test/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
from .chisquare import ChiSquareTest
44
from .cvm import CVMTest
55
from .ks import KSTest
6+
from .mann_whitney_u import MannWhitneyUTest
67
from .welch_t_test import WelchTTest
78

89
__all__ = [
910
"ChiSquareTest",
1011
"CVMTest",
1112
"KSTest",
13+
"MannWhitneyUTest",
1214
"WelchTTest",
1315
]

frouros/detectors/data_drift/batch/statistical_test/mann_whitney_u.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Mann-Whitney U test module."""
2+
3+
from typing import Optional, List, Union
4+
5+
import numpy as np # type: ignore
6+
from scipy.stats import mannwhitneyu # type: ignore
7+
8+
from frouros.callbacks.batch.base import BaseCallbackBatch
9+
from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10+
from frouros.detectors.data_drift.batch.statistical_test.base import (
11+
BaseStatisticalTest,
12+
StatisticalResult,
13+
)
14+
15+
16+
class MannWhitneyUTest(BaseStatisticalTest):
    """Mann–Whitney U test [mann1947test]_ detector.

    Wraps :func:`scipy.stats.mannwhitneyu` and registers itself as a
    univariate detector for numerical data.

    :References:

    .. [mann1947test] Mann, Henry B., and Donald R. Whitney.
        "On a test of whether one of two random variables is stochastically larger than
        the other."
        The annals of mathematical statistics (1947): 50-60.
    """

    def __init__(
        self,
        callbacks: Optional[Union[BaseCallbackBatch, List[BaseCallbackBatch]]] = None,
    ) -> None:
        """Init method.

        :param callbacks: callbacks
        :type callbacks: Optional[Union[BaseCallbackBatch, List[BaseCallbackBatch]]]
        """
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    def _statistical_test(
        self, X_ref: np.ndarray, X: np.ndarray, **kwargs  # noqa: N803
    ) -> StatisticalResult:
        """Apply the Mann-Whitney U test to reference and test samples.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: extra keyword arguments forwarded to
            :func:`scipy.stats.mannwhitneyu`; ``alternative`` and
            ``nan_policy`` default to ``"two-sided"`` and ``"raise"`` when
            not supplied
        :return: statistic and p-value of the test
        :rtype: StatisticalResult
        """
        # Merge the defaults with the caller's options instead of passing both
        # explicitly: supplying e.g. ``alternative`` through kwargs used to
        # fail with "got multiple values for keyword argument".
        options = {"alternative": "two-sided", "nan_policy": "raise", **kwargs}
        result = mannwhitneyu(  # pylint: disable=unexpected-keyword-arg
            x=X_ref,
            y=X,
            **options,
        )
        return StatisticalResult(
            statistic=result.statistic,
            p_value=result.pvalue,
        )

frouros/tests/integration/test_data_drift.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ChiSquareTest,
2020
CVMTest,
2121
KSTest,
22+
MannWhitneyUTest,
2223
WelchTTest,
2324
)
2425
from frouros.detectors.data_drift.batch.base import BaseDataDriftBatch
@@ -161,6 +162,7 @@ def test_batch_distance_bins_based_univariate_same_distribution(
161162
[
162163
(CVMTest(), 3776.09848103, 5.38105056e-07),
163164
(KSTest(), 0.99576271, 0.0),
165+
(MannWhitneyUTest(), 6912.0, 0.0),
164166
(WelchTTest(), -287.92032554, 0.0),
165167
],
166168
)

0 commit comments

Comments
 (0)