Skip to content

Commit 99e47ba

Browse files
author
Jaime Céspedes Sisniega
authored
Merge pull request #242 from IFCA/feature-anderson-darling-test
Add data drift Anderson-Darling test
2 parents e5df711 + 18306f7 commit 99e47ba

File tree

7 files changed

+77
-3
lines changed

7 files changed

+77
-3
lines changed

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,9 +290,15 @@ The currently implemented detectors are listed in the following table.
290290
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.1007/978-3-540-75488-6_27">Nishida and Yamauchi (2007)</a></td>
291291
</tr>
292292
<tr>
293-
<td rowspan="15" style="text-align: center; border: 1px solid grey; padding: 8px;">Data drift</td>
294-
<td rowspan="13" style="text-align: center; border: 1px solid grey; padding: 8px;">Batch</td>
295-
<td rowspan="8" style="text-align: center; border: 1px solid grey; padding: 8px;">Distance based</td>
293+
<td rowspan="16" style="text-align: center; border: 1px solid grey; padding: 8px;">Data drift</td>
294+
<td rowspan="14" style="text-align: center; border: 1px solid grey; padding: 8px;">Batch</td>
295+
<td rowspan="9" style="text-align: center; border: 1px solid grey; padding: 8px;">Distance based</td>
296+
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
297+
<td style="text-align: center; border: 1px solid grey; padding: 8px;">N</td>
298+
<td style="text-align: center; border: 1px solid grey; padding: 8px;">Anderson-Darling test</td>
299+
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.2307/2288805">Scholz and Stephens (1987)</a></td>
300+
</tr>
301+
<tr>
296302
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
297303
<td style="text-align: center; border: 1px solid grey; padding: 8px;">N</td>
298304
<td style="text-align: center; border: 1px solid grey; padding: 8px;">Bhattacharyya distance</td>

docs/source/api_reference/detectors/data_drift/batch.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ The {mod}`frouros.detectors.data_drift.batch` module contains batch data drift d
4747
:toctree: auto_generated/
4848
:template: class.md
4949
50+
AndersonDarlingTest
5051
ChiSquareTest
5152
CVMTest
5253
KSTest

frouros/detectors/data_drift/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Data drift detection methods init."""
22

33
from .batch import ( # noqa: F401
4+
AndersonDarlingTest,
45
BhattacharyyaDistance,
56
ChiSquareTest,
67
CVMTest,
@@ -19,6 +20,7 @@
1920
from .streaming import IncrementalKSTest, MMD as MMDStreaming # noqa: N811
2021

2122
__all__ = [
23+
"AndersonDarlingTest",
2224
"BhattacharyyaDistance",
2325
"ChiSquareTest",
2426
"CVMTest",

frouros/detectors/data_drift/batch/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
MMD,
1212
)
1313
from .statistical_test import (
14+
AndersonDarlingTest,
1415
ChiSquareTest,
1516
CVMTest,
1617
KSTest,
@@ -19,6 +20,7 @@
1920
)
2021

2122
__all__ = [
23+
"AndersonDarlingTest",
2224
"BhattacharyyaDistance",
2325
"ChiSquareTest",
2426
"CVMTest",

frouros/detectors/data_drift/batch/statistical_test/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
"""Data drift batch statistical test detection methods' init."""
22

3+
from .anderson_darling import AndersonDarlingTest
34
from .chisquare import ChiSquareTest
45
from .cvm import CVMTest
56
from .ks import KSTest
67
from .mann_whitney_u import MannWhitneyUTest
78
from .welch_t_test import WelchTTest
89

910
__all__ = [
11+
"AndersonDarlingTest",
1012
"ChiSquareTest",
1113
"CVMTest",
1214
"KSTest",
frouros/detectors/data_drift/batch/statistical_test/anderson_darling.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""Anderson-Darling test module."""
2+
3+
from typing import Optional, List, Union
4+
5+
import numpy as np # type: ignore
6+
from scipy.stats import anderson_ksamp # type: ignore
7+
8+
from frouros.callbacks.batch.base import BaseCallbackBatch
9+
from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10+
from frouros.detectors.data_drift.batch.statistical_test.base import (
11+
BaseStatisticalTest,
12+
StatisticalResult,
13+
)
14+
15+
16+
class AndersonDarlingTest(BaseStatisticalTest):
    """Anderson-Darling test [scholz1987k]_ detector.

    Univariate detector for numerical data. The reference sample and the
    sample under test are compared with scipy's k-sample Anderson-Darling
    test (:func:`scipy.stats.anderson_ksamp`).

    :Note:
    p-values are bounded between 0.001 and 0.25 according to scipy
    documentation [1]_.

    :References:

    .. [scholz1987k] Scholz, Fritz W., and Michael A. Stephens.
        "K-sample Anderson–Darling tests."
        Journal of the American Statistical Association 82.399 (1987): 918-924.
    .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html  # noqa: E501  # pylint: disable=line-too-long
    """

    def __init__(
        self,
        callbacks: Optional[Union[BaseCallbackBatch, List[BaseCallbackBatch]]] = None,
    ) -> None:
        """Init method.

        :param callbacks: callbacks
        :type callbacks: Optional[Union[BaseCallbackBatch, List[BaseCallbackBatch]]]
        """
        # The test is only defined here for numerical, univariate data.
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    def _statistical_test(
        self, X_ref: np.ndarray, X: np.ndarray, **kwargs  # noqa: N803
    ) -> StatisticalResult:
        """Apply the k-sample Anderson-Darling test to both samples.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: extra keyword arguments forwarded unchanged to
            :func:`scipy.stats.anderson_ksamp`
        :return: statistic and p-value reported by scipy
        :rtype: StatisticalResult
        """
        # Reference sample first, tested sample second.
        samples = [X_ref, X]
        scipy_result = anderson_ksamp(samples=samples, **kwargs)
        return StatisticalResult(
            statistic=scipy_result.statistic,
            p_value=scipy_result.pvalue,
        )

frouros/tests/integration/test_data_drift.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
MMD,
1717
)
1818
from frouros.detectors.data_drift.batch import (
19+
AndersonDarlingTest,
1920
ChiSquareTest,
2021
CVMTest,
2122
KSTest,
@@ -160,6 +161,7 @@ def test_batch_distance_bins_based_univariate_same_distribution(
160161
@pytest.mark.parametrize(
161162
"detector, expected_statistic, expected_p_value",
162163
[
164+
(AndersonDarlingTest(), 23171.19994366, 0.001),
163165
(CVMTest(), 3776.09848103, 5.38105056e-07),
164166
(KSTest(), 0.99576271, 0.0),
165167
(MannWhitneyUTest(), 6912.0, 0.0),

0 commit comments

Comments
 (0)