From d8318e3ce1df62898b7d8d1406f31881cadafd0f Mon Sep 17 00:00:00 2001 From: Anton Date: Sun, 7 Dec 2025 20:28:03 +0300 Subject: [PATCH 1/7] add Log-Normal GoF criteria, tests --- pysatl_criterion/statistics/__init__.py | 82 ++++++++++ pysatl_criterion/statistics/log_normal.py | 176 ++++++++++++++++++++ tests/statistics/test_log_normal.py | 187 ++++++++++++++++++++++ 3 files changed, 445 insertions(+) create mode 100644 pysatl_criterion/statistics/log_normal.py create mode 100644 tests/statistics/test_log_normal.py diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index 4aae312..1118ab4 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -33,6 +33,48 @@ WeExponentialityGofStatistic, WongWongExponentialityGofStatistic, ) +from pysatl_criterion.statistics.log_normal import ( + AbstractLogNormalGofStatistic, + AndersonDarlingLogNormalGofStatistic, + BonettSeierLogNormalGofStatistic, + BontempsMeddahi1LogNormalGofStatistic, + BontempsMeddahi2LogNormalGofStatistic, + CabanaCabana1LogNormalGofStatistic, + CabanaCabana2LogNormalGofStatistic, + ChenShapiroLogNormalGofStatistic, + CoinLogNormalGofStatistic, + CramerVonMiseLogNormalGofStatistic, + DagostinoLogNormalGofStatistic, + DAPLogNormalGofStatistic, + DesgagneLafayeLogNormalGofStatistic, + DoornikHansenLogNormalGofStatistic, + EppsPulleyLogNormalGofStatistic, + FilliLogNormalGofStatistic, + GlenLeemisBarrLogNormalGofStatistic, + GMGLogNormalGofStatistic, + Hosking1LogNormalGofStatistic, + Hosking2LogNormalGofStatistic, + Hosking3LogNormalGofStatistic, + Hosking4LogNormalGofStatistic, + JBLogNormalGofStatistic, + KolmogorovSmirnovLogNormalGofStatistic, + KurtosisLogNormalGofStatistic, + LillieforsLogNormalGofStatistic, + LooneyGulledgeLogNormalGofStatistic, + MartinezIglewiczLogNormalGofStatistic, + QuesenberryMillerLogNormalGofStatistic, + RobustJarqueBeraLogNormalGofStatistic, + RyanJoinerLogNormalGofStatistic, + SFLogNormalGofStatistic, + ShapiroWilkLogNormalGofStatistic, + SkewLogNormalGofStatistic, + SpiegelhalterLogNormalGofStatistic, + SWRGLogNormalGofStatistic, + ZhangQLogNormalGofStatistic, + ZhangQStarLogNormalGofStatistic, + ZhangWuALogNormalGofStatistic, + ZhangWuCLogNormalGofStatistic, +) from pysatl_criterion.statistics.models import AbstractStatistic from pysatl_criterion.statistics.normal import ( AbstractNormalityGofStatistic, @@ -196,4 +238,44 @@ "ZhangQStarNormalityGofStatistic", "ZhangWuANormalityGofStatistic", "ZhangWuCNormalityGofStatistic", + "AbstractLogNormalGofStatistic", + "AndersonDarlingLogNormalGofStatistic", + "BonettSeierLogNormalGofStatistic", + "BontempsMeddahi1LogNormalGofStatistic", + "BontempsMeddahi2LogNormalGofStatistic", + "CabanaCabana1LogNormalGofStatistic", + "CabanaCabana2LogNormalGofStatistic", + "ChenShapiroLogNormalGofStatistic", + "CoinLogNormalGofStatistic", + "CramerVonMiseLogNormalGofStatistic", + "DagostinoLogNormalGofStatistic", + "DAPLogNormalGofStatistic", + "DesgagneLafayeLogNormalGofStatistic", + "DoornikHansenLogNormalGofStatistic", + "EppsPulleyLogNormalGofStatistic", + "FilliLogNormalGofStatistic", + "GlenLeemisBarrLogNormalGofStatistic", + "GMGLogNormalGofStatistic", + "Hosking1LogNormalGofStatistic", + "Hosking2LogNormalGofStatistic", + "Hosking3LogNormalGofStatistic", + "Hosking4LogNormalGofStatistic", + "JBLogNormalGofStatistic", + "KolmogorovSmirnovLogNormalGofStatistic", + "KurtosisLogNormalGofStatistic", + "LillieforsLogNormalGofStatistic", + "LooneyGulledgeLogNormalGofStatistic", + "MartinezIglewiczLogNormalGofStatistic", + "QuesenberryMillerLogNormalGofStatistic", + "RobustJarqueBeraLogNormalGofStatistic", + "RyanJoinerLogNormalGofStatistic", + "SFLogNormalGofStatistic", + "ShapiroWilkLogNormalGofStatistic", + "SkewLogNormalGofStatistic", + "SpiegelhalterLogNormalGofStatistic", + "SWRGLogNormalGofStatistic", + "ZhangQLogNormalGofStatistic", + "ZhangQStarLogNormalGofStatistic", + "ZhangWuALogNormalGofStatistic", + "ZhangWuCLogNormalGofStatistic", ] diff --git a/pysatl_criterion/statistics/log_normal.py b/pysatl_criterion/statistics/log_normal.py new file mode 100644 index 0000000..aeecbc1 --- /dev/null +++ b/pysatl_criterion/statistics/log_normal.py @@ -0,0 +1,176 @@ +import inspect +import sys +import numpy as np +import scipy.stats as scipy_stats +from numpy import histogram +from typing_extensions import override + +from pysatl_criterion.statistics import normal +from pysatl_criterion.statistics.common import ( + CrammerVonMisesStatistic, + KSStatistic, +) +from pysatl_criterion.statistics.goodness_of_fit import AbstractGoodnessOfFitStatistic +from pysatl_criterion.statistics.normal import AbstractNormalityGofStatistic + + +class AbstractLogNormalGofStatistic(AbstractGoodnessOfFitStatistic): + """ + Base class for Log-Normal distribution Goodness-of-Fit statistics. + """ + + def __init__(self, s=1, scale=1): + """ + :param s: shape parameter (sigma) + :param scale: scale parameter (exp(mu)) + """ + if s <= 0: + raise ValueError("Shape parameter s must be positive") + if scale <= 0: + raise ValueError("Scale parameter must be positive") + + self.s = s + self.scale = scale + + @staticmethod + @override + def code(): + return f"LOGNORMAL_{AbstractGoodnessOfFitStatistic.code()}" + + +# ================================================================================================= +# EXPLICIT IMPLEMENTATIONS +# ================================================================================================= + + +class KolmogorovSmirnovLogNormalGofStatistic(AbstractLogNormalGofStatistic, KSStatistic): + @override + def __init__(self, alternative="two-sided", mode="auto", s=1, scale=1): + AbstractLogNormalGofStatistic.__init__(self, s=s, scale=scale) + KSStatistic.__init__(self, alternative, mode) + + @staticmethod + @override + def code(): + return f"KS_{AbstractLogNormalGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + rvs = np.sort(rvs) + cdf_vals = scipy_stats.lognorm.cdf(rvs, s=self.s, scale=self.scale) + return KSStatistic.execute_statistic(self, rvs, cdf_vals) + + +class CramerVonMiseLogNormalGofStatistic(AbstractLogNormalGofStatistic, CrammerVonMisesStatistic): + @staticmethod + @override + def code(): + return f"CVM_{AbstractLogNormalGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + rvs_sorted = np.sort(rvs) + cdf_vals = scipy_stats.lognorm.cdf(rvs_sorted, s=self.s, scale=self.scale) + return super().execute_statistic(rvs, cdf_vals) + + +class QuesenberryMillerLogNormalGofStatistic(AbstractLogNormalGofStatistic): + """ + Quesenberry and Miller's Q-test for Log-Normal distribution. + + References + ---------- + .. [1] Quesenberry, C. P., & Miller Jr, F. L. (1977). + Power studies of some tests for uniformity. + Journal of Statistical Computation and Simulation, 5, 169-191. + """ + + @staticmethod + @override + def code(): + return f"QUESENBERRY_MILLER_{AbstractLogNormalGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + rvs = np.asarray(rvs) + + # Transform data to [0, 1] using the CDF of the Log-Normal distribution + cdf_vals = scipy_stats.lognorm.cdf(rvs, s=self.s, scale=self.scale) + + x_sorted = np.sort(cdf_vals) + + # Add boundaries 0 and 1 because we are in the probability space [0, 1] + x_with_boundaries = np.concatenate([[0.0], x_sorted, [1.0]]) + + spacings = np.diff(x_with_boundaries) + + sum_squares = np.sum(spacings**2) + + sum_consecutive_products = np.sum(spacings[:-1] * spacings[1:]) + + q = float(sum_squares) + float(sum_consecutive_products) + + return q + + +# ================================================================================================= +# DYNAMIC GENERATION +# ================================================================================================= + + +def _create_lognormal_class(normal_cls): + new_class_name = normal_cls.__name__.replace("Normality", "LogNormal") + class_code = normal_cls.code().replace("NORMALITY", "LOGNORMAL") + + class LogNormalClass(AbstractLogNormalGofStatistic): + @override + def execute_statistic(self, rvs, **kwargs): + rvs_arr = np.array(rvs) + + if np.any(rvs_arr <= 0): + return float("inf") + + log_rvs = np.log(rvs_arr) + standardized_log_rvs = (log_rvs - np.log(self.scale)) / self.s + + return normal_cls().execute_statistic(standardized_log_rvs, **kwargs) + + @staticmethod + @override + def code(): + return class_code + + LogNormalClass.__name__ = new_class_name + LogNormalClass.__qualname__ = new_class_name + return LogNormalClass + + +# List of Normal statistics that we have explicitly implemented for LogNormal above. +# We should NOT generate dynamic wrappers for these. +EXPLICITLY_IMPLEMENTED_NORMAL_STATS = [ + "KolmogorovSmirnovNormalityGofStatistic", + "CramerVonMiseNormalityGofStatistic", + "QuesenberryMillerNormalityGofStatistic", + "BHSNormalityGofStatistic", +] + +current_module = sys.modules[__name__] +__all__ = [ + "AbstractLogNormalGofStatistic", + "KolmogorovSmirnovLogNormalGofStatistic", + "CramerVonMiseNormalityGofStatistic", + "QuesenberryMillerLogNormalGofStatistic", +] + +for name, obj in inspect.getmembers(normal): + if ( + inspect.isclass(obj) + and issubclass(obj, AbstractNormalityGofStatistic) + and obj is not AbstractNormalityGofStatistic + and name not in EXPLICITLY_IMPLEMENTED_NORMAL_STATS + and not name.startswith("Abstract") + and not name.startswith("Graph") # maybe shoud fix + ): + ln_class = _create_lognormal_class(obj) + setattr(current_module, ln_class.__name__, ln_class) + __all__.append(ln_class.__name__) diff --git a/tests/statistics/test_log_normal.py b/tests/statistics/test_log_normal.py new file mode 100644 index 0000000..acbf1c9 --- /dev/null +++ b/tests/statistics/test_log_normal.py @@ -0,0 +1,187 @@ +import inspect +import numpy as np +import pytest +import scipy.stats as scipy_stats + +from pysatl_criterion.statistics.log_normal import ( + AbstractLogNormalGofStatistic, + KolmogorovSmirnovLogNormalGofStatistic, + CramerVonMiseLogNormalGofStatistic, + QuesenberryMillerLogNormalGofStatistic, +) + + +@pytest.mark.parametrize( + ("data", "result"), + [ + ( + [ + 0.279, + 1.455, + 0.323, + 2.981, + 0.732, + 0.654, + 1.121, + 0.485, + 2.341, + 0.892, + ], + 0.1545, + ), + ], +) +def test_ks_lognormal_criterion(data, result): + statistic = KolmogorovSmirnovLogNormalGofStatistic(s=1, scale=1).execute_statistic(data) + assert result == pytest.approx(statistic, 0.001) + + +def test_ks_lognormal_criterion_code(): + assert "KS_LOGNORMAL_GOODNESS_OF_FIT" == KolmogorovSmirnovLogNormalGofStatistic().code() + + +def test_ks_lognormal_negative_data(): + np.random.seed(42) + s, scale = 0.8, 1.5 + data = scipy_stats.lognorm.rvs(s=s, scale=scale, size=50) + data = np.append(data, -1) + + our_stat = KolmogorovSmirnovLogNormalGofStatistic(s=s, scale=scale).execute_statistic(data) + + scipy_stat, _ = scipy_stats.kstest(data, "lognorm", args=(s, 0, scale)) + + assert our_stat == pytest.approx(scipy_stat, 1e-10) + + +@pytest.mark.parametrize( + ("data", "result"), + [ + ( + [ + 0.279, + 1.455, + 0.323, + 2.981, + 0.732, + 0.654, + 1.121, + 0.485, + 2.341, + 0.892, + ], + 0.05776, + ), + ], +) +def test_cvm_lognormal_criterion(data, result): + statistic = CramerVonMiseLogNormalGofStatistic(s=1, scale=1).execute_statistic(data) + assert result == pytest.approx(statistic, 0.001) + + +def test_cvm_lognormal_criterion_code(): + assert "CVM_LOGNORMAL_GOODNESS_OF_FIT" == CramerVonMiseLogNormalGofStatistic().code() + + +@pytest.mark.parametrize( + ("data", "result"), + [ + # 3 points mapping to CDF 0.25, 0.5, 0.75 for LogNorm(s=1, scale=1) + ( + [0.50941628, 1.0, 1.96303108], + 0.4375, + ), + # 4 points mapping to CDF 0.2, 0.4, 0.6, 0.8 + ( + [0.431011, 0.776198, 1.288330, 2.320149], + 0.36, + ), + ], +) +def test_quesenberry_miller_lognormal_criterion(data, result): + statistic = QuesenberryMillerLogNormalGofStatistic(s=1, scale=1).execute_statistic(data) + assert result == pytest.approx(statistic, 0.0001) + + +def test_quesenberry_miller_lognormal_criterion_code(): + assert ( + "QUESENBERRY_MILLER_LOGNORMAL_GOODNESS_OF_FIT" + == QuesenberryMillerLogNormalGofStatistic().code() + ) + + +def get_dynamic_lognormal_classes(): + from pysatl_criterion.statistics import log_normal as log_normal_module + from pysatl_criterion.statistics import normal as normal_module + + excluded_classes = { + "KolmogorovSmirnovLogNormalGofStatistic", + "CramerVonMiseLogNormalGofStatistic", + "QuesenberryMillerLogNormalGofStatistic", + "AbstractLogNormalGofStatistic", + } + + dynamic_pairs = [] + + for name, obj in inspect.getmembers(log_normal_module, inspect.isclass): + if not name.endswith("LogNormalGofStatistic"): + continue + + if name in excluded_classes: + continue + + normal_name = name.replace("LogNormal", "Normality") + + try: + normal_cls = getattr(normal_module, normal_name) + except AttributeError: + continue + + if hasattr(obj, "execute_statistic") and hasattr(normal_cls, "execute_statistic"): + dynamic_pairs.append((obj, normal_cls)) + + return dynamic_pairs + + +DYNAMIC_CLASSES = get_dynamic_lognormal_classes() + + +@pytest.mark.parametrize("log_normal_cls, normal_cls", DYNAMIC_CLASSES) +def test_dynamic_lognormal_equivalence(log_normal_cls, normal_cls): + np.random.seed(42) + s = 0.8 + scale = 2.0 + z_data = np.random.normal(loc=0, scale=1, size=50) + + x_data = np.exp(z_data * s + np.log(scale)) + + ln_stat_obj = log_normal_cls(s=s, scale=scale) + ln_val = ln_stat_obj.execute_statistic(x_data) + + n_stat_obj = normal_cls() + n_val = n_stat_obj.execute_statistic(z_data) + + print(log_normal_cls.code()) + assert ln_val == pytest.approx(n_val, rel=1e-5) + + +@pytest.mark.parametrize("log_normal_cls, normal_cls", DYNAMIC_CLASSES) +def test_dynamic_lognormal_negative_data_handling(log_normal_cls, normal_cls): + data = [1.0, 2.0, -0.5, 3.0] + stat = log_normal_cls(s=1, scale=1).execute_statistic(data) + assert stat == float("inf") + + data_zero = [1.0, 2.0, 0.0, 3.0] + stat_z = log_normal_cls(s=1, scale=1).execute_statistic(data_zero) + print(log_normal_cls.code()) + assert stat_z == float("inf") + + +def test_dynamic_lognormal_example_code_method(): + from pysatl_criterion.statistics.log_normal import ( + ShapiroWilkLogNormalGofStatistic, + LillieforsLogNormalGofStatistic, + ) + + print("!") + assert "SW_LOGNORMAL_GOODNESS_OF_FIT" == ShapiroWilkLogNormalGofStatistic().code() + assert "LILLIE_LOGNORMAL_GOODNESS_OF_FIT" == LillieforsLogNormalGofStatistic().code() From 7aaaf804876fa2b61f313b7a3567ddb66f61a4a1 Mon Sep 17 00:00:00 2001 From: Anton Date: Sun, 7 Dec 2025 21:00:22 +0300 Subject: [PATCH 2/7] fix debug --- pysatl_criterion/statistics/log_normal.py | 2 +- tests/statistics/test_log_normal.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pysatl_criterion/statistics/log_normal.py b/pysatl_criterion/statistics/log_normal.py index aeecbc1..5bf357b 100644 --- a/pysatl_criterion/statistics/log_normal.py +++ b/pysatl_criterion/statistics/log_normal.py @@ -169,7 +169,7 @@ def code(): and obj is not AbstractNormalityGofStatistic and name not in EXPLICITLY_IMPLEMENTED_NORMAL_STATS and not name.startswith("Abstract") - and not name.startswith("Graph") # maybe shoud fix + and not name.startswith("Graph") ): ln_class = _create_lognormal_class(obj) setattr(current_module, ln_class.__name__, ln_class) diff --git a/tests/statistics/test_log_normal.py b/tests/statistics/test_log_normal.py index acbf1c9..27484af 100644 --- a/tests/statistics/test_log_normal.py +++ b/tests/statistics/test_log_normal.py @@ -160,7 +160,6 @@ def test_dynamic_lognormal_equivalence(log_normal_cls, normal_cls): n_stat_obj = normal_cls() n_val = n_stat_obj.execute_statistic(z_data) - print(log_normal_cls.code()) assert ln_val == pytest.approx(n_val, rel=1e-5) @@ -172,7 +171,6 @@ def test_dynamic_lognormal_negative_data_handling(log_normal_cls, normal_cls): data_zero = [1.0, 2.0, 0.0, 3.0] stat_z = log_normal_cls(s=1, scale=1).execute_statistic(data_zero) - print(log_normal_cls.code()) assert stat_z == float("inf") @@ -182,6 +180,5 @@ def test_dynamic_lognormal_example_code_method(): LillieforsLogNormalGofStatistic, ) - print("!") assert "SW_LOGNORMAL_GOODNESS_OF_FIT" == ShapiroWilkLogNormalGofStatistic().code() assert "LILLIE_LOGNORMAL_GOODNESS_OF_FIT" == LillieforsLogNormalGofStatistic().code() From 5dc80a52858df5584e520a3cf617f474b97d45f2 Mon Sep 17 00:00:00 2001 From: Anton Date: Sun, 7 Dec 2025 21:14:32 +0300 Subject: [PATCH 3/7] fix ruff comments --- pysatl_criterion/statistics/log_normal.py | 4 ++-- tests/statistics/test_log_normal.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pysatl_criterion/statistics/log_normal.py b/pysatl_criterion/statistics/log_normal.py index 5bf357b..b8eb057 100644 --- a/pysatl_criterion/statistics/log_normal.py +++ b/pysatl_criterion/statistics/log_normal.py @@ -1,8 +1,8 @@ import inspect import sys + import numpy as np import scipy.stats as scipy_stats -from numpy import histogram from typing_extensions import override from pysatl_criterion.statistics import normal @@ -158,7 +158,7 @@ def code(): __all__ = [ "AbstractLogNormalGofStatistic", "KolmogorovSmirnovLogNormalGofStatistic", - "CramerVonMiseNormalityGofStatistic", + "CramerVonMiseLogNormalityGofStatistic", "QuesenberryMillerLogNormalGofStatistic", ] diff --git a/tests/statistics/test_log_normal.py b/tests/statistics/test_log_normal.py index 27484af..84b3119 100644 --- a/tests/statistics/test_log_normal.py +++ b/tests/statistics/test_log_normal.py @@ -1,12 +1,12 @@ import inspect + import numpy as np import pytest import scipy.stats as scipy_stats from pysatl_criterion.statistics.log_normal import ( - AbstractLogNormalGofStatistic, - KolmogorovSmirnovLogNormalGofStatistic, CramerVonMiseLogNormalGofStatistic, + KolmogorovSmirnovLogNormalGofStatistic, QuesenberryMillerLogNormalGofStatistic, ) @@ -176,8 +176,8 @@ def test_dynamic_lognormal_negative_data_handling(log_normal_cls, normal_cls): def test_dynamic_lognormal_example_code_method(): from pysatl_criterion.statistics.log_normal import ( - ShapiroWilkLogNormalGofStatistic, LillieforsLogNormalGofStatistic, + ShapiroWilkLogNormalGofStatistic, ) assert "SW_LOGNORMAL_GOODNESS_OF_FIT" == ShapiroWilkLogNormalGofStatistic().code() From f089b90a790902e79dbcfa0df4cbd310382f005c Mon Sep 17 00:00:00 2001 From: Anton Date: Tue, 9 Dec 2025 02:51:44 +0300 Subject: [PATCH 4/7] ignore mypy error --- pysatl_criterion/statistics/__init__.py | 2 +- pysatl_criterion/statistics/log_normal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index 1118ab4..74252f6 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -33,7 +33,7 @@ WeExponentialityGofStatistic, WongWongExponentialityGofStatistic, ) -from pysatl_criterion.statistics.log_normal import ( +from pysatl_criterion.statistics.log_normal import ( # type: ignore[attr-defined] AbstractLogNormalGofStatistic, AndersonDarlingLogNormalGofStatistic, BonettSeierLogNormalGofStatistic, diff --git a/pysatl_criterion/statistics/log_normal.py b/pysatl_criterion/statistics/log_normal.py index b8eb057..235dd10 100644 --- a/pysatl_criterion/statistics/log_normal.py +++ b/pysatl_criterion/statistics/log_normal.py @@ -158,7 +158,7 @@ def code(): __all__ = [ "AbstractLogNormalGofStatistic", "KolmogorovSmirnovLogNormalGofStatistic", - "CramerVonMiseLogNormalityGofStatistic", + "CramerVonMiseLogNormalGofStatistic", "QuesenberryMillerLogNormalGofStatistic", ] From e3d38163d153f5a87d4593d04dfd2f7fdd9dcbfa Mon Sep 17 00:00:00 2001 From: Anton Date: Tue, 9 Dec 2025 02:53:36 +0300 Subject: [PATCH 5/7] fix code style --- pysatl_criterion/statistics/log_normal.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pysatl_criterion/statistics/log_normal.py b/pysatl_criterion/statistics/log_normal.py index 235dd10..ccd9925 100644 --- a/pysatl_criterion/statistics/log_normal.py +++ b/pysatl_criterion/statistics/log_normal.py @@ -6,10 +6,7 @@ from typing_extensions import override from pysatl_criterion.statistics import normal -from pysatl_criterion.statistics.common import ( - CrammerVonMisesStatistic, - KSStatistic, -) +from pysatl_criterion.statistics.common import CrammerVonMisesStatistic, KSStatistic from pysatl_criterion.statistics.goodness_of_fit import AbstractGoodnessOfFitStatistic from pysatl_criterion.statistics.normal import AbstractNormalityGofStatistic From fa988845d0745076cc270b5c1b833453548d122e Mon Sep 17 00:00:00 2001 From: Anton Date: Mon, 22 Dec 2025 14:05:04 +0300 Subject: [PATCH 6/7] add KLSupremum and KLIntegral criteria --- pysatl_criterion/statistics/__init__.py | 4 + pysatl_criterion/statistics/log_normal.py | 227 ++++++++++++++++++++ tests/statistics/test_log_normal.py | 242 ++++++++++++++++++++++ 3 files changed, 473 insertions(+) diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index 74252f6..7fa8781 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -57,6 +57,8 @@ Hosking3LogNormalGofStatistic, Hosking4LogNormalGofStatistic, JBLogNormalGofStatistic, + KLIntegralLogNormalGoFStatistic, + KLSupremumLogNormalGoFStatistic, KolmogorovSmirnovLogNormalGofStatistic, KurtosisLogNormalGofStatistic, LillieforsLogNormalGofStatistic, @@ -261,6 +263,8 @@ "Hosking3LogNormalGofStatistic", "Hosking4LogNormalGofStatistic", "JBLogNormalGofStatistic", + "KLSupremumLogNormalGoFStatistic", + "KLIntegralLogNormalGoFStatistic", "KolmogorovSmirnovLogNormalGofStatistic", "KurtosisLogNormalGofStatistic", "LillieforsLogNormalGofStatistic", diff --git a/pysatl_criterion/statistics/log_normal.py b/pysatl_criterion/statistics/log_normal.py index ccd9925..7229bd5 100644 --- a/pysatl_criterion/statistics/log_normal.py +++ b/pysatl_criterion/statistics/log_normal.py @@ -1,8 +1,10 @@ import inspect import sys +import warnings import numpy as np import scipy.stats as scipy_stats +from scipy import integrate from typing_extensions import override from pysatl_criterion.statistics import normal @@ -41,6 +43,15 @@ def code(): class KolmogorovSmirnovLogNormalGofStatistic(AbstractLogNormalGofStatistic, KSStatistic): + """ + Kolmogorov-Smirnov test statistic for Log-Normal distribution. + + References + ---------- + .. [1] Massey, F. J. (1951). The Kolmogorov-Smirnov test for goodness of fit. + Journal of the American Statistical Association, 46(253), 68-78. + """ + @override def __init__(self, alternative="two-sided", mode="auto", s=1, scale=1): AbstractLogNormalGofStatistic.__init__(self, s=s, scale=scale) @@ -59,6 +70,17 @@ def execute_statistic(self, rvs, **kwargs): class CramerVonMiseLogNormalGofStatistic(AbstractLogNormalGofStatistic, CrammerVonMisesStatistic): + """ + Cramér-von Mises test statistic for Log-Normal distribution. + + References + ---------- + .. [1] Cramér, H. (1928). On the composition of elementary errors. + Scandinavian Actuarial Journal, 1928(1), 13-74. + .. [2] von Mises, R. (1928). Wahrscheinlichkeit, Statistik und Wahrheit. + Julius Springer. + """ + @staticmethod @override def code(): @@ -110,6 +132,207 @@ def execute_statistic(self, rvs, **kwargs): return q +class KLSupremumLogNormalGoFStatistic(AbstractGoodnessOfFitStatistic): + """ + Supremum test for lognormality based on Kullback-Leibler divergences. + + References: + ---------- + .. [1] Batsidis, A., Tzavelas, G., & Economou, P. (2015). + Testing lognormality using a family of Kullback-Leibler type divergences. + Journal of Statistical Computation and Simulation, 85(11), 215-233. + """ + + @staticmethod + @override + def code(): + return "KL_SUP_LOGNORMAL_GOODNESS_OF_FIT" + + @override + def execute_statistic(self, rvs, **kwargs): + rvs = np.array(rvs) + + if np.any(rvs <= 0): + return float("inf") + + n = len(rvs) + + # Estimate parameters + log_rvs = np.log(rvs) + sigma_hat = np.std(log_rvs, ddof=0) # MLE + + # Transform data + Y = rvs ** (1 / sigma_hat) + logY = np.log(Y) + + mean_logY = np.mean(logY) + mean_logY2 = np.mean(logY**2) + mean_logY3 = np.mean(logY**3) + + # D_{n,r} function + def D_n_r(r): + if abs(r) < 1e-8: + C = -mean_logY3 + 3 * mean_logY2 * mean_logY - 2 * (mean_logY**3) + return (np.sqrt(6) / 6) * C * (r**3) + + Yr = Y**r + mean_Yr = np.mean(Yr) + mean_Yr_logY = np.mean(Yr * logY) + + return 2 * np.log(mean_Yr) - r * mean_logY - r * mean_Yr_logY / mean_Yr + + # sigma_r + def sigma_r(r): + if abs(r) < 1e-8: + return (abs(r) ** 3) / np.sqrt(6) + + expr = np.exp(r**2) * (r**4 - 3 * (r**2) + 4) - (4 + r**2) + + if expr < 1e-16: + return 1e-8 + return np.sqrt(expr) + + # Calculate supremum over r in [-5, 5] + r_grid = np.linspace(-5, 5, 1000) + values = [] + for r in r_grid: + D = D_n_r(r) + sigma = sigma_r(r) + if sigma > 1e-10 and not np.isnan(D): # Avoid division by zero + values.append(abs(D / sigma)) + + if not values: + return np.inf + + T1 = np.sqrt(n) * np.max(values) + return T1 + + def calculate_critical_value(self, n, alpha=0.05): + if alpha == 0.05: + # Formula (27) + return 2.67076 - 3.90704 / np.sqrt(n) + 1.07162 / n + elif alpha == 0.01: + # Formula (29) + return 4.10183 - 7.48001 / np.sqrt(n) + 0.638394 * np.log(n) / np.sqrt(n) + else: + # Formula (31) for general alpha in [0.001, 0.1] + if alpha < 0.001 or alpha > 0.1: + raise ValueError("alpha must be between 0.001 and 0.1") + + return ( + 0.48632 + - 0.00148063 / alpha + - 0.0000699 * n + + 0.217752 / np.sqrt(alpha) + - 2.39217 * np.sqrt(n / alpha) + - 0.532872 / (np.sqrt(alpha) * n) + + 0.179786 * np.log(n / alpha) + - 0.13083 * np.log(alpha * n) + ) + + +class KLIntegralLogNormalGoFStatistic: + """ + Integral test for lognormality based on Kullback-Leibler divergences. + + References: + ---------- + .. [1] Batsidis, A., Tzavelas, G., & Economou, P. (2015). + Testing lognormality using a family of Kullback-Leibler type divergences. + Journal of Statistical Computation and Simulation, 85(11), 215-233. + """ + + @staticmethod + def code(): + return "KL_INT_LOGNORMAL_GOODNESS_OF_FIT" + + def execute_statistic(self, rvs, **kwargs): + rvs = np.array(rvs) + + if np.any(rvs <= 0): + return float("inf") + + n = len(rvs) + + log_rvs = np.log(rvs) + sigma_hat = np.std(log_rvs, ddof=0) + + Y = rvs ** (1 / sigma_hat) + logY = np.log(Y) + + mean_logY = np.mean(logY) + mean_logY2 = np.mean(logY**2) + mean_logY3 = np.mean(logY**3) + + # Define D_n,r + def D_n_r(r): + if abs(r) < 1e-8: + C = -mean_logY3 + 3 * mean_logY2 * mean_logY - 2 * (mean_logY**3) + return (np.sqrt(6) / 6) * C * (r**3) + + Yr = Y**r + mean_Yr = np.mean(Yr) + mean_Yr_logY = np.mean(Yr * logY) + + return 2 * np.log(mean_Yr) - r * mean_logY - r * mean_Yr_logY / mean_Yr + + # sigma_r + def sigma_r(r): + if abs(r) < 1e-8: + return (abs(r) ** 3) / np.sqrt(6) + + expr = np.exp(r**2) * (r**4 - 3 * (r**2) + 4) - (4 + r**2) + + if expr < 1e-16: + return 1e-8 + return np.sqrt(expr) + + # Integrand function + def integrand(r): + D = D_n_r(r) + sigma = sigma_r(r) + + if abs(r) < 1e-8: + C = -mean_logY3 + 3 * mean_logY2 * mean_logY - 2 * (mean_logY**3) + return (C / 6) ** 2 + + ratio = D / sigma + return ratio * ratio + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + + result, error = integrate.quad(integrand, -5, 5, limit=100) + + T2 = n * result + + return T2 if not np.isnan(T2) else float("inf") + + def calculate_critical_value(self, n, alpha=0.05): + if alpha == 0.05: + # Formula (28) + return -2.88971 - 0.130082 * (np.log(n) ** 2) + 2.71403 * np.log(n) + elif alpha == 0.01: + # Formula (30) + return 24.9099 - 31.9302 / np.sqrt(n) - 13.2562 * np.log(n) / np.sqrt(n) + else: + # Formula (32) for general alpha in [0.001, 0.1] + if alpha < 0.001 or alpha > 0.1: + raise ValueError("alpha must be between 0.001 and 0.1") + + return ( + 0.880415 + + 0.00680135 / alpha + - 35.179 * alpha + - 0.18045 / (alpha * n) + - 0.107479 * n + + 1.67972 * np.sqrt(alpha * n) + + 0.0119889 * n * np.log(n) + + 0.372023 * np.sqrt(n / alpha) + - 0.0213414 * np.sqrt(n / alpha) * np.log(n / alpha) + ) + + # ================================================================================================= # DYNAMIC GENERATION # ================================================================================================= @@ -148,6 +371,8 @@ def code(): "KolmogorovSmirnovNormalityGofStatistic", "CramerVonMiseNormalityGofStatistic", "QuesenberryMillerNormalityGofStatistic", + "KLSupremumLogNormalGoFStatistic", + "KLIntegralLogNormalGoFStatistic", "BHSNormalityGofStatistic", ] @@ -157,6 +382,8 @@ def code(): "KolmogorovSmirnovLogNormalGofStatistic", "CramerVonMiseLogNormalGofStatistic", "QuesenberryMillerLogNormalGofStatistic", + "KLSupremumLogNormalGoFStatistic", + "KLIntegralLogNormalGoFStatistic", ] for name, obj in inspect.getmembers(normal): diff --git a/tests/statistics/test_log_normal.py b/tests/statistics/test_log_normal.py index 84b3119..28e3ffa 100644 --- a/tests/statistics/test_log_normal.py +++ b/tests/statistics/test_log_normal.py @@ -6,6 +6,8 @@ from pysatl_criterion.statistics.log_normal import ( CramerVonMiseLogNormalGofStatistic, + KLIntegralLogNormalGoFStatistic, + KLSupremumLogNormalGoFStatistic, KolmogorovSmirnovLogNormalGofStatistic, QuesenberryMillerLogNormalGofStatistic, ) @@ -109,6 +111,246 @@ def test_quesenberry_miller_lognormal_criterion_code(): ) +@pytest.mark.parametrize( + ("data", "result"), + [ + ( + [ + 0.746, + 0.357, + 0.376, + 0.327, + 0.485, + 1.741, + 0.241, + 0.777, + 0.768, + 0.409, + 0.252, + 0.512, + 0.534, + 1.656, + 0.742, + 0.378, + 0.714, + 1.121, + 0.597, + 0.231, + 0.541, + 0.805, + 0.682, + 0.418, + 0.506, + 0.501, + 0.247, + 0.922, + 0.880, + 0.344, + 0.519, + 1.302, + 0.275, + 0.601, + 0.388, + 0.450, + 0.845, + 0.319, + 0.486, + 0.529, + 1.547, + 0.690, + 0.676, + 0.314, + 0.736, + 0.643, + 0.483, + 0.352, + 0.636, + 1.080, + ], + 0.9521, # значение из статьи + ), + ], +) +def test_kl_supremum_lognormal_criterion(data, result): + statistic = KLSupremumLogNormalGoFStatistic().execute_statistic(data) + + assert result == pytest.approx(statistic, 0.0001) + + +def test_kl_supremum_lognormal_criterion_code(): + assert "KL_SUP_LOGNORMAL_GOODNESS_OF_FIT" == KLSupremumLogNormalGoFStatistic().code() + + +@pytest.mark.parametrize( + ("data", "result"), + [ + ( + [ + 0.746, + 0.357, + 0.376, + 0.327, + 0.485, + 1.741, + 0.241, + 0.777, + 0.768, + 0.409, + 0.252, + 0.512, + 0.534, + 1.656, + 0.742, + 0.378, + 0.714, + 1.121, + 0.597, + 0.231, + 0.541, + 0.805, + 0.682, + 0.418, + 0.506, + 0.501, + 0.247, + 0.922, + 0.880, + 0.344, + 0.519, + 1.302, + 0.275, + 0.601, + 0.388, + 0.450, + 0.845, + 0.319, + 0.486, + 0.529, + 1.547, + 0.690, + 0.676, + 0.314, + 0.736, + 0.643, + 0.483, + 0.352, + 0.636, + 1.080, + ], + 1.28216, # значение из статьи + ), + ], +) +def test_kl_integral_lognormal_criterion(data, result): + statistic = KLIntegralLogNormalGoFStatistic().execute_statistic(data) + + assert result == pytest.approx(statistic, 0.01) + + +def test_kl_supremum_critical_value_n50(): + statistic = KLSupremumLogNormalGoFStatistic() + n = 50 + alpha = 0.05 + expected = 2.1397 # Из статьи: c1,0.05(50) = 2.1397 + + calculated = statistic.calculate_critical_value(n, alpha) + + assert abs(calculated - expected) < 1e-4 + + +def test_kl_integral_critical_value_n50(): + statistic = KLIntegralLogNormalGoFStatistic() + n = 50 + alpha = 0.05 + expected = 5.7369 # Из статьи: c2,0.05(50) = 5.7369 + + calculated = statistic.calculate_critical_value(n, alpha) + + assert abs(calculated - expected) < 1e-4 + + +def test_kl_supremum_critical_value_n90(): + statistic = KLSupremumLogNormalGoFStatistic() + n = 90 + alpha = 0.05 + expected = 2.2708 # Из статьи: c1,0.05(90) = 2.2708 + + calculated = statistic.calculate_critical_value(n, alpha) + + assert abs(calculated - expected) < 1e-4 + + +def test_kl_integral_critical_value_n90(): + statistic = KLIntegralLogNormalGoFStatistic() + n = 90 + alpha = 0.05 + expected = 6.6890 # Из статьи: c2,0.05(90) = 6.6890 + + calculated = statistic.calculate_critical_value(n, alpha) + + assert abs(calculated - expected) < 1e-4 + + +@pytest.mark.parametrize("n", [20, 40, 60, 80, 100]) +def test_kl_sup_critical_values(n): + stat = KLSupremumLogNormalGoFStatistic() + + # Test alpha = 0.05 + cv_05 = stat.calculate_critical_value(n, 0.05) + assert cv_05 > 0 + + # Test alpha = 0.01 + cv_01 = stat.calculate_critical_value(n, 0.01) + assert cv_01 > cv_05 + + +@pytest.mark.parametrize("n", [20, 40, 60, 80, 100]) +def test_kl_int_critical_values(n): + stat = KLIntegralLogNormalGoFStatistic() + + # Test alpha = 0.05 + cv_05 = stat.calculate_critical_value(n, 0.05) + assert cv_05 > 0 + + # Test alpha = 0.01 + cv_01 = stat.calculate_critical_value(n, 0.01) + assert cv_01 > cv_05 + + +@pytest.mark.parametrize( + ("data", "result"), + [ + ([-1.0, 0.5, 2.0, 3.0, -0.1], float("inf")), + ([0.0, 1.0, 2.0, 3.0, 4.0], float("inf")), + ], +) +def test_kl_supremum_non_positive_data(data, result): + statistic = KLSupremumLogNormalGoFStatistic().execute_statistic(data) + assert statistic == result + + +@pytest.mark.parametrize( + ("data", "result"), + [ + ([-1.0, 0.5, 2.0, 3.0, -0.1], float("inf")), + ([0.0, 1.0, 2.0, 3.0, 4.0], float("inf")), + ], +) +def test_kl_integral_non_positive_data(data, result): + statistic = KLIntegralLogNormalGoFStatistic().execute_statistic(data) + assert statistic == result + + +def test_kl_sup_lognormal_criterion_code(): + stat = KLSupremumLogNormalGoFStatistic() + assert stat.code() == "KL_SUP_LOGNORMAL_GOODNESS_OF_FIT" + + +def test_kl_int_lognormal_criterion_code(): + stat = KLIntegralLogNormalGoFStatistic() + assert stat.code() == "KL_INT_LOGNORMAL_GOODNESS_OF_FIT" + + def get_dynamic_lognormal_classes(): from pysatl_criterion.statistics import log_normal as log_normal_module from pysatl_criterion.statistics import normal as normal_module From 8a0070645a22f57051b1d9fbfea86f3ee5f44f16 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Mon, 22 Dec 2025 17:40:56 +0300 Subject: [PATCH 7/7] Add short_codes --- pysatl_criterion/statistics/__init__.py | 44 +++++++++---------- pysatl_criterion/statistics/log_normal.py | 53 ++++++++++++++++++++--- 2 files changed, 68 insertions(+), 29 deletions(-) diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index 9ffe09e..375f6af 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -48,6 +48,28 @@ WeExponentialityGofStatistic, WongWongExponentialityGofStatistic, ) +from pysatl_criterion.statistics.gamma import ( + AbstractGammaGofStatistic, + AndersonDarlingGammaGofStatistic, + Chi2PearsonGammaGofStatistic, + CramerVonMisesGammaGofStatistic, + CressieReadGammaGofStatistic, + GraphAverageDegreeGammaGofStatistic, + GraphCliqueNumberGammaGofStatistic, + GraphConnectedComponentsGammaGofStatistic, + GraphEdgesNumberGammaGofStatistic, + GraphIndependenceNumberGammaGofStatistic, + GraphMaxDegreeGammaGofStatistic, + GreenwoodGammaGofStatistic, + KolmogorovSmirnovGammaGofStatistic, + KuiperGammaGofStatistic, + LikelihoodRatioGammaGofStatistic, + LillieforsGammaGofStatistic, + MinToshiyukiGammaGofStatistic, + MoranGammaGofStatistic, + ProbabilityPlotCorrelationGammaGofStatistic, + WatsonGammaGofStatistic, +) from pysatl_criterion.statistics.log_normal import ( # type: ignore[attr-defined] AbstractLogNormalGofStatistic, AndersonDarlingLogNormalGofStatistic, @@ -92,28 +114,6 @@ ZhangWuALogNormalGofStatistic, ZhangWuCLogNormalGofStatistic, ) -from pysatl_criterion.statistics.gamma import ( - AbstractGammaGofStatistic, - AndersonDarlingGammaGofStatistic, - Chi2PearsonGammaGofStatistic, - CramerVonMisesGammaGofStatistic, - CressieReadGammaGofStatistic, - GraphAverageDegreeGammaGofStatistic, - GraphCliqueNumberGammaGofStatistic, - GraphConnectedComponentsGammaGofStatistic, - GraphEdgesNumberGammaGofStatistic, - GraphIndependenceNumberGammaGofStatistic, - GraphMaxDegreeGammaGofStatistic, - GreenwoodGammaGofStatistic, - KolmogorovSmirnovGammaGofStatistic, - KuiperGammaGofStatistic, - LikelihoodRatioGammaGofStatistic, - LillieforsGammaGofStatistic, - MinToshiyukiGammaGofStatistic, - MoranGammaGofStatistic, - ProbabilityPlotCorrelationGammaGofStatistic, - WatsonGammaGofStatistic, -) from pysatl_criterion.statistics.models import AbstractStatistic from pysatl_criterion.statistics.normal import ( AbstractNormalityGofStatistic, diff --git a/pysatl_criterion/statistics/log_normal.py b/pysatl_criterion/statistics/log_normal.py index 7229bd5..7832bf8 100644 --- a/pysatl_criterion/statistics/log_normal.py +++ b/pysatl_criterion/statistics/log_normal.py @@ -1,6 +1,7 @@ import inspect import sys import warnings +from abc import ABC import numpy as np import scipy.stats as scipy_stats @@ -13,7 +14,7 @@ from pysatl_criterion.statistics.normal import AbstractNormalityGofStatistic -class AbstractLogNormalGofStatistic(AbstractGoodnessOfFitStatistic): +class AbstractLogNormalGofStatistic(AbstractGoodnessOfFitStatistic, ABC): """ Base class for Log-Normal distribution Goodness-of-Fit statistics. """ @@ -57,10 +58,16 @@ def __init__(self, alternative="two-sided", mode="auto", s=1, scale=1): AbstractLogNormalGofStatistic.__init__(self, s=s, scale=scale) KSStatistic.__init__(self, alternative, mode) + @staticmethod + @override + def short_code(): + return "KS" + @staticmethod @override def code(): - return f"KS_{AbstractLogNormalGofStatistic.code()}" + short_code = KolmogorovSmirnovLogNormalGofStatistic.short_code() + return f"{short_code}_{AbstractLogNormalGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -81,10 +88,16 @@ class CramerVonMiseLogNormalGofStatistic(AbstractLogNormalGofStatistic, CrammerV Julius Springer. """ + @staticmethod + @override + def short_code(): + return "CVM" + @staticmethod @override def code(): - return f"CVM_{AbstractLogNormalGofStatistic.code()}" + short_code = CramerVonMiseLogNormalGofStatistic.short_code() + return f"{short_code}_{AbstractLogNormalGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -104,10 +117,16 @@ class QuesenberryMillerLogNormalGofStatistic(AbstractLogNormalGofStatistic): Journal of Statistical Computation and Simulation, 5, 169-191. """ + @staticmethod + @override + def short_code(): + return "QUESENBERRY_MILLER" + @staticmethod @override def code(): - return f"QUESENBERRY_MILLER_{AbstractLogNormalGofStatistic.code()}" + short_code = QuesenberryMillerLogNormalGofStatistic.short_code() + return f"{short_code}_{AbstractLogNormalGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -143,10 +162,16 @@ class KLSupremumLogNormalGoFStatistic(AbstractGoodnessOfFitStatistic): Journal of Statistical Computation and Simulation, 85(11), 215-233. """ + @staticmethod + @override + def short_code(): + return "KL_SUP" + @staticmethod @override def code(): - return "KL_SUP_LOGNORMAL_GOODNESS_OF_FIT" + short_code = KLSupremumLogNormalGoFStatistic.short_code() + return f"{short_code}_{AbstractLogNormalGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -231,7 +256,7 @@ def calculate_critical_value(self, n, alpha=0.05): ) -class KLIntegralLogNormalGoFStatistic: +class KLIntegralLogNormalGoFStatistic(AbstractLogNormalGofStatistic): """ Integral test for lognormality based on Kullback-Leibler divergences. @@ -243,9 +268,17 @@ class KLIntegralLogNormalGoFStatistic: """ @staticmethod + @override + def short_code(): + return "KL_INT" + + @staticmethod + @override def code(): - return "KL_INT_LOGNORMAL_GOODNESS_OF_FIT" + short_code = KLIntegralLogNormalGoFStatistic.short_code() + return f"{short_code}_{AbstractLogNormalGofStatistic.code()}" + @override def execute_statistic(self, rvs, **kwargs): rvs = np.array(rvs) @@ -341,6 +374,7 @@ def calculate_critical_value(self, n, alpha=0.05): def _create_lognormal_class(normal_cls): new_class_name = normal_cls.__name__.replace("Normality", "LogNormal") class_code = normal_cls.code().replace("NORMALITY", "LOGNORMAL") + class_short_code = normal_cls.short_code() class LogNormalClass(AbstractLogNormalGofStatistic): @override @@ -355,6 +389,11 @@ def execute_statistic(self, rvs, **kwargs): return normal_cls().execute_statistic(standardized_log_rvs, **kwargs) + @staticmethod + @override + def short_code(): + return class_short_code + @staticmethod @override def code():