diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index 4aae312..c50f2ad 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -33,6 +33,28 @@ WeExponentialityGofStatistic, WongWongExponentialityGofStatistic, ) +from pysatl_criterion.statistics.gamma import ( + AbstractGammaGofStatistic, + AndersonDarlingGammaGofStatistic, + Chi2PearsonGammaGofStatistic, + CramerVonMisesGammaGofStatistic, + CressieReadGammaGofStatistic, + GraphAverageDegreeGammaGofStatistic, + GraphCliqueNumberGammaGofStatistic, + GraphConnectedComponentsGammaGofStatistic, + GraphEdgesNumberGammaGofStatistic, + GraphIndependenceNumberGammaGofStatistic, + GraphMaxDegreeGammaGofStatistic, + GreenwoodGammaGofStatistic, + KolmogorovSmirnovGammaGofStatistic, + KuiperGammaGofStatistic, + LikelihoodRatioGammaGofStatistic, + LillieforsGammaGofStatistic, + MinToshiyukiGammaGofStatistic, + MoranGammaGofStatistic, + ProbabilityPlotCorrelationGammaGofStatistic, + WatsonGammaGofStatistic, +) from pysatl_criterion.statistics.models import AbstractStatistic from pysatl_criterion.statistics.normal import ( AbstractNormalityGofStatistic, @@ -127,6 +149,26 @@ "LOSWeibullGofStatistic", "Chi2PearsonWeibullGofStatistic", "AndersonDarlingWeibullGofStatistic", + "AbstractGammaGofStatistic", + "KolmogorovSmirnovGammaGofStatistic", + "AndersonDarlingGammaGofStatistic", + "CramerVonMisesGammaGofStatistic", + "KuiperGammaGofStatistic", + "GreenwoodGammaGofStatistic", + "MinToshiyukiGammaGofStatistic", + "WatsonGammaGofStatistic", + "MoranGammaGofStatistic", + "LillieforsGammaGofStatistic", + "Chi2PearsonGammaGofStatistic", + "LikelihoodRatioGammaGofStatistic", + "CressieReadGammaGofStatistic", + "ProbabilityPlotCorrelationGammaGofStatistic", + "GraphEdgesNumberGammaGofStatistic", + "GraphMaxDegreeGammaGofStatistic", + "GraphAverageDegreeGammaGofStatistic", + "GraphConnectedComponentsGammaGofStatistic", + 
class AbstractGammaGofStatistic(AbstractGoodnessOfFitStatistic, ABC):
    """Base class for Gamma goodness-of-fit statistics.

    Stores the parameters of the reference Gamma distribution, which concrete
    statistics read back as ``self.shape`` and ``self.scale``.

    Parameters
    ----------
    shape : float, default 1.0
        Shape parameter of the reference Gamma distribution.
    scale : float, default 1.0
        Scale parameter of the reference Gamma distribution.

    Raises
    ------
    ValueError
        If ``shape`` or ``scale`` is not strictly positive (NaN included).
    """

    def __init__(self, shape: float = 1.0, scale: float = 1.0):
        # ``not (x > 0)`` rather than ``x <= 0``: every comparison with NaN is
        # False, so the latter would silently accept NaN parameters and every
        # statistic computed afterwards would be NaN.
        if not shape > 0:
            raise ValueError("Shape must be positive.")
        if not scale > 0:
            raise ValueError("Scale must be positive.")
        # NOTE: this initializer does not chain to ``super().__init__()``;
        # subclasses that mix in another statistic with its own constructor
        # must initialise that base explicitly.
        self.shape = shape
        self.scale = scale

    @staticmethod
    @override
    def code():
        """Return ``GAMMA_`` followed by the generic goodness-of-fit code."""
        return f"GAMMA_{AbstractGoodnessOfFitStatistic.code()}"
class LillieforsGammaGofStatistic(AbstractGammaGofStatistic, LillieforsTest):
    """Lilliefors-type KS statistic with Gamma parameters estimated from data.

    The Gamma shape and scale are re-estimated from every sample by the method
    of moments, so the ``shape`` / ``scale`` values given to the constructor
    are not used by :meth:`execute_statistic`.

    References
    ----------
    .. [1] Lilliefors, H. W. (1967). "On the Kolmogorov–Smirnov test for
       normality with mean and variance unknown". *Journal of the American
       Statistical Association*, 62(318), 399–402.
    """

    @staticmethod
    @override
    def code():
        """Return ``LILLIE_`` followed by the Gamma goodness-of-fit code."""
        return f"LILLIE_{AbstractGammaGofStatistic.code()}"

    @override
    def execute_statistic(self, rvs, **kwargs):
        """
        Parameters
        ----------
        rvs : array_like
            Observations; converted to a float array. The Gamma parameters are
            estimated from them as ``shape = mean**2 / var`` and
            ``scale = var / mean`` (``var`` uses ``ddof=1``).

        Returns
        -------
        float
            Kolmogorov–Smirnov-type statistic computed against the Gamma CDF
            with the estimated parameters.

        Raises
        ------
        ValueError
            If the sample is empty, has fewer than two observations, or its
            mean or variance is not strictly positive (NaN included).
        """

        sample = np.asarray(rvs, dtype=float)
        n = sample.size
        if n == 0:
            raise ValueError("At least one observation is required for the Lilliefors statistic.")
        if n < 2:
            # The ddof=1 variance of a single value is NaN (numpy also warns);
            # NaN would slip past a ``var <= 0`` test and produce a NaN result.
            raise ValueError(
                "At least two observations are required to estimate Gamma parameters."
            )

        mean = np.mean(sample)
        var = np.var(sample, ddof=1)
        # Written as ``not (... > 0)`` so that NaN moments are rejected as well.
        if not (mean > 0 and var > 0):
            raise ValueError(
                "Sample mean and variance must be positive for Gamma parameter estimation."
            )

        # Method-of-moments estimates: mean = shape * scale, var = shape * scale**2.
        shape_hat = mean**2 / var
        scale_hat = var / mean
        sorted_sample = np.sort(sample)
        cdf_vals = scipy_stats.gamma.cdf(sorted_sample, a=shape_hat, scale=scale_hat)
        # Start the MRO lookup *after* LillieforsTest, i.e. bypass its own
        # execute_statistic and hand the precomputed CDF values to the next
        # implementation (presumably KSStatistic — confirm in statistics.common).
        return super(LillieforsTest, self).execute_statistic(sorted_sample, cdf_vals)
class WatsonGammaGofStatistic(AbstractGammaGofStatistic):
    """Watson's rotation-invariant EDF statistic using Gamma CDF values.

    References
    ----------
    .. [1] Watson, G. S. (1961). "Goodness-of-fit tests on a circle".
       *Biometrika*, 48(1/2), 109–114.
    """

    @staticmethod
    @override
    def code():
        """Return ``WAT_`` followed by the Gamma goodness-of-fit code."""
        gamma_code = AbstractGammaGofStatistic.code()
        return f"WAT_{gamma_code}"

    @override
    def execute_statistic(self, rvs, **kwargs):
        """
        Parameters
        ----------
        rvs : array_like
            Observations assumed to follow Gamma(shape, scale).

        Returns
        -------
        float
            Watson $U^{2}$: the Cramér–von Mises sum minus the correction
            ``(sum(F) - n/2)**2 / n`` for the mean of the CDF values.

        Raises
        ------
        ValueError
            If the sample is empty.
        """

        ordered = np.sort(np.asarray(rvs))
        size = len(ordered)
        if size < 1:
            raise ValueError(
                "At least one observation is required to compute the Watson statistic."
            )

        probs = scipy_stats.gamma.cdf(ordered, a=self.shape, scale=self.scale)
        ranks = np.arange(1, size + 1)
        # Mid-rank plotting positions (2i - 1) / (2n).
        midpoints = (2 * ranks - 1) / (2 * size)
        cvm_part = 1.0 / (12 * size) + np.sum((probs - midpoints) ** 2)
        centering = np.sum(probs) - size / 2
        return float(cvm_part - (centering**2) / size)
class GreenwoodGammaGofStatistic(AbstractGammaGofStatistic):
    """Greenwood spacing statistic measuring uniformized Gamma gaps.

    References
    ----------
    .. [1] Greenwood, M. (1946). "The statistical study of infectious disease".
       *Journal of the Royal Statistical Society. Series A*, 109(1), 85–110.
    """

    @staticmethod
    @override
    def code():
        """Return ``GRW_`` followed by the Gamma goodness-of-fit code."""
        return f"GRW_{AbstractGammaGofStatistic.code()}"

    @override
    def execute_statistic(self, rvs, **kwargs):
        """
        Parameters
        ----------
        rvs : array_like
            Observations assumed to follow Gamma(shape, scale).

        Returns
        -------
        float
            Greenwood spacing statistic $G = \\sum_{i=1}^{n+1} D_i^2$ where
            spacings $D_i$ are the gaps between consecutive values of
            ``0, F(x_(1)), ..., F(x_(n)), 1`` under the Gamma CDF.

        Raises
        ------
        ValueError
            If the sample is empty, or if a negative spacing is found.
        """

        sorted_rvs = np.sort(np.asarray(rvs))
        if len(sorted_rvs) == 0:
            # Without this guard the only spacing is [0, 1] and the method
            # would report 1.0 for a sample that contains no data at all.
            raise ValueError(
                "At least one observation is required to compute the Greenwood statistic."
            )

        cdf_vals = scipy_stats.gamma.cdf(sorted_rvs, a=self.shape, scale=self.scale)
        # Pad with the endpoints of [0, 1] so that n points give n + 1 spacings.
        spacings = np.diff(np.concatenate(([0.0], cdf_vals, [1.0])))
        if np.any(spacings < 0):
            raise ValueError("Spacings must be non-negative; check input data ordering.")
        return float(np.sum(spacings**2))
class AbstractBinnedGammaGofStatistic(AbstractGammaGofStatistic, Chi2Statistic, ABC):
    """Shared machinery for Gamma GOF statistics built on equiprobable bins.

    Bin edges are Gamma quantiles taken at equally spaced probabilities, so
    each bin has the same expected count ``n / bins``. Subclasses choose the
    divergence through ``lambda_value``, which is forwarded as ``lambda_`` to
    ``Chi2Statistic.execute_statistic``.
    """

    # Divergence parameter handed to Chi2Statistic; subclasses override it.
    lambda_value: float = 1.0

    def __init__(self, bins: int = 8, shape: float = 1.0, scale: float = 1.0):
        """Validate ``bins`` (at least 2) and store the Gamma parameters."""
        if bins < 2:
            raise ValueError("At least two bins are required for binned Gamma statistics.")
        self.bins = bins
        super().__init__(shape=shape, scale=scale)
        # Pin the effective value (class default, or one a subclass assigned
        # before delegating here) onto the instance.
        self.lambda_value = getattr(self, "lambda_value", 1.0)

    def _counts_and_expected(self, rvs):
        """Return ``(observed counts, expected counts)`` over the Gamma bins.

        Raises ``ValueError`` when the sample is empty.
        """
        observations = np.asarray(rvs)
        total = observations.size
        if total == 0:
            raise ValueError("At least one observation is required for binned Gamma statistics.")

        n_bins = self.bins
        probs = np.linspace(0.0, 1.0, n_bins + 1)
        cut_points = scipy_stats.gamma.ppf(probs, a=self.shape, scale=self.scale)
        # Open the two outer bins so every observation lands in some bin.
        cut_points[0], cut_points[-1] = -np.inf, np.inf
        observed = np.histogram(observations, bins=cut_points)[0]
        return observed, np.full(n_bins, total / n_bins)

    @override
    def execute_statistic(self, rvs, **kwargs):
        """Bin ``rvs`` and return the divergence for ``lambda_value`` as a float."""
        observed, expected = self._counts_and_expected(rvs)
        divergence = Chi2Statistic.execute_statistic(
            self, observed, expected, lambda_=self.lambda_value
        )
        return float(divergence)
class LikelihoodRatioGammaGofStatistic(AbstractBinnedGammaGofStatistic):
    """Log-likelihood ratio ($G$-test) for Gamma reference distribution.

    Uses the binned machinery of the parent class with ``lambda_value = 0.0``.

    References
    ----------
    .. [1] Wilks, S. S. (1935). "The likelihood test of independence in
       contingency tables". *Annals of Mathematical Statistics*, 6(4), 190–196.
    """

    # Divergence parameter forwarded by the parent class to Chi2Statistic.
    lambda_value = 0.0

    @staticmethod
    @override
    def code():
        """Return ``G_TEST_`` followed by the Gamma goodness-of-fit code."""
        gamma_code = AbstractGammaGofStatistic.code()
        return f"G_TEST_{gamma_code}"

    @override
    def execute_statistic(self, rvs, **kwargs):
        """
        Likelihood-ratio statistic on equiprobable Gamma quantile bins.

        Pure delegation to the parent implementation; the override exists to
        carry this statistic-specific documentation.
        """

        result = super().execute_statistic(rvs, **kwargs)
        return result
class ProbabilityPlotCorrelationGammaGofStatistic(AbstractGammaGofStatistic):
    """Filliben-style PPCC statistic comparing Gamma quantiles to the sample.

    References
    ----------
    .. [1] Filliben, J. J. (1975). "The probability plot correlation
       coefficient test for normality". *Technometrics*, 17(1), 111–117.
    """

    @staticmethod
    @override
    def code():
        """Return ``PPCC_`` followed by the Gamma goodness-of-fit code."""
        gamma_code = AbstractGammaGofStatistic.code()
        return f"PPCC_{gamma_code}"

    @override
    def execute_statistic(self, rvs, **kwargs):
        """
        Parameters
        ----------
        rvs : array_like
            Observations assumed to follow Gamma(shape, scale).

        Returns
        -------
        float
            One minus the Pearson correlation between the sorted sample and
            the Gamma quantiles at plotting positions
            ``(i - 0.375) / (n + 0.25)``; values near zero mean the
            probability plot is close to a straight line.

        Raises
        ------
        ValueError
            If fewer than two observations are given, or if either the sample
            or the theoretical quantiles have zero spread.
        """

        ordered = np.sort(np.asarray(rvs, dtype=float))
        count = ordered.size
        if count < 2:
            raise ValueError("At least two observations are required for the PPCC statistic.")

        positions = (np.arange(1, count + 1) - 0.375) / (count + 0.25)
        theoretical = scipy_stats.gamma.ppf(positions, a=self.shape, scale=self.scale)

        dev_sample = ordered - np.mean(ordered)
        dev_theory = theoretical - np.mean(theoretical)
        cross = np.sum(dev_sample * dev_theory)
        norm = np.sqrt(np.sum(dev_sample**2) * np.sum(dev_theory**2))
        if norm == 0:
            raise ValueError("Degenerate data encountered while computing PPCC statistic.")

        return float(1.0 - cross / norm)
class GraphEdgesNumberGammaGofStatistic(
    AbstractGraphGammaGofStatistic, GraphEdgesNumberTestStatistic
):
    """Edge-count graph statistic evaluated on the Gamma-CDF transformed sample."""

    @staticmethod
    @override
    def code():
        """Return ``<stat name>_<graph Gamma code>`` for this statistic."""
        graph_gamma_code = AbstractGraphGammaGofStatistic.code()
        return f"{GraphEdgesNumberGammaGofStatistic.get_stat_name()}_{graph_gamma_code}"
class GraphCliqueNumberGammaGofStatistic(
    AbstractGraphGammaGofStatistic, GraphCliqueNumberTestStatistic
):
    """Clique-number graph statistic on the Gamma-CDF transformed sample."""

    @staticmethod
    @override
    def code():
        """Return ``<stat name>_<graph Gamma code>`` for this statistic."""
        graph_gamma_code = AbstractGraphGammaGofStatistic.code()
        return f"{GraphCliqueNumberGammaGofStatistic.get_stat_name()}_{graph_gamma_code}"

    def _evaluate_graph_statistic(self, transformed_sample, **kwargs):
        """Evaluate via ``GraphCliqueNumberTestStatistic.execute_statistic``.

        Overrides the parent hook so that the clique-number implementation is
        called directly on the already transformed sample.
        """
        evaluate = GraphCliqueNumberTestStatistic.execute_statistic
        return evaluate(self, transformed_sample, **kwargs)
def test_kolmogorov_smirnov_gamma_statistic():
    """Kolmogorov–Smirnov $D$ for Gamma(shape=2.5, scale=1.1).

    Runs the statistic on the module-level ``_SAMPLE`` and compares it with a
    pinned reference value (relative tolerance 1e-9).
    """

    ks_stat = KolmogorovSmirnovGammaGofStatistic(shape=_SHAPE, scale=_SCALE)
    observed = ks_stat.execute_statistic(_SAMPLE)
    reference = pytest.approx(0.2814182084684763, rel=1e-9)
    assert observed == reference
def test_anderson_darling_gamma_statistic():
    """Anderson–Darling $A^2$ for Gamma(shape=2.5, scale=1.1).

    Runs the statistic on the module-level ``_SAMPLE`` and compares it with a
    pinned reference value (relative tolerance 1e-9).
    """

    ad_stat = AndersonDarlingGammaGofStatistic(shape=_SHAPE, scale=_SCALE)
    observed = ad_stat.execute_statistic(_SAMPLE)
    reference = pytest.approx(1.5834952876091002, rel=1e-9)
    assert observed == reference
def test_watson_gamma_statistic():
    """Watson $U^2$ for Gamma(shape=2.5, scale=1.1).

    Runs the statistic on the module-level ``_SAMPLE`` and compares it with a
    pinned reference value (relative tolerance 1e-9).
    """

    watson_stat = WatsonGammaGofStatistic(shape=_SHAPE, scale=_SCALE)
    observed = watson_stat.execute_statistic(_SAMPLE)
    reference = pytest.approx(0.06149897222339809, rel=1e-9)
    assert observed == reference
def test_cressie_read_gamma_statistic():
    """Cressie–Read power divergence with $\\lambda=2/3$ on five Gamma bins."""

    cr_stat = CressieReadGammaGofStatistic(
        power=2 / 3,
        bins=5,
        shape=_SHAPE,
        scale=_SCALE,
    )
    observed = cr_stat.execute_statistic(_SAMPLE)
    reference = pytest.approx(3.352003528728041, rel=1e-9)
    assert observed == reference
def test_graph_clique_number_gamma_statistic():
    """Clique-number statistic on ``_SAMPLE`` after the Gamma CDF transform."""

    clique_stat = GraphCliqueNumberGammaGofStatistic(
        shape=_SHAPE,
        scale=_SCALE,
    )
    observed = clique_stat.execute_statistic(_SAMPLE)
    reference = pytest.approx(2.0, rel=1e-12)
    assert observed == reference
"PPCC_GAMMA_GOODNESS_OF_FIT", + ), + ( + GraphEdgesNumberGammaGofStatistic, + "EDGESNUMBER_GRAPH_GAMMA_GOODNESS_OF_FIT", + ), + ( + GraphMaxDegreeGammaGofStatistic, + "MAXDEGREE_GRAPH_GAMMA_GOODNESS_OF_FIT", + ), + ( + GraphAverageDegreeGammaGofStatistic, + "AVGDEGREE_GRAPH_GAMMA_GOODNESS_OF_FIT", + ), + ( + GraphConnectedComponentsGammaGofStatistic, + "CONNECTEDCOMPONENTS_GRAPH_GAMMA_GOODNESS_OF_FIT", + ), + ( + GraphCliqueNumberGammaGofStatistic, + "CLIQUENUMBER_GRAPH_GAMMA_GOODNESS_OF_FIT", + ), + ( + GraphIndependenceNumberGammaGofStatistic, + "INDEPENDENCENUMBER_GRAPH_GAMMA_GOODNESS_OF_FIT", + ), + ], +) +def test_gamma_statistic_codes(stat_class, expected_code): + """Ensure every Gamma statistic exposes a stable `code` identifier.""" + + assert stat_class.code() == expected_code + + +@pytest.mark.parametrize( + "stat_class", + [WatsonGammaGofStatistic, KuiperGammaGofStatistic, MoranGammaGofStatistic], +) +def test_gamma_statistics_require_observations(stat_class): + """Statistics that rely on EDF spacings should reject empty samples.""" + + statistic = stat_class(shape=_SHAPE, scale=_SCALE) + with pytest.raises(ValueError, match="At least one observation"): + statistic.execute_statistic([]) + + +def test_lilliefors_gamma_requires_sample(): + """Lilliefors correction needs at least one observation.""" + + statistic = LillieforsGammaGofStatistic() + with pytest.raises(ValueError, match="At least one observation"): + statistic.execute_statistic([]) + + +def test_lilliefors_gamma_requires_positive_moments(): + """Zero variance samples should trigger the MOM validation error.""" + + statistic = LillieforsGammaGofStatistic() + with pytest.raises(ValueError, match="must be positive"): + statistic.execute_statistic([1.0, 1.0, 1.0, 1.0]) + + +def test_moran_gamma_detects_non_positive_spacings(): + """Duplicate-valued samples cause zero spacings and should error out.""" + + statistic = MoranGammaGofStatistic(shape=_SHAPE, scale=_SCALE) + with pytest.raises(ValueError, 
match="Spacings must be strictly positive"): + statistic.execute_statistic([1.0, 1.0, 1.0]) + + +def test_gamma_binned_statistics_validate_bin_count(): + """Binned statistics require at least two histogram bins.""" + + with pytest.raises(ValueError, match="At least two bins"): + Chi2PearsonGammaGofStatistic(bins=1, shape=_SHAPE, scale=_SCALE) + + +def test_gamma_binned_statistics_require_sample(): + """Histogram-based tests must receive observations.""" + + statistic = Chi2PearsonGammaGofStatistic(bins=5, shape=_SHAPE, scale=_SCALE) + with pytest.raises(ValueError, match="At least one observation"): + statistic.execute_statistic([]) + + +def test_probability_plot_gamma_requires_minimum_sample(): + """PPCC metric needs at least two points to compute a correlation.""" + + statistic = ProbabilityPlotCorrelationGammaGofStatistic(shape=_SHAPE, scale=_SCALE) + with pytest.raises(ValueError, match="At least two observations"): + statistic.execute_statistic([1.0]) + + +def test_probability_plot_gamma_detects_degenerate_sample(): + """Identical points yield zero variance and should fail PPCC computation.""" + + statistic = ProbabilityPlotCorrelationGammaGofStatistic(shape=_SHAPE, scale=_SCALE) + with pytest.raises(ValueError, match="Degenerate data"): + statistic.execute_statistic([1.0, 1.0, 1.0]) + + +def test_graph_gamma_statistics_require_sample(): + """Graph-based Gamma tests should reject empty datasets.""" + + statistic = GraphEdgesNumberGammaGofStatistic(shape=_SHAPE, scale=_SCALE) + with pytest.raises(ValueError, match="Gamma graph statistics"): + statistic.execute_statistic([]) + + +def test_greenwood_gamma_detects_negative_spacings(monkeypatch): + """Artificially broken CDF should trigger the spacing guard.""" + + from pysatl_criterion.statistics import gamma as gamma_module + + statistic = GreenwoodGammaGofStatistic(shape=_SHAPE, scale=_SCALE) + + def fake_cdf(values, **kwargs): + values = np.asarray(values, dtype=float) + artificial = np.linspace(0, 1, 
values.size, dtype=float) + if artificial.size >= 2: + artificial[1] = artificial[0] - 0.1 # force a negative spacing + return artificial + + monkeypatch.setattr(gamma_module.scipy_stats.gamma, "cdf", fake_cdf) + + with pytest.raises(ValueError, match="Spacings must be non-negative"): + statistic.execute_statistic(_SAMPLE)