Skip to content

Commit 93c4a60

Browse files
committed
unit tests
1 parent 48e93f3 commit 93c4a60

File tree

3 files changed

+141
-5
lines changed

3 files changed

+141
-5
lines changed

pymc_extras/distributions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
BetaNegativeBinomial,
2323
GeneralizedPoisson,
2424
Skellam,
25+
GrassiaIIGeometric,
2526
)
2627
from pymc_extras.distributions.histogram_utils import histogram_approximation
2728
from pymc_extras.distributions.multivariate import R2D2M2CP

pymc_extras/distributions/discrete.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import numpy as np
1616
import pymc as pm
1717

18+
from pymc.distributions.distribution import Discrete
1819
from pymc.distributions.dist_math import betaln, check_parameters, factln, logpow
1920
from pymc.distributions.shape_utils import rv_size_is_none
2021
from pytensor import tensor as pt
@@ -441,12 +442,11 @@ def sim_data(lam):
441442
g2g = GrassiaIIGeometricRV()
442443

443444

444-
class GrassiaIIGeometric(UnitContinuous):
445+
class GrassiaIIGeometric(Discrete):
445446
r"""Grassia(II)-Geometric distribution.
446447
447-
This distribution is a flexible alternative to the Geometric distribution for the
448-
number of trials until a discrete event, and can be easily extended to support both static
449-
and time-varying covariates.
448+
This distribution is a flexible alternative to the Geometric distribution for the number of trials until a
449+
discrete event, and can be extended to support both static and time-varying covariates.
450450
451451
Hardie and Fader describe this distribution with the following PMF and survival functions in [1]_:
452452
@@ -520,4 +520,22 @@ def logcdf(value, r, alpha):
520520
r > 0,
521521
alpha > 0,
522522
msg="r > 0, alpha > 0",
523-
)
523+
)
524+
525+
def support_point(rv, size, r, alpha):
526+
"""Calculate a reasonable starting point for sampling.
527+
528+
For the GrassiaIIGeometric distribution, we use a point estimate based on
529+
the expected value of the mixing distribution. Since the mixing distribution
530+
is Gamma(r, 1/alpha), its mean is r/alpha. We then transform this through
531+
the geometric link function and round to ensure an integer value.
532+
"""
533+
# E[lambda] = r/alpha for Gamma(r, 1/alpha)
534+
# p = 1 - exp(-lambda) for geometric
535+
# E[T] = 1/p for geometric
536+
mean = pt.ceil(pt.exp(alpha/r)) # exp(alpha/r) upper-bounds 1/p evaluated at E[lambda]=r/alpha, giving a conservative integer starting value
537+
538+
if not rv_size_is_none(size):
539+
mean = pt.full(size, mean)
540+
541+
return mean

tests/distributions/test_discrete.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
BetaNegativeBinomial,
3535
GeneralizedPoisson,
3636
Skellam,
37+
GrassiaIIGeometric,
3738
)
3839

3940

@@ -208,3 +209,119 @@ def test_logp(self):
208209
{"mu1": Rplus_small, "mu2": Rplus_small},
209210
lambda value, mu1, mu2: scipy.stats.skellam.logpmf(value, mu1, mu2),
210211
)
212+
213+
214+
class TestGrassiaIIGeometric:
215+
class TestRandomVariable(BaseTestDistributionRandom):
216+
pymc_dist = GrassiaIIGeometric
217+
pymc_dist_params = {"r": 1.0, "alpha": 2.0}
218+
expected_rv_op_params = {"r": 1.0, "alpha": 2.0}
219+
tests_to_run = [
220+
"check_pymc_params_match_rv_op",
221+
"check_rv_size",
222+
]
223+
224+
def test_random_basic_properties(self):
225+
discrete_random_tester(
226+
dist=self.pymc_dist,
227+
paramdomains={"r": Rplus, "alpha": Rplus},
228+
ref_rand=lambda r, alpha, size: np.random.geometric(
229+
1 - np.exp(-np.random.gamma(r, 1/alpha, size=size)), size=size
230+
),
231+
)
232+
233+
@pytest.mark.parametrize("r,alpha", [
234+
(0.5, 1.0),
235+
(1.0, 2.0),
236+
(2.0, 0.5),
237+
(5.0, 1.0),
238+
])
239+
def test_random_moments(self, r, alpha):
240+
dist = self.pymc_dist.dist(r=r, alpha=alpha, size=10_000)
241+
draws = dist.eval()
242+
243+
# Check that all values are positive integers
244+
assert np.all(draws > 0)
245+
assert np.all(draws.astype(int) == draws)
246+
247+
# Check that values are reasonably distributed
248+
# Note: Exact moments are complex for this distribution
249+
# so we just check basic properties
250+
assert np.mean(draws) > 0
251+
assert np.var(draws) > 0
252+
253+
def test_logp_basic(self):
254+
r = pt.scalar("r")
255+
alpha = pt.scalar("alpha")
256+
value = pt.vector("value", dtype="int64")
257+
258+
logp = pm.logp(GrassiaIIGeometric.dist(r, alpha), value)
259+
logp_fn = pytensor.function([value, r, alpha], logp)
260+
261+
# Test basic properties of logp
262+
test_value = np.array([1, 2, 3, 4, 5])
263+
test_r = 1.0
264+
test_alpha = 1.0
265+
266+
logp_vals = logp_fn(test_value, test_r, test_alpha)
267+
assert not np.any(np.isnan(logp_vals))
268+
assert np.all(np.isfinite(logp_vals))
269+
270+
# Test invalid values
271+
assert logp_fn(np.array([0]), test_r, test_alpha) == -np.inf # Value must be > 0, so logp of 0 is -inf
272+
273+
with pytest.raises(TypeError):
274+
logp_fn(np.array([1.5]), test_r, test_alpha) == -np.inf # Value must be integer
275+
276+
# Test parameter restrictions
277+
with pytest.raises(ParameterValueError):
278+
logp_fn(np.array([1]), -1.0, test_alpha) # r must be > 0
279+
280+
with pytest.raises(ParameterValueError):
281+
logp_fn(np.array([1]), test_r, -1.0) # alpha must be > 0
282+
283+
def test_sampling_consistency(self):
284+
"""Test that sampling from the distribution produces reasonable results"""
285+
r = 2.0
286+
alpha = 1.0
287+
with pm.Model():
288+
x = GrassiaIIGeometric("x", r=r, alpha=alpha)
289+
trace = pm.sample(chains=1, draws=1000, random_seed=42).posterior
290+
291+
samples = trace["x"].values.flatten()
292+
293+
# Check basic properties of samples
294+
assert np.all(samples > 0) # All values should be positive
295+
assert np.all(samples.astype(int) == samples) # All values should be integers
296+
297+
# Check mean and variance are reasonable
298+
# (exact values depend on the parameterization)
299+
assert 0 < np.mean(samples) < np.inf
300+
assert 0 < np.var(samples) < np.inf
301+
302+
@pytest.mark.parametrize(
303+
"r, alpha, size, expected_shape",
304+
[
305+
(1.0, 1.0, None, ()), # Scalar output
306+
([1.0, 2.0], 1.0, None, (2,)), # Vector output from r
307+
(1.0, [1.0, 2.0], None, (2,)), # Vector output from alpha
308+
(1.0, 1.0, (3, 2), (3, 2)), # Explicit size
309+
],
310+
)
311+
def test_support_point(self, r, alpha, size, expected_shape):
312+
"""Test that support_point returns reasonable values with correct shapes"""
313+
with pm.Model() as model:
314+
GrassiaIIGeometric("x", r=r, alpha=alpha, size=size)
315+
316+
init_point = model.initial_point()["x"]
317+
318+
# Check shape
319+
assert init_point.shape == expected_shape
320+
321+
# Check values are positive integers
322+
assert np.all(init_point > 0)
323+
assert np.all(init_point.astype(int) == init_point)
324+
325+
# Check values are finite and reasonable
326+
assert np.all(np.isfinite(init_point))
327+
assert np.all(init_point < 1e6) # Should not be extremely large

0 commit comments

Comments
 (0)