WIP time indexing

ColtAllen · ColtAllen · commit 8a304599f0fe · 2025-06-20T08:37:10.000-06:00
diff --git a/pymc_extras/distributions/discrete.py b/pymc_extras/distributions/discrete.py
@@ -399,59 +399,70 @@ def dist(cls, mu1, mu2, **kwargs):
             **kwargs,
         )
 
-
+# TODO: C expressions are not correct. Both value and covariate broadcasting must be handled.
 class GrassiaIIGeometricRV(RandomVariable):
     name = "g2g"
     signature = "(),(),()->()"
 
     dtype = "int64"
     _print_name = ("GrassiaIIGeometric", "\\operatorname{GrassiaIIGeometric}")
 
-    def __call__(self, r, alpha, time_covar_dot=None,size=None, **kwargs):
-        return super().__call__(r, alpha, time_covar_dot=time_covar_dot, size=size, **kwargs)
+    def __call__(self, r, alpha, time_covariates_sum=None, size=None, **kwargs):
+        return super().__call__(r, alpha, time_covariates_sum, size=size, **kwargs)
 
     @classmethod
-    def rng_fn(cls, rng, r, alpha, time_covar_dot,size):
-        if time_covar_dot is None:
-            time_covar_dot = np.array(0)
+    def rng_fn(cls, rng, r, alpha, time_covariates_sum, size):
+        if time_covariates_sum is None:
+            time_covariates_sum = np.array(0)
         if size is None:
-            size = np.broadcast_shapes(r.shape, alpha.shape, time_covar_dot.shape)
+            size = np.broadcast_shapes(r.shape, alpha.shape, time_covariates_sum.shape)
 
         r = np.broadcast_to(r, size)
         alpha = np.broadcast_to(alpha, size)
-        time_covar_dot = np.broadcast_to(time_covar_dot,size)
-
-        output = np.zeros(shape=size + (1,))  # noqa:RUF005
-
-        lam = rng.gamma(shape=r, scale=1/alpha, size=size)
-
-        exp_time_covar_dot = np.exp(time_covar_dot)
-
-        def sim_data(lam, exp_time_covar_dot):
-            # Handle numerical stability for very small lambda values
-            # p = np.where(
-            #     lam < 0.0001,
-            #     lam,  # For small lambda, p ≈ lambda
-            #     1 - np.exp(-lam * exp_time_covar_dot)
-            # )
-
-            # Ensure lam is in valid range for geometric distribution
-            lam = np.clip(lam, np.finfo(float).tiny, 1.)
-            p = 1 - np.exp(-lam * exp_time_covar_dot)
-
-            t = rng.geometric(p)
-            return np.array([t])
-
-        for index in np.ndindex(*size):
-            output[index] = sim_data(lam[index], exp_time_covar_dot[index])
-
+        time_covariates_sum = np.broadcast_to(time_covariates_sum, size)
+
+        # Calculate exp(time_covariates_sum) for all samples
+        exp_time_covar_sum = np.exp(time_covariates_sum)
+        
+        # Initialize output array
+        output = np.zeros(size, dtype=np.int64)
+        
+        # For each sample, generate a value from the distribution
+        for idx in np.ndindex(*size):
+            # Calculate survival probabilities for each possible value
+            t = 1
+            while True:
+                C_t = t + exp_time_covar_sum[idx]
+                C_tm1 = (t - 1) + exp_time_covar_sum[idx]
+                
+                # Calculate PMF for current t
+                pmf = (
+                    (alpha[idx] / (alpha[idx] + C_tm1)) ** r[idx] - 
+                    (alpha[idx] / (alpha[idx] + C_t)) ** r[idx]
+                )
+                
+                # If PMF is negative or NaN, we've gone too far
+                if pmf <= 0 or np.isnan(pmf):
+                    break
+                    
+                # Accept this value with probability proportional to PMF
+                if rng.random() < pmf:
+                    output[idx] = t
+                    break
+                    
+                t += 1
+                
+                # Safety check to prevent infinite loops
+                if t > 1000:  # Arbitrary large number
+                    output[idx] = t
+                    break
+        
         return output
 
 
 g2g = GrassiaIIGeometricRV()
 
-# TODO: Add time-varying covariates. May simply replace the t-value , but is a continuous parameter
-class GrassiaIIGeometric(Discrete):
+# TODO: C expressions are not correct. Both value and covariate broadcasting must be handled.
     r"""Grassia(II)-Geometric distribution.
 
     This distribution is a flexible alternative to the Geometric distribution for the number of trials until a
@@ -494,7 +505,9 @@ class GrassiaIIGeometric(Discrete):
         Shape parameter (r > 0).
     alpha : tensor_like of float
         Scale parameter (alpha > 0).
-
+    time_covariates_sum : tensor_like of float, optional
+        Optional dot product of time-varying covariates and their coefficients, summed over time.
+        
     References
     ----------
     .. [1] Fader, Peter & G. S. Hardie, Bruce (2020).
@@ -505,22 +518,56 @@ class GrassiaIIGeometric(Discrete):
     rv_op = g2g
 
     @classmethod
-    def dist(cls, r, alpha, *args, **kwargs):
+    def dist(cls, r, alpha, time_covariates_sum=None, *args, **kwargs):
         r = pt.as_tensor_variable(r)
         alpha = pt.as_tensor_variable(alpha)
-        return super().dist([r, alpha], *args, **kwargs)
-
-    def logp(value, r, alpha):
-        logp = -r * (pt.log(alpha + value - 1) + pt.log(alpha + value))
+        if time_covariates_sum is None:
+            time_covariates_sum = pt.constant(0.0)
+        time_covariates_sum = pt.as_tensor_variable(time_covariates_sum)
+        return super().dist([r, alpha, time_covariates_sum], *args, **kwargs)
 
+    def logp(value, r, alpha, time_covariates_sum=None):
+        """
+        Log probability function for GrassiaIIGeometric distribution.
+        
+        The PMF is:
+        P(T=t|r,α,β;Z(t)) = (α/(α+C(t-1)))^r - (α/(α+C(t)))^r
+        
+        where C(t) = t + exp(time_covariates_sum)
+        """
+        if time_covariates_sum is None:
+            time_covariates_sum = pt.constant(0.0)
+            
+        # Calculate C(t) and C(t-1)
+        C_t = value + pt.exp(time_covariates_sum)
+        C_tm1 = (value - 1) + pt.exp(time_covariates_sum)
+        
+        # Calculate the PMF on log scale
+        logp = pt.log(
+            pt.pow(alpha / (alpha + C_tm1), r) - 
+            pt.pow(alpha / (alpha + C_t), r)
+        )
+        
+        # Handle invalid values
+        logp = pt.switch(
+            pt.or_(
+                value < 1,  # Value must be >= 1
+                pt.isnan(logp),  # Handle NaN cases
+            ),
+            -np.inf,
+            logp
+        )
+        
         return check_parameters(
             logp,
             r > 0,
             alpha > 0,
-            msg="s > 0, alpha > 0",
+            msg="r > 0, alpha > 0",
         )
 
-    def logcdf(value, r, alpha):
+    def logcdf(value, r, alpha, time_covariates_sum=None):
+        if time_covariates_sum is not None:
+            value = time_covariates_sum
         logcdf = r * (pt.log(value) - pt.log(alpha + value))
 
         return check_parameters(
@@ -530,15 +577,27 @@ def logcdf(value, r, alpha):
             msg="r > 0, alpha > 0",
         )
 
-    def support_point(rv, size, r, alpha):
+    def support_point(rv, size, r, alpha, time_covariates_sum=None):
         """Calculate a reasonable starting point for sampling.
 
         For the GrassiaIIGeometric distribution, we use a point estimate based on
         the expected value of the mixing distribution. Since the mixing distribution
         is Gamma(r, 1/alpha), its mean is r/alpha. We then transform this through
         the geometric link function and round to ensure an integer value.
+
+        When time_covariates_sum is provided, it affects the expected value through
+        the exponential link function: exp(time_covariates_sum).
         """
-        mean = pt.ceil(pt.exp(alpha/r))
+        # Base mean without covariates
+        mean = pt.exp(alpha/r)
+
+        # Apply time-varying covariates if provided
+        if time_covariates_sum is None:
+            time_covariates_sum = pt.constant(0.0)
+        mean = mean * pt.exp(time_covariates_sum)
+
+        # Round up to nearest integer
+        mean = pt.ceil(mean)
 
         if not rv_size_is_none(size):
             mean = pt.full(size, mean)
diff --git a/tests/distributions/test_discrete.py b/tests/distributions/test_discrete.py
@@ -214,23 +214,24 @@ def test_logp(self):
 class TestGrassiaIIGeometric:
     class TestRandomVariable(BaseTestDistributionRandom):
         pymc_dist = GrassiaIIGeometric
-        pymc_dist_params = {"r": .5, "alpha": 2.0}
-        expected_rv_op_params = {"r": .5, "alpha": 2.0}
+        pymc_dist_params = {"r": .5, "alpha": 2.0, "time_covariates_sum": 1.0}
+        expected_rv_op_params = {"r": .5, "alpha": 2.0, "time_covariates_sum": 1.0}
         tests_to_run = [
             "check_pymc_params_match_rv_op",
             "check_rv_size",
         ]
 
         def test_random_basic_properties(self):
-            # Test standard parameter values
+            # Test standard parameter values with time covariates
             discrete_random_tester(
                 dist=self.pymc_dist,
                 paramdomains={
                     "r": Domain([0.5, 1.0, 2.0], edges=(None, None)),  # Standard values
                     "alpha": Domain([0.5, 1.0, 2.0], edges=(None, None)),  # Standard values
+                    "time_covariates_sum": Domain([-1.0, 1.0, 2.0], edges=(None, None)),  # Time covariates
                 },
-                ref_rand=lambda r, alpha, size: np.random.geometric(
-                    1 - np.exp(-np.random.gamma(r, 1/alpha, size=size)), size=size
+                ref_rand=lambda r, alpha, time_covariates_sum, size: np.random.geometric(
+                    1 - np.exp(-np.random.gamma(r, 1/alpha, size=size) * np.exp(time_covariates_sum)), size=size
                 ),
             )
 
@@ -240,20 +241,21 @@ def test_random_basic_properties(self):
                 paramdomains={
                     "r": Domain([0.01, 0.1], edges=(None, None)),  # Small r values
                     "alpha": Domain([10.0, 100.0], edges=(None, None)),  # Large alpha values
+                    "time_covariates_sum": Domain([0.0, 1.0], edges=(None, None)),  # Time covariates
                 },
-                ref_rand=lambda r, alpha, size: np.random.geometric(
-                    np.clip(np.random.gamma(r, 1/alpha, size=size), 1e-5, 1.0), size=size
+                ref_rand=lambda r, alpha, time_covariates_sum, size: np.random.geometric(
+                    np.clip(np.random.gamma(r, 1/alpha, size=size) * np.exp(time_covariates_sum), 1e-5, 1.0), size=size
                 ),
             )
 
-        @pytest.mark.parametrize("r,alpha", [
-            (0.5, 1.0),
-            (1.0, 2.0),
-            (2.0, 0.5),
-            (5.0, 1.0),
+        @pytest.mark.parametrize("r,alpha,time_covariates_sum", [
+            (0.5, 1.0, 0.0),
+            (1.0, 2.0, 1.0),
+            (2.0, 0.5, -1.0),
+            (5.0, 1.0, None),
         ])
-        def test_random_moments(self, r, alpha):
-            dist = self.pymc_dist.dist(r=r, alpha=alpha, size=10_000)
+        def test_random_moments(self, r, alpha, time_covariates_sum):
+            dist = self.pymc_dist.dist(r=r, alpha=alpha, time_covariates_sum=time_covariates_sum, size=10_000)
             draws = dist.eval()
 
             # Check that all values are positive integers
@@ -269,65 +271,102 @@ def test_random_moments(self, r, alpha):
     def test_logp_basic(self):
         r = pt.scalar("r")
         alpha = pt.scalar("alpha")
+        time_covariates_sum = pt.scalar("time_covariates_sum")
         value = pt.vector("value", dtype="int64")
 
-        logp = pm.logp(GrassiaIIGeometric.dist(r, alpha), value)
-        logp_fn = pytensor.function([value, r, alpha], logp)
+        logp = pm.logp(GrassiaIIGeometric.dist(r, alpha, time_covariates_sum), value)
+        logp_fn = pytensor.function([value, r, alpha, time_covariates_sum], logp)
 
         # Test basic properties of logp
         test_value = np.array([1, 2, 3, 4, 5])
         test_r = 1.0
         test_alpha = 1.0
+        test_time_covariates_sum = 1.0
 
-        logp_vals = logp_fn(test_value, test_r, test_alpha)
+        logp_vals = logp_fn(test_value, test_r, test_alpha, test_time_covariates_sum)
         assert not np.any(np.isnan(logp_vals))
         assert np.all(np.isfinite(logp_vals))
 
         # Test invalid values
-        assert logp_fn(np.array([0]), test_r, test_alpha) == np.inf  # Value must be > 0
+        assert logp_fn(np.array([0]), test_r, test_alpha, test_time_covariates_sum) == np.inf  # Value must be > 0
 
         with pytest.raises(TypeError):
-            logp_fn(np.array([1.5]), test_r, test_alpha) == -np.inf  # Value must be integer
+            logp_fn(np.array([1.5]), test_r, test_alpha, test_time_covariates_sum)  # Value must be integer
 
         # Test parameter restrictions
         with pytest.raises(ParameterValueError):
-            logp_fn(np.array([1]), -1.0, test_alpha)  # r must be > 0
+            logp_fn(np.array([1]), -1.0, test_alpha, test_time_covariates_sum)  # r must be > 0
 
         with pytest.raises(ParameterValueError):
-            logp_fn(np.array([1]), test_r, -1.0)  # alpha must be > 0
+            logp_fn(np.array([1]), test_r, -1.0, test_time_covariates_sum)  # alpha must be > 0
 
     def test_sampling_consistency(self):
         """Test that sampling from the distribution produces reasonable results"""
         r = 2.0
         alpha = 1.0
+        time_covariates_sum = None
+        
+        # First test direct sampling from the distribution
+        dist = GrassiaIIGeometric.dist(r=r, alpha=alpha, time_covariates_sum=time_covariates_sum)
+        direct_samples = dist.eval()
+        
+        # Convert to numpy array if it's not already
+        if not isinstance(direct_samples, np.ndarray):
+            direct_samples = np.array([direct_samples])
+        
+        # Ensure we have a 1D array
+        if direct_samples.ndim == 0:
+            direct_samples = direct_samples.reshape(1)
+            
+        assert direct_samples.size > 0, "Direct sampling produced no samples"
+        assert np.all(direct_samples > 0), "Direct sampling produced non-positive values"
+        assert np.all(direct_samples.astype(int) == direct_samples), "Direct sampling produced non-integer values"
+        
+        # Then test MCMC sampling
         with pm.Model():
-            x = GrassiaIIGeometric("x", r=r, alpha=alpha)
+            x = GrassiaIIGeometric("x", r=r, alpha=alpha, time_covariates_sum=time_covariates_sum)
             trace = pm.sample(chains=1, draws=1000, random_seed=42).posterior
 
-        samples = trace["x"].values.flatten()
+        # Extract samples and ensure they're in the correct shape
+        samples = trace["x"].values
+        assert samples is not None, "No samples were returned from MCMC"
+        assert samples.size > 0, "MCMC sampling produced empty array"
+        
+        if samples.ndim > 1:
+            samples = samples.reshape(-1)  # Flatten if needed
 
         # Check basic properties of samples
-        assert np.all(samples > 0)  # All values should be positive
-        assert np.all(samples.astype(int) == samples)  # All values should be integers
+        assert samples.size > 0, "No samples after reshaping"
+        assert np.all(samples > 0), "Found non-positive values in samples"
+        assert np.all(samples.astype(int) == samples), "Found non-integer values in samples"
 
         # Check mean and variance are reasonable
-        # (exact values depend on the parameterization)
-        assert 0 < np.mean(samples) < np.inf
-        assert 0 < np.var(samples) < np.inf
+        mean = np.mean(samples)
+        var = np.var(samples)
+        assert 0 < mean < np.inf, f"Mean {mean} is not in valid range"
+        assert 0 < var < np.inf, f"Variance {var} is not in valid range"
+
+        # Additional checks for distribution properties
+        # The mean should be greater than 1 for these parameters
+        assert mean > 1, f"Mean {mean} is not greater than 1"
+        # The variance should be positive and finite
+        assert var > 0, f"Variance {var} is not positive"
 
     @pytest.mark.parametrize(
-        "r, alpha, size, expected_shape",
+        "r, alpha, time_covariates_sum, size, expected_shape",
         [
-            (1.0, 1.0, None, ()),  # Scalar output
-            ([1.0, 2.0], 1.0, None, (2,)),  # Vector output from r
-            (1.0, [1.0, 2.0], None, (2,)),  # Vector output from alpha
-            (1.0, 1.0, (3, 2), (3, 2)),  # Explicit size
+            (1.0, 1.0, 1.0, None, ()),  # Scalar output with covariates
+            ([1.0, 2.0], 1.0, 1.0, None, (2,)),  # Vector output from r
+            (1.0, [1.0, 2.0], 1.0, None, (2,)),  # Vector output from alpha
+            (1.0, 1.0, None, None, ()),  # No time covariates
+            (1.0, 1.0, [1.0, 2.0], None, (2,)),  # Vector output from time covariates
+            (1.0, 1.0, 1.0, (3, 2), (3, 2)),  # Explicit size
         ],
     )
-    def test_support_point(self, r, alpha, size, expected_shape):
+    def test_support_point(self, r, alpha, time_covariates_sum, size, expected_shape):
         """Test that support_point returns reasonable values with correct shapes"""
         with pm.Model() as model:
-            GrassiaIIGeometric("x", r=r, alpha=alpha, size=size)
+            GrassiaIIGeometric("x", r=r, alpha=alpha, time_covariates_sum=time_covariates_sum, size=size)
 
         init_point = model.initial_point()["x"]