clean up comments and final TODO

ColtAllen · ColtAllen · commit a715ec78ef55 · 2025-07-11T15:12:06.000+02:00
diff --git a/pymc_extras/distributions/discrete.py b/pymc_extras/distributions/discrete.py
@@ -409,7 +409,7 @@ def dist(cls, mu1, mu2, **kwargs):
 class GrassiaIIGeometricRV(RandomVariable):
     name = "g2g"
     signature = "(),(),()->()"
-    ndims_params = [0, 0, 0]  # r, alpha, time_covariate_vector are all scalars
+    ndims_params = [0, 0, 0]  # deprecated in PyTensor 2.31.7, but still required for RandomVariable
 
     dtype = "int64"
     _print_name = ("GrassiaIIGeometric", "\\operatorname{GrassiaIIGeometric}")
@@ -430,14 +430,13 @@ def rng_fn(cls, rng, r, alpha, time_covariate_vector, size):
         alpha = np.broadcast_to(alpha, size)
         time_covariate_vector = np.broadcast_to(time_covariate_vector, size)
 
-        # Calculate exp(time_covariate_vector) for all samples
-        exp_time_covar = np.exp(time_covariate_vector)
-
-        # Generate gamma samples and apply time covariates
         lam = rng.gamma(shape=r, scale=1 / alpha, size=size)
 
-        # TODO: Add C(t) to the calculation of lam_covar
+        # Calculate exp(time_covariate_vector) for all samples
+        exp_time_covar = np.exp(time_covariate_vector)
         lam_covar = lam * exp_time_covar
+
+        # TODO: lam_covar is not aggregated over time periods (cf. C_t in logp)
         p = 1 - np.exp(-lam_covar)
 
         # Ensure p is in valid range for geometric distribution
@@ -526,16 +525,18 @@ def logp(value, r, alpha, time_covariate_vector=None):
         time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)
 
         def C_t(t):
-            # Aggregate time_covariate_vector over active time periods
             if t == 0:
                 return pt.constant(0.0)
-            # Handle case where time_covariate_vector is a scalar
             if time_covariate_vector.ndim == 0:
-                return t * pt.exp(time_covariate_vector)
+                return t
             else:
-                # For time covariates, this approximation avoids symbolic indexing issues
-                mean_covariate = pt.mean(time_covariate_vector)
-                return t * pt.exp(mean_covariate)
+                # Ensure t is a valid index
+                t_idx = pt.maximum(0, t - 1)  # Convert to 0-based indexing
+                # If t_idx exceeds length of time_covariate_vector, use last value
+                max_idx = pt.shape(time_covariate_vector)[0] - 1
+                safe_idx = pt.minimum(t_idx, max_idx)
+                covariate_value = time_covariate_vector[safe_idx]
+                return t * pt.exp(covariate_value)
 
         logp = pt.log(
             pt.pow(alpha / (alpha + C_t(value - 1)), r) - pt.pow(alpha / (alpha + C_t(value)), r)
@@ -567,11 +568,15 @@ def C_t(t):
             if t == 0:
                 return pt.constant(0.0)
             if time_covariate_vector.ndim == 0:
-                return t * pt.exp(time_covariate_vector)
+                return t
             else:
-                # For time covariates, this approximation avoids symbolic indexing issues
-                mean_covariate = pt.mean(time_covariate_vector)
-                return t * pt.exp(mean_covariate)
+                # Ensure t is a valid index
+                t_idx = pt.maximum(0, t - 1)  # Convert to 0-based indexing
+                # If t_idx exceeds length of time_covariate_vector, use last value
+                max_idx = pt.shape(time_covariate_vector)[0] - 1
+                safe_idx = pt.minimum(t_idx, max_idx)
+                covariate_value = time_covariate_vector[safe_idx]
+                return t * pt.exp(covariate_value)
 
         survival = pt.pow(alpha / (alpha + C_t(value)), r)
         logcdf = pt.log(1 - survival)
diff --git a/tests/distributions/test_discrete.py b/tests/distributions/test_discrete.py
@@ -272,8 +272,8 @@ def test_random_none_covariates(self):
                     dist = self.pymc_dist.dist(
                         r=r,
                         alpha=alpha,
-                        time_covariate_vector=0.0,
-                        size=1000,  # Changed from None to 0.0
+                        time_covariate_vector=0.0,  # Changed from None to avoid zip issues
+                        size=1000,
                     )
                     draws = dist.eval()
 
@@ -289,7 +289,7 @@ def test_random_none_covariates(self):
                 (0.5, 1.0, 0.0),
                 (1.0, 2.0, 1.0),
                 (2.0, 0.5, -1.0),
-                (5.0, 1.0, 0.0),  # Changed from None to 0.0 to avoid zip issues
+                (5.0, 1.0, 0.0),  # Changed from None to avoid zip issues
             ],
         )
         def test_random_moments(self, r, alpha, time_covariate_vector):
@@ -298,13 +298,8 @@ def test_random_moments(self, r, alpha, time_covariate_vector):
             )
             draws = dist.eval()
 
-            # Check that all values are positive integers
             assert np.all(draws > 0)
             assert np.all(draws.astype(int) == draws)
-
-            # Check that values are reasonably distributed
-            # Note: Exact moments are complex for this distribution
-            # so we just check basic properties
             assert np.mean(draws) > 0
             assert np.var(draws) > 0
 
@@ -337,21 +332,18 @@ def test_sampling_consistency(self):
         """Test that sampling from the distribution produces reasonable results"""
         r = 2.0
         alpha = 1.0
-        time_covariate_vector = 0.0  # Changed from None to 0.0 to avoid issues
+        time_covariate_vector = [0.0, 1.0, 2.0]
 
-        # First test direct sampling from the distribution
         try:
             dist = GrassiaIIGeometric.dist(
                 r=r, alpha=alpha, time_covariate_vector=time_covariate_vector
             )
 
             direct_samples = dist.eval()
 
-            # Convert to numpy array if it's not already
             if not isinstance(direct_samples, np.ndarray):
                 direct_samples = np.array([direct_samples])
 
-            # Ensure we have a 1D array
             if direct_samples.ndim == 0:
                 direct_samples = direct_samples.reshape(1)
 
@@ -371,7 +363,6 @@ def test_sampling_consistency(self):
             traceback.print_exc()
             raise
 
-        # Then test MCMC sampling
         try:
             with pm.Model():
                 x = GrassiaIIGeometric(
@@ -382,7 +373,7 @@ def test_sampling_consistency(self):
                     chains=1, draws=50, tune=0, random_seed=42, progressbar=False
                 ).posterior
 
-            # Extract samples and ensure they're in the correct shape
+            # Extract samples and ensure correct shape
             samples = trace["x"].values
 
             assert (
@@ -415,9 +406,7 @@ def test_sampling_consistency(self):
             ), f"Variance {var} is not in valid range for {time_covariate_vector}"
 
             # Additional checks for distribution properties
-            # The mean should be greater than 1 for these parameters
             assert mean > 1, f"Mean {mean} is not greater than 1 for {time_covariate_vector}"
-            # The variance should be positive and finite
             assert var > 0, f"Variance {var} is not positive for {time_covariate_vector}"
 
         except Exception as e: