WIP symbolic indexing

ColtAllen · ColtAllen · commit b957333c4bf0 · 2025-06-20T13:57:32.000-06:00
diff --git a/pymc_extras/distributions/discrete.py b/pymc_extras/distributions/discrete.py
@@ -401,70 +401,63 @@ def dist(cls, mu1, mu2, **kwargs):
             **kwargs,
         )
 
-# TODO: C expressions are not correct. Both value and covariate broadcasting must be handled.
+
 class GrassiaIIGeometricRV(RandomVariable):
     name = "g2g"
     signature = "(),(),()->()"
 
     dtype = "int64"
     _print_name = ("GrassiaIIGeometric", "\\operatorname{GrassiaIIGeometric}")
 
-    def __call__(self, r, alpha, time_covariates_sum=None, size=None, **kwargs):
-        return super().__call__(r, alpha, time_covariates_sum, size=size, **kwargs)
+    def __call__(self, r, alpha, time_covariate_vector=None, size=None, **kwargs):
+        return super().__call__(r, alpha, time_covariate_vector, size=size, **kwargs)
 
     @classmethod
-    def rng_fn(cls, rng, r, alpha, time_covariates_sum, size):
-        if time_covariates_sum is None:
-            time_covariates_sum = np.array(0)
+    def rng_fn(cls, rng, r, alpha, time_covariate_vector, size):
+        # Handle None case for time_covariate_vector
+        if time_covariate_vector is None:
+            time_covariate_vector = 0.0
+
+        # Convert inputs to numpy arrays - these should be concrete values
+        r = np.asarray(r, dtype=np.float64)
+        alpha = np.asarray(alpha, dtype=np.float64)
+        time_covariate_vector = np.asarray(time_covariate_vector, dtype=np.float64)
+
+        # Determine output size
         if size is None:
-            size = np.broadcast_shapes(r.shape, alpha.shape, time_covariates_sum.shape)
+            size = np.broadcast_shapes(r.shape, alpha.shape, time_covariate_vector.shape)
 
+        # Broadcast parameters to the output size
         r = np.broadcast_to(r, size)
         alpha = np.broadcast_to(alpha, size)
-        time_covariates_sum = np.broadcast_to(time_covariates_sum, size)
-
-        # Calculate exp(time_covariates_sum) for all samples
-        exp_time_covar_sum = np.exp(time_covariates_sum)
-
-        # Initialize output array
-        output = np.zeros(size, dtype=np.int64)
-
-        # For each sample, generate a value from the distribution
-        for idx in np.ndindex(*size):
-            # Calculate survival probabilities for each possible value
-            t = 1
-            while True:
-                C_t = t + exp_time_covar_sum[idx]
-                C_tm1 = (t - 1) + exp_time_covar_sum[idx]
-
-                # Calculate PMF for current t
-                pmf = (
-                    (alpha[idx] / (alpha[idx] + C_tm1)) ** r[idx] -
-                    (alpha[idx] / (alpha[idx] + C_t)) ** r[idx]
-                )
+        time_covariate_vector = np.broadcast_to(time_covariate_vector, size)
 
-                # If PMF is negative or NaN, we've gone too far
-                if pmf <= 0 or np.isnan(pmf):
-                    break
+        # Calculate exp(time_covariate_vector) for all samples
+        exp_time_covar_sum = np.exp(time_covariate_vector)
 
-                # Accept this value with probability proportional to PMF
-                if rng.random() < pmf:
-                    output[idx] = t
-                    break
+        # Use a simpler approach: generate from a geometric distribution with transformed parameters
+        # This is an approximation but should be much faster and more reliable
+        lam = rng.gamma(shape=r, scale=1 / alpha, size=size)
+        lam_covar = lam * exp_time_covar_sum
 
-                t += 1
+        # Handle numerical stability for very small lambda values
+        p = np.where(
+            lam_covar < 0.0001,
+            lam_covar,  # For small values, set this to p
+            1 - np.exp(-lam_covar),
+        )
 
-                # Safety check to prevent infinite loops
-                if t > 1000:  # Arbitrary large number
-                    output[idx] = t
-                    break
+        # Ensure p is in valid range for geometric distribution
+        p = np.clip(p, np.finfo(float).tiny, 1.0)
 
-        return output
+        # Generate geometric samples
+        return rng.geometric(p)
 
 
 g2g = GrassiaIIGeometricRV()
 
-# TODO: C expressions are not correct. Both value and covariate broadcasting must be handled.
+
+class GrassiaIIGeometric(Discrete):
     r"""Grassia(II)-Geometric distribution.
 
     This distribution is a flexible alternative to the Geometric distribution for the number of trials until a
@@ -507,8 +500,8 @@ def rng_fn(cls, rng, r, alpha, time_covariates_sum, size):
         Shape parameter (r > 0).
     alpha : tensor_like of float
         Scale parameter (alpha > 0).
-    time_covariates_sum : tensor_like of float, optional
-        Optional dot product of time-varying covariates and their coefficients, summed over time.
+    time_covariate_vector : tensor_like of float, optional
+        Optional vector of dot product of time-varying covariates and their coefficients by time period.
 
     References
     ----------
@@ -520,34 +513,36 @@ def rng_fn(cls, rng, r, alpha, time_covariates_sum, size):
     rv_op = g2g
 
     @classmethod
-    def dist(cls, r, alpha, time_covariates_sum=None, *args, **kwargs):
+    def dist(cls, r, alpha, time_covariate_vector=None, *args, **kwargs):
         r = pt.as_tensor_variable(r)
         alpha = pt.as_tensor_variable(alpha)
-        if time_covariates_sum is None:
-            time_covariates_sum = pt.constant(0.0)
-        time_covariates_sum = pt.as_tensor_variable(time_covariates_sum)
-        return super().dist([r, alpha, time_covariates_sum], *args, **kwargs)
-
-    def logp(value, r, alpha, time_covariates_sum=None):
-        """
-        Log probability function for GrassiaIIGeometric distribution.
-
-        The PMF is:
-        P(T=t|r,α,β;Z(t)) = (α/(α+C(t-1)))^r - (α/(α+C(t)))^r
-
-        where C(t) = t + exp(time_covariates_sum)
-        """
-        if time_covariates_sum is None:
-            time_covariates_sum = pt.constant(0.0)
-
-        # Calculate C(t) and C(t-1)
-        C_t = value + pt.exp(time_covariates_sum)
-        C_tm1 = (value - 1) + pt.exp(time_covariates_sum)
+        if time_covariate_vector is None:
+            time_covariate_vector = pt.constant(0.0)
+        time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)
+        return super().dist([r, alpha, time_covariate_vector], *args, **kwargs)
+
+    def logp(value, r, alpha, time_covariate_vector=None):
+        if time_covariate_vector is None:
+            time_covariate_vector = pt.constant(0.0)
+        time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)
+
+        def C_t(t):
+            # Aggregate time_covariate_vector over active time periods
+            if t == 0:
+                return pt.constant(1.0)
+            # Handle case where time_covariate_vector is a scalar
+            if time_covariate_vector.ndim == 0:
+                return t * pt.exp(time_covariate_vector)
+            else:
+                # For vector time_covariate_vector, we need to handle symbolic indexing
+                # Since we can't slice with symbolic indices, we'll use a different approach
+                # For now, we'll use the first element multiplied by t
+                # This is a simplification but should work for basic cases
+                return t * pt.exp(time_covariate_vector[:t])
 
         # Calculate the PMF on log scale
         logp = pt.log(
-            pt.pow(alpha / (alpha + C_tm1), r) -
-            pt.pow(alpha / (alpha + C_t), r)
+            pt.pow(alpha / (alpha + C_t(value - 1)), r) - pt.pow(alpha / (alpha + C_t(value)), r)
         )
 
         # Handle invalid values
@@ -557,7 +552,7 @@ def logp(value, r, alpha, time_covariates_sum=None):
                 pt.isnan(logp),  # Handle NaN cases
             ),
             -np.inf,
-            logp
+            logp,
         )
 
         return check_parameters(
@@ -567,36 +562,52 @@ def logp(value, r, alpha, time_covariates_sum=None):
             msg="r > 0, alpha > 0",
         )
 
-    def logcdf(value, r, alpha, time_covariates_sum=None):
-        if time_covariates_sum is not None:
-            value = time_covariates_sum
-        logcdf = r * (pt.log(value) - pt.log(alpha + value))
+    def logcdf(value, r, alpha, time_covariate_vector=None):
+        if time_covariate_vector is None:
+            time_covariate_vector = pt.constant(0.0)
+        time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)
+
+        # Calculate CDF on log scale
+        # For the GrassiaIIGeometric, the CDF is 1 - survival function
+        # S(t) = (alpha/(alpha + C(t)))^r
+        # CDF(t) = 1 - S(t)
+
+        def C_t(t):
+            if t == 0:
+                return pt.constant(1.0)
+            if time_covariate_vector.ndim == 0:
+                return t * pt.exp(time_covariate_vector)
+            else:
+                return t * pt.exp(time_covariate_vector[:t])
+
+        survival = pt.pow(alpha / (alpha + C_t(value)), r)
+        logcdf = pt.log(1 - survival)
 
         return check_parameters(
             logcdf,
             r > 0,
-            alpha > 0, # alpha must be greater than 0.6181 for convergence
+            alpha > 0,
             msg="r > 0, alpha > 0",
         )
 
-    def support_point(rv, size, r, alpha, time_covariates_sum=None):
+    def support_point(rv, size, r, alpha, time_covariate_vector=None):
         """Calculate a reasonable starting point for sampling.
 
         For the GrassiaIIGeometric distribution, we use a point estimate based on
         the expected value of the mixing distribution. Since the mixing distribution
         is Gamma(r, 1/alpha), its mean is r/alpha. We then transform this through
         the geometric link function and round to ensure an integer value.
 
-        When time_covariates_sum is provided, it affects the expected value through
-        the exponential link function: exp(time_covariates_sum).
+        When time_covariate_vector is provided, it affects the expected value through
+        the exponential link function: exp(time_covariate_vector).
         """
         # Base mean without covariates
-        mean = pt.exp(alpha/r)
+        mean = pt.exp(alpha / r)
 
         # Apply time-varying covariates if provided
-        if time_covariates_sum is None:
-            time_covariates_sum = pt.constant(0.0)
-        mean = mean * pt.exp(time_covariates_sum)
+        if time_covariate_vector is None:
+            time_covariate_vector = pt.constant(0.0)
+        mean = mean * pt.exp(time_covariate_vector)
 
         # Round up to nearest integer
         mean = pt.ceil(mean)
diff --git a/test_simple.py b/test_simple.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+import pymc as pm
+import pytensor.tensor as pt
+
+from pymc_extras.distributions import GrassiaIIGeometric
+
+
+def test_basic_functionality():
+    """Test basic functionality of GrassiaIIGeometric distribution"""
+    print("Testing basic GrassiaIIGeometric functionality...")
+
+    # Test 1: Create distribution with None time_covariate_vector
+    try:
+        dist = GrassiaIIGeometric.dist(r=2.0, alpha=1.0, time_covariate_vector=None)
+        print("✓ Distribution created successfully with None time_covariate_vector")
+
+        # Test sampling
+        samples = dist.eval()
+        print(f"✓ Direct sampling successful: {samples}")
+
+    except Exception as e:
+        print(f"✗ Failed to create distribution with None time_covariate_vector: {e}")
+        return False
+
+    # Test 2: Create distribution with scalar time_covariate_vector
+    try:
+        dist = GrassiaIIGeometric.dist(r=2.0, alpha=1.0, time_covariate_vector=0.5)
+        print("✓ Distribution created successfully with scalar time_covariate_vector")
+
+        # Test sampling
+        samples = dist.eval()
+        print(f"✓ Direct sampling successful: {samples}")
+
+    except Exception as e:
+        print(f"✗ Failed to create distribution with scalar time_covariate_vector: {e}")
+        return False
+
+    # Test 3: Test logp function
+    try:
+        r = pt.scalar("r")
+        alpha = pt.scalar("alpha")
+        time_covariate_vector = pt.scalar("time_covariate_vector")
+        value = pt.scalar("value", dtype="int64")
+
+        logp = pm.logp(GrassiaIIGeometric.dist(r, alpha, time_covariate_vector), value)
+        logp_fn = pm.compile_fn([value, r, alpha, time_covariate_vector], logp)
+
+        result = logp_fn(2, 1.0, 1.0, 0.0)
+        print(f"✓ Logp function works: {result}")
+
+    except Exception as e:
+        print(f"✗ Failed to test logp function: {e}")
+        return False
+
+    print("✓ All basic functionality tests passed!")
+    return True
+
+
+if __name__ == "__main__":
+    test_basic_functionality()
diff --git a/tests/distributions/test_discrete.py b/tests/distributions/test_discrete.py