@@ -410,15 +410,14 @@ class GrassiaIIGeometricRV(RandomVariable):
     _print_name = ("GrassiaIIGeometric", "\\operatorname{GrassiaIIGeometric}")

     def __call__(self, r, alpha, time_covariate_vector=None, size=None, **kwargs):
-        return super().__call__(r, alpha, time_covariate_vector, size=size, **kwargs)
+        return super().__call__(r, alpha, time_covariate_vector, size, **kwargs)

     @classmethod
     def rng_fn(cls, rng, r, alpha, time_covariate_vector, size):
-        # Handle None case for time_covariate_vector
         if time_covariate_vector is None:
             time_covariate_vector = 0.0

-        # Convert inputs to numpy arrays - these should be concrete values
+        # Cast inputs as numpy arrays
         r = np.asarray(r, dtype=np.float64)
         alpha = np.asarray(alpha, dtype=np.float64)
         time_covariate_vector = np.asarray(time_covariate_vector, dtype=np.float64)
@@ -427,33 +426,28 @@ def rng_fn(cls, rng, r, alpha, time_covariate_vector, size):
         if size is None:
             size = np.broadcast_shapes(r.shape, alpha.shape, time_covariate_vector.shape)

-        # Broadcast parameters to the output size
+        # Broadcast parameters to output size
         r = np.broadcast_to(r, size)
         alpha = np.broadcast_to(alpha, size)
         time_covariate_vector = np.broadcast_to(time_covariate_vector, size)

         # Calculate exp(time_covariate_vector) for all samples
-        exp_time_covar_sum = np.exp(time_covariate_vector)
+        exp_time_covar = np.exp(time_covariate_vector)

         # Generate gamma samples and apply time covariates
         lam = rng.gamma(shape=r, scale=1 / alpha, size=size)
-        lam_covar = lam * exp_time_covar_sum

-        # Calculate probability parameter for geometric distribution
-        # Use the mathematically correct approach: 1 - exp(-lambda)
-        # This matches the first test case and is theoretically sound
+        # TODO: Add C(t) to the calculation of lam_covar
+        lam_covar = lam * exp_time_covar
         p = 1 - np.exp(-lam_covar)

         # Ensure p is in valid range for geometric distribution
-        # Use a more conservative lower bound to prevent extremely large values
         min_p = max(1e-6, np.finfo(float).tiny)  # Minimum probability to prevent infinite values
         p = np.clip(p, min_p, 1.0)

-        # Generate geometric samples
         samples = rng.geometric(p)

         # Clip samples to reasonable bounds to prevent infinite values
-        # Geometric distribution with small p can produce very large values
         max_sample = 10000  # Reasonable upper bound for discrete time-to-event data
         samples = np.clip(samples, 1, max_sample)
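For reference, the sampling scheme in rng_fn above (per-unit rates drawn from a Gamma(r, 1/alpha) mixing distribution, scaled through an exponential link, then geometric draws) can be sketched in standalone NumPy as below. The function name sample_g2g and the parameter values are illustrative only, and the clipping of samples to max_sample is omitted:

import numpy as np

def sample_g2g(rng, r, alpha, time_covariate_vector=0.0, size=1000):
    """Illustrative sampler: gamma-mixed geometric with a log-linear covariate effect."""
    lam = rng.gamma(shape=r, scale=1 / alpha, size=size)  # individual-level rates
    lam_covar = lam * np.exp(time_covariate_vector)       # exponential link for covariates
    p = 1 - np.exp(-lam_covar)                            # per-period event probability
    p = np.clip(p, 1e-6, 1.0)                             # keep p away from 0
    return rng.geometric(p)                               # discrete time-to-event draws

rng = np.random.default_rng(42)
print(sample_g2g(rng, r=2.0, alpha=4.0).mean())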
@@ -507,7 +501,7 @@ class GrassiaIIGeometric(Discrete):
     alpha : tensor_like of float
         Scale parameter (alpha > 0).
     time_covariate_vector : tensor_like of float, optional
-        Optional vector of dot product of time-varying covariates and their coefficients by time period.
+        Optional vector containing dot products of time-varying covariates and coefficients.

     References
     ----------
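As a concrete illustration of the time_covariate_vector parameter documented above (the names and values here are hypothetical, not from the PR), each entry holds the dot product of one period's covariates with their coefficients:

import numpy as np

# Three time periods, two time-varying covariates (hypothetical values)
covariates = np.array([[1.0, 0.2],
                       [1.0, 0.5],
                       [1.0, 0.9]])
beta = np.array([0.1, -0.3])
time_covariate_vector = covariates @ beta  # one dot product per time period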
@@ -535,19 +529,15 @@ def logp(value, r, alpha, time_covariate_vector=None):
        def C_t(t):
            # Aggregate time_covariate_vector over active time periods
            if t == 0:
-                return pt.constant(1.0)
+                return pt.constant(0.0)
            # Handle case where time_covariate_vector is a scalar
            if time_covariate_vector.ndim == 0:
                return t * pt.exp(time_covariate_vector)
            else:
-                # For vector time_covariate_vector, use a simpler approach
-                # that works with PyTensor's symbolic system
-                # We'll use the mean of the time covariates multiplied by t
-                # This is an approximation but avoids symbolic indexing issues
+                # For time covariates, this approximation avoids symbolic indexing issues
                mean_covariate = pt.mean(time_covariate_vector)
                return t * pt.exp(mean_covariate)

-        # Calculate the PMF on log scale
        logp = pt.log(
            pt.pow(alpha / (alpha + C_t(value - 1)), r) - pt.pow(alpha / (alpha + C_t(value)), r)
        )
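The expression inside pt.log above is the difference of survival functions S(t - 1) - S(t), with S(t) = (alpha / (alpha + C(t)))**r as stated in the logcdf comments removed below; with the t == 0 branch now returning 0.0, S(0) = 1 and the probabilities telescope to 1. A quick numerical sanity check, under the simplifying assumption of no covariates so that C(t) = t:

import numpy as np

r, alpha = 2.0, 4.0
t = np.arange(1, 10_000)
survival = (alpha / (alpha + t)) ** r                    # S(t) = (alpha / (alpha + C(t)))**r
pmf = np.concatenate(([1.0], survival[:-1])) - survival  # P(T = t) = S(t - 1) - S(t), with S(0) = 1
print(pmf.sum())  # approaches 1 as the horizon grows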
@@ -574,21 +564,13 @@ def logcdf(value, r, alpha, time_covariate_vector=None):
            time_covariate_vector = pt.constant(0.0)
        time_covariate_vector = pt.as_tensor_variable(time_covariate_vector)

-        # Calculate CDF on log scale
-        # For the GrassiaIIGeometric, the CDF is 1 - survival function
-        # S(t) = (alpha/(alpha + C(t)))^r
-        # CDF(t) = 1 - S(t)
-
        def C_t(t):
            if t == 0:
                return pt.constant(1.0)
            if time_covariate_vector.ndim == 0:
                return t * pt.exp(time_covariate_vector)
            else:
-                # For vector time_covariate_vector, use a simpler approach
-                # that works with PyTensor's symbolic system
-                # We'll use the mean of the time covariates multiplied by t
-                # This is an approximation but avoids symbolic indexing issues
+                # For time covariates, this approximation avoids symbolic indexing issues
                mean_covariate = pt.mean(time_covariate_vector)
                return t * pt.exp(mean_covariate)
@@ -613,14 +595,9 @@ def support_point(rv, size, r, alpha, time_covariate_vector=None):
        When time_covariate_vector is provided, it affects the expected value through
        the exponential link function: exp(time_covariate_vector).
        """
-        # Base mean from the gamma mixing distribution: E[lambda] = r/alpha
-        # For a geometric distribution with parameter p, E[X] = 1/p
-        # Since p = 1 - exp(-lambda), we approximate E[X] ≈ 1/(1 - exp(-E[lambda]))
        base_lambda = r / alpha

-        # Approximate the expected value of the geometric distribution
-        # For small lambda, 1 - exp(-lambda) ≈ lambda, so E[X] ≈ 1/lambda
-        # For larger lambda, we use the full expression
+        # Approximate expected value of geometric distribution
        mean = pt.switch(
            base_lambda < 0.1,
            1.0 / base_lambda,  # Approximation for small lambda
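The pt.switch above implements the approximation spelled out in the comments being deleted: E[lambda] = r / alpha, p = 1 - exp(-lambda), and E[X] = 1 / p for a geometric variable, with 1 / lambda used when lambda is small. A small NumPy sketch of the two branches follows; the parameter values are illustrative, and the second branch is inferred from the deleted comments since the hunk cuts off before it:

import numpy as np

base_lambda = np.array([0.05, 0.5, 2.0])          # r / alpha for a few hypothetical settings
small = 1.0 / base_lambda                         # small-lambda branch: 1 - exp(-lam) ~ lam
full = 1.0 / (1.0 - np.exp(-base_lambda))         # full expression for larger lambda
approx_mean = np.where(base_lambda < 0.1, small, full)
print(np.maximum(np.ceil(approx_mean), 1.0))      # support point rounds up and floors at 1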
@@ -631,7 +608,7 @@ def support_point(rv, size, r, alpha, time_covariate_vector=None):
        if time_covariate_vector is not None:
            mean = mean * pt.exp(time_covariate_vector)

-        # Round up to nearest integer and ensure it's at least 1
+        # Round up to nearest integer and ensure >= 1
        mean = pt.maximum(pt.ceil(mean), 1.0)

        # Handle size parameter