adolgert
diff --git a/‎docs/notes/distributions.tex‎
Lines changed: 59 additions & 5 deletions b/‎docs/notes/distributions.tex‎
Lines changed: 59 additions & 5 deletions
diff --git a/‎src/sample/pssa_cr.jl‎
Lines changed: 1 addition & 1 deletion b/‎src/sample/pssa_cr.jl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sample/rssa.jl‎
Lines changed: 43 additions & 20 deletions b/‎src/sample/rssa.jl‎
Lines changed: 43 additions & 20 deletions
diff --git a/‎src/trace/track.jl‎
Lines changed: 57 additions & 1 deletion b/‎src/trace/track.jl‎
Lines changed: 57 additions & 1 deletion
diff --git a/‎test/Project.toml‎
Lines changed: 1 addition & 0 deletions b/‎test/Project.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎test/gauntlet/anderson_darling.jl‎
Lines changed: 1 addition & 1 deletion b/‎test/gauntlet/anderson_darling.jl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎test/gauntlet/doob_meyer.jl‎
Lines changed: 1 addition & 1 deletion b/‎test/gauntlet/doob_meyer.jl‎
Lines changed: 1 addition & 1 deletion
@@ -135,15 +135,15 @@ \subsection{Probability Evaluation}
 	Julia Function & Equation & Statistics name \\ \hline
 	\texttt{ccdf} & $S(x) = e^{-\int^x\lambda(s)ds}$ & Survival \\
 	\texttt{cdf} & $F(x) = 1 - S(x) = 1-e^{-\int^x\lambda(s)ds}$ & Cumulative distribution function \\
-	\texttt{pdf} & $f(x) = \lambda(x)e^{-\int^x\lambda(s)ds}$ & Probability distribution function \\
-	\texttt{logpdf} & $\ln\:f(x)$ & Log-likelihood \\
+	\texttt{pdf} & $f(x) = \lambda(x)e^{-\int^x\lambda(s)ds}$ & Probability density function \\
+	\texttt{logpdf} & $\ln\:f(x)$ & Log probability density \\
 	\texttt{logcdf} & $\ln\:F(x)$ & \\
 	\texttt{logdiffcdf} & $\ln\left(F(x_2)-F(x_1)\right)$ & \\
 	\texttt{logccdf} & $-\int^x\lambda(s)ds$ & Integrated hazard (negated) \\
 	\texttt{quantile} & $y = F^{-1}(x)$ & Inverse cumulative distribution function \\
 	\texttt{cquantile} & $y = F^{-1}(1-x)$ so $x = S(y)$ & Inverse Survival \\
 	\texttt{invlogcdf} & $x = \ln\:F(y)$ so $e^x = F(y)$ & \\
-	\texttt{invlogccdf} & $x = -\int_0^y\lambda(s)ds$ & Inverse integrated hazard \\ \hline
+	\texttt{invlogccdf} & $x = -\int_0^y\lambda(s)ds$ & Inverse negative integrated hazard \\ \hline
 \end{tabular}
 \caption{This translates between Julia functions and hazard-based notation. We can use this to find
 the shortest path to our calculation in code.\label{julia-translation}}
@@ -153,13 +153,13 @@ \subsection{Probability Evaluation}
 
 \texttt{invlogccdf(d::UnivariateDistribution, x::Real)} can be translated into
 \begin{equation}
-e^x = G(y) = e^{\int_0^y\lambda(s)ds}.
+e^x = G(y) = e^{-\int_0^y\lambda(s)ds}.
 \end{equation}
 That means this function is the inverse of the negative integrated hazard.
 \begin{equation}
 x = -\Lambda(y)
 \end{equation}
-In this package, the integrated hazard is called \texttt{logccdf}.
+In this package, the integrated hazard is the negative of \texttt{logccdf}.
 
 
 \section{Next Reaction for Non-Markov Processes}\label{sec:nextreaction}
@@ -644,4 +644,58 @@ \section{Piecewise Deterministic Markov Processes}
 
 Show that a delta-function can be included in CompetingClocks.
 
+\section{Testing Samplers}
+
+\subsection{Doob-Meyer}
+
+Doob-Meyer is a conceptually simple way to test the times from a sampler.
+For every draw from a sampler, take the time of the draw and use the currently-enabled
+distributions to calculate the cumulant (the cumulative probability of that time). That is, had you sampled the time
+by drawing a value $U \sim \mathrm{Uniform}[0, 1]$ and using inversion, what would the $U$ have been?
+
+Once you have the transformed values, a Kolmogorov-Smirnov test will compare the
+draws with a Uniform distribution and give you a p-value.
+
+You can do this for holding times, $P[T|K]$ for clock $K$, or waiting times, $P[T]$.
+
+\subsection{Mark Calibration}
+
+The mark is the clock chosen for a particular sampling time, so it's $P[K|T]$.
+Given a time, this is a multinomial choice. We evaluate the correctness of this
+choice using a Brier score. The simpler Brier score is a yes-no choice, so we use
+a multi-class Brier score.
+
+The multi-class Brier score is an average of squared errors. For one draw $j$ from $K$
+classes, the Brier score is
+\begin{equation}
+	B = \frac{1}{2K}\sum_{k=1}^K (p_k - 1_{k=j})^2
+\end{equation}
+The $1/2$ in the equation is because a worst score is assigning a 1 where it should
+be a 0 and assigning a 0 where it should be a 1, which gives 2.
+A lower Brier score is better.
+
+We have a wonky version of this because over the course of a simulation the number
+of classes changes. One way to handle that is to not normalize by $K$.
+If we label each step of the simulation by $i$, then it's a sum.
+\begin{equation}
+	B = \frac{1}{2N}\sum_{i=1}^N\sum_{k=1}^{K_i} (p_{ik} - 1_{k=j_i})^2
+\end{equation}
+Here $K_i$ is the number of classes at step $i$ and $j_i$ is the clock chosen at that
+step, so the indicator $1_{k=j_i}$ puts a 1 on the right only for the chosen clock.
+
+How do we know when the Brier score is good? There are several approaches, but
+one nice approach is to take the known multinomial probability for clocks
+at time $t$ and sample from it to get a parametric null. We know that the probability
+of choosing clock $k$ at time $t$ is the ratio of hazards, $\lambda_k(t)/\sum_j\lambda_j(t)$.
+Draw from this $M=1999$ times and calculate the Brier score for each draw. Keep a running
+tally of all 1999 Brier scores as you simulate. At the end, calculate a one-sided
+p-value.
+\begin{equation}
+	p_{\mbox{bad}} = \frac{1 + \#\{b: B^{(b)}\ge B_{\mbox{obs}}\}}{M + 1}
+\end{equation}
+That is, how often is a Brier score for a correct set of draws greater than
+the measured Brier score? You want this number to be low.
+For our purposes, we would use $1-p_{\mbox{bad}}$ so that a low value is worse,
+in line with what we would get from Doob-Meyer and other p-values.
+
 \end{document}
@@ -263,7 +263,7 @@ end
 # Required interface: `fire!` has no internal scheduled time to remove.
 # The fired clock is handed to `disable!`; the sampler `s` is returned.
 function fire!(s::PSSACR{K,T}, clock::K, when::T) where {K,T}
-    _invalidate!(s)
+    disable!(s, clock, when)
     return s
 end
 
 
@@ -3,20 +3,6 @@
 # Exact Rejection-based Stochastic Simulation Algorithm (RSSA)
 # for continuous-time Markov jump processes (exponential clocks).
 #
-# Algorithmic core:
-# - Maintain per-clock *true* rate a_i and a certified upper bound \bar a_i >= a_i.
-# - Maintain Ā = sum_i \bar a_i and a Fenwick tree over { \bar a_i } for O(log N) sampling.
-# - Draw candidate times from Exp(Ā). Select candidate clock i by Categorical(\bar a_i/Ā).
-# - Accept with probability a_i / \bar a_i; otherwise reject and continue thinning.
-#
-# Exactness: standard thinning of a Poisson process with rate Ā, with acceptance a_i/\bar a_i,
-# yields the target Markov jump process (homogeneous propensities). See Thanh et al. (2014, 2015).
-#
-# Notes:
-# - This implementation targets time-homogeneous propensities (Exponential only).
-#   For time-dependent rates (tRSSA), you need piecewise-time envelopes and an integral sampler;
-#   these can be added as a thin extension without changing the public interface.
-#
 using Random
 using Distributions: UnivariateDistribution, Exponential, rate
 
@@ -29,6 +15,20 @@ Rejection-based SSA with global Fenwick tree for candidate selection.
 This is for exponential distributions only not time-dependent rates.
 - `bound_factor` ≥ 1.0 controls default upper bounds: \\bar a_i ← max(\\bar a_i, bound_factor * a_i).
   Set to 1.0 for no rejections (reduces to direct-method timing with tree selection).
+
+# Algorithmic core:
+ - Maintain per-clock *true* rate a_i and a certified upper bound \\bar a_i >= a_i.
+ - Maintain Ā = sum_i \\bar a_i and a Fenwick tree over { \\bar a_i } for O(log N) sampling.
+ - Draw candidate times from Exp(Ā). Select candidate clock i by Categorical(\\bar a_i/Ā).
+ - Accept with probability a_i / \\bar a_i; otherwise reject and continue thinning.
+
+Exactness: standard thinning of a Poisson process with rate Ā, with acceptance a_i/\\bar a_i,
+yields the target Markov jump process (homogeneous propensities). See Thanh et al. (2014, 2015).
+
+Notes:
+ - This implementation targets time-homogeneous propensities (Exponential only).
+   For time-dependent rates (tRSSA), you need piecewise-time envelopes and an integral sampler;
+   these can be added as a thin extension without changing the public interface.
 """
 mutable struct RSSA{K,T} <: SSA{K,T}
     idx_of::Dict{K,Int}           # key → index (stable; indices are never reused)
@@ -141,13 +141,25 @@ function _ensure_index!(s::RSSA{K,T}, key::K) where {K,T}
     if idx != 0
         return idx
     end
+
+    # Append new, disabled clock
     push!(s.keys_vec, key)
     push!(s.present, false)
     push!(s.a, zero(T))
     push!(s.abar, zero(T))
     push!(s.bit, zero(T))
     idx = length(s.keys_vec)
     s.idx_of[key] = idx
+
+    # Rebuild Fenwick tree over current bounds for enabled clocks
+    fill!(s.bit, zero(T))
+    for j in 1:idx
+        if s.present[j] && s.abar[j] > zero(T)
+            _bit_add!(s.bit, j, s.abar[j])
+        end
+    end
+    # Note: we do *not* touch s.Abar here; it is still the sum of abar over enabled clocks.
+
     return idx
 end
 
@@ -172,20 +184,26 @@ end
 function set_global_bound_factor!(s::RSSA{K,T}, bf) where {K,T}
     s.bound_factor = convert(T, bf)
     s.bound_factor < one(T) && (s.bound_factor = one(T))  # clamp: the stored factor is never below one
-    # rebuild BIT and Abar
     fill!(s.bit, zero(T))  # start the rebuild of s.bit and s.Abar from zero
     s.Abar = zero(T)
     for idx in 1:length(s.keys_vec)
         if s.present[idx]
-            s.abar[idx] = max(s.bound_factor * s.a[idx], eps(T))
-            _bit_add!(s.bit, idx, s.abar[idx])
-            s.Abar += s.abar[idx]
+            if s.a[idx] <= zero(T)
+                s.abar[idx] = zero(T)  # a non-positive rate gets a zero bound
+            else
+                s.abar[idx] = s.bound_factor * s.a[idx]
+            end
+            if s.abar[idx] > zero(T)  # only positive bounds are added to s.bit and s.Abar
+                _bit_add!(s.bit, idx, s.abar[idx])
+                s.Abar += s.abar[idx]
+            end
         end
     end
     _invalidate!(s)
     return s
 end
 
+
 # ---- interface methods ----
 
 # No scheduled times to perturb; just drop cached sample.
@@ -214,7 +232,8 @@ function enable!(s::RSSA{K,T},
     s.a[idx] = λ
 
     # Choose a default bound if needed
-    newabar = max(oldabar, s.bound_factor * λ)
+    # If λ is zero, force abar to zero to avoid infinite loops in next()
+    newabar = λ > zero(T) ? max(oldabar, s.bound_factor * λ) : zero(T)
     if !old_enabled
         # enable
         s.present[idx] = true
@@ -259,7 +278,7 @@ end
 
 # After firing, pass the fired key to `disable!` and return the sampler.
 function fire!(s::RSSA{K,T}, key::K, when::T) where {K,T}
-    _invalidate!(s)
+    disable!(s, key, when)
     return s
 end
 
@@ -275,7 +294,10 @@ function next(s::RSSA{K,T}, when::T, rng::AbstractRNG) where {K,T}
     end
 
     t = when
+    iteration = 0
     while true
+        iteration += 1
+
         # candidate time from Exp(Abar)
         Δ = rand(rng, Exponential(inv(s.Abar)))
         t += Δ
@@ -286,6 +308,7 @@ function next(s::RSSA{K,T}, when::T, rng::AbstractRNG) where {K,T}
 
         # in case of numerical corner cases, resample
         if j < 1 || j > length(s.keys_vec) || !s.present[j] || s.abar[j] <= zero(T)
+
             continue
         end
 
 
@@ -132,16 +132,72 @@ end
 """
     steploglikelihood(tw::TrackWatcher, now, when_fires, which_fires)
 
-Calculate the log-likelihood of a single step in which the `which_fires`
+Calculate the log probability density of a single step in which the `which_fires`
 transition fires next. `now` is the current time. `when_fires` is the time when
 `which_fires` happens so `when_fires > now`. You have to call this before the transition fires so that
 it is before transitions are enabled and disabled from the previous step.
+
+One way to compute a marginal likelihood of a particular clock firing ``P[K]``
+is to integrate:
+```julia
+using QuadGK
+quadgk(t -> exp(steploglikelihood(tw, t0, t, clock)), t0, Inf)[1]
+```
+It would be slow but could be done.
 """
 function steploglikelihood(tw::EnabledWatcher{K,T}, t0, t, which_fires) where {K,T}
     _steploglikelihood(values(tw.enabled), t0, t, which_fires)  # delegate, passing the values of tw.enabled
 end
 
 
+"""
+    stepcumulant(tw::EnabledWatcher{K,T}, t0, t)
+
+Given a firing time `t`, return the cumulant (cumulative probability) of the waiting time
+that started at `t0`. Each sample is from a distribution `P[K,T]` where `K` is the clock.
+This tells you the cumulant of the marginal `P[T]`. This calculation is used for a
+Doob-Meyer test of sampler correctness. The step-cumulant should be uniformly-distributed.
+This value is ``U=1-\\exp(-H)`` where ``H`` is the integrated hazard of the
+waiting time.
+"""
+function stepcumulant(tw::EnabledWatcher{K,T}, t0, t) where {K,T}
+    @assert t>= t0  # the firing time may not precede the start of the step
+    return one(Float64) - exp(sum(  # one minus exp of the summed per-entry terms below
+        function (entry)
+            t < entry.te && return zero(Float64)  # entry.te is after t: this entry adds nothing
+            t0 < entry.te && return logccdf(entry.distribution, t - entry.te)  # entry.te falls in (t0, t]
+            logccdf(entry.distribution, t - entry.te) - logccdf(entry.distribution, t0 - entry.te)  # entry.te <= t0: difference of logccdf at t and at t0
+        end,
+        values(tw.enabled)
+    ))
+end
+
+
+"""
+    stepconditionalprobability(tw::EnabledWatcher, t)
+
+This is the probability that any particular clock fires at a given time, ``P[K|T]``.
+This returns a dictionary from clock to probability such that the sum is one (or the
+unnormalized values if the hazards sum to zero). It is the probability over the space of
+clocks conditional on the firing time. If all distributions are Exponential, this won't
+depend on the time, but in other cases it will. This is useful for mark calibration testing.
+
+Note that `t0` isn't required because the hazard depends only on enabling times.
+"""
+function stepconditionalprobability(tw::EnabledWatcher{K,T}, t) where {K,T}
+    marginal = Dict{K,Float64}(  # unnormalized weight per clock: the value returned by `hazard`
+        entry.clock => hazard(entry.distribution, entry.te, t)
+        for entry in values(tw.enabled)
+    )
+    denominator = sum(values(marginal))
+    denominator == zero(Float64) && return marginal  # zero total: return unnormalized rather than divide by zero
+    for k in keys(marginal)
+        marginal[k] /= denominator  # normalize in place so the values sum to one
+    end
+    return marginal
+end
+
+
 mutable struct MemorySampler{S,K,T}
     sampler::S
     track::TrackWatcher{K,T}
 
@@ -9,6 +9,7 @@ Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
+LocalCoverage = "5f6e1e16-694c-5876-87ef-16b5274f298e"
 QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 
@@ -31,7 +31,7 @@ function ad_two_sample(draws_a::Vector{ClockDraw}, draws_b::Vector{ClockDraw}, c
     times_b = [x[2] for x in draws_b]
     result = KSampleADTest(times_a, times_b)
     pv = pvalue(result)
-    push!(results, (; test="ad-two-sample", clock=0, pvalue, result))
+    push!(results, (; test="ad-two-sample", clock=0, pvalue=pv, result))
 
     if verbose
         ad_diagnostic_report(result)
 
@@ -19,7 +19,7 @@ function doob_meyer(times::Vector{Float64}, distributions::Vector{DistributionSt
     Gamma_0 = Gamma(when)
     uniform_draws = similar(times)
     for idx in eachindex(times)
-        uniform_draws[idx] = 1.0 - exp(-Gamma(times[idx]) - Gamma_0)
+        uniform_draws[idx] = 1.0 - exp(-Gamma(times[idx]) + Gamma_0)
     end
     test = ApproximateOneSampleKSTest(uniform_draws, Uniform(0, 1))
     p = pvalue(test)