Skip to content

Commit 4386b44

Browse files
committed
Allow priors to be truncated or not
And require a flag to use historical samples
1 parent 03cc55a commit 4386b44

File tree

1 file changed

+30
-9
lines changed

1 file changed

+30
-9
lines changed

tsdate/prior.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def fill_priors(node_parameters, timepoints, ts, Ne, *, prior_distr, progress=Fa
991991
return prior_times
992992

993993

994-
def truncate_priors(ts, priors, nodes_to_date=None, progress=False):
994+
def _truncate_priors(ts, priors, nodes_to_date=None, progress=False):
995995
"""
996996
Truncate priors so they conform to the age of nodes in the tree sequence
997997
"""
@@ -1043,7 +1043,7 @@ def truncate_priors(ts, priors, nodes_to_date=None, progress=False):
10431043
grid_data = grid_data - rowmax[:, np.newaxis]
10441044

10451045
priors.grid_data[:] = grid_data
1046-
return constrained_min_times, constrained_max_times, priors
1046+
return priors
10471047

10481048

10491049
def build_grid(
@@ -1054,6 +1054,8 @@ def build_grid(
10541054
approximate_priors=False,
10551055
approx_prior_size=None,
10561056
prior_distribution="lognorm",
1057+
allow_historical_samples=None,
1058+
truncate_priors=None,
10571059
eps=1e-6,
10581060
progress=False,
10591061
):
@@ -1075,18 +1077,25 @@ def build_grid(
10751077
:param int approx_prior_size: Number of samples from which to precalculate prior.
10761078
Should only enter value if approximate_priors=True. If approximate_priors=True
10771079
and no value specified, defaults to 1000. Default: None
1078-
:param string prior_distr: What distribution to use to approximate the conditional
1079-
coalescent prior. Can be "lognorm" for the lognormal distribution (generally a
1080-
better fit, but slightly slower to calculate) or "gamma" for the gamma
1081-
distribution (slightly faster, but a poorer fit for recent nodes). Default:
1082-
"lognorm"
1080+
:param string prior_distribution: What distribution to use to approximate the
1081+
conditional coalescent prior. Can be "lognorm" for the lognormal distribution
1082+
(generally a better fit, but slightly slower to calculate) or "gamma" for the
1083+
gamma distribution (slightly faster, but a poorer fit for recent nodes).
1084+
Default: "lognorm"
1085+
:param bool allow_historical_samples: should we allow historical samples (i.e. at
1086+
times > 0. This invalidates the assumptions of the conditional coalescent, but
1087+
may be acceptable if the historical samples are recent or if there are many
1088+
contemporaneous samples. Default: `False`
1089+
:param bool truncate_priors: If there are historical samples, should we truncate the
1090+
priors of their direct ancestor nodes so that the probability of being younger
1091+
than the oldest descendant sample is zero. If the tree sequence is trustworthy
1092+
this should give better restults. Default: `True`
10831093
:param float eps: Specify minimum distance separating points in the time grid. Also
10841094
specifies the error factor in time difference calculations. Default: 1e-6
10851095
:return: A prior object to pass to tsdate.date() containing prior values for
10861096
inference and a discretised time grid
10871097
:rtype: base.NodeGridValues Object
10881098
"""
1089-
# tree_sequence = tree_sequence.simplify(tree_sequence.samples())
10901099

10911100
if Ne <= 0:
10921101
raise ValueError("Parameter 'Ne' must be greater than 0")
@@ -1098,6 +1107,10 @@ def build_grid(
10981107
raise ValueError(
10991108
"Can't set approx_prior_size if approximate_prior is False"
11001109
)
1110+
if truncate_priors is None:
1111+
truncate_priors = True
1112+
if allow_historical_samples is None:
1113+
allow_historical_samples = False
11011114

11021115
span_data = SpansBySamples(tree_sequence, progress=progress)
11031116

@@ -1140,5 +1153,13 @@ def build_grid(
11401153
progress=progress,
11411154
)
11421155
if np.any(tree_sequence.tables.nodes.time[tree_sequence.samples()] != 0):
1143-
_, _, priors = truncate_priors(tree_sequence, priors, progress=progress)
1156+
if not allow_historical_samples:
1157+
raise ValueError(
1158+
"There are samples at non-zero times, invalidating the conditional "
1159+
"coalescent prior. You can set allow_historical_samples=True to carry "
1160+
"on regardless, calculating a prior as if all samples were "
1161+
"contemporaneous (reasonable if you only have a few ancient samples)"
1162+
)
1163+
if truncate_priors:
1164+
priors = _truncate_priors(tree_sequence, priors, progress=progress)
11441165
return priors

0 commit comments

Comments
 (0)