Change name and correct identity constant

hyanwong · hyanwong · commit 09b562434607 · 2022-09-08T09:02:57.000+01:00
diff --git a/tsdate/core.py b/tsdate/core.py
@@ -705,14 +705,14 @@ def inside_pass(self, *, normalize=True, cache_inside=False, progress=None):
                     raise ValueError
                 if cache_inside:
                     g_i[edge.id] = edge_lik
-            norm[parent] = np.max(val) if normalize else 1
+            norm[parent] = np.max(val) if normalize else self.lik.identity_constant
             inside[parent] = self.lik.reduce(val, norm[parent])
             to_visit[parent] = False
 
         # There may be nodes that are not parents but are also not fixed (e.g.
         # undated sample nodes). These need an identity normalization constant
         for unfixed_unvisited in np.where(to_visit)[0]:
-            norm[unfixed_unvisited] = 1
+            norm[unfixed_unvisited] = self.lik.identity_constant
 
         if cache_inside:
             self.g_i = self.lik.reduce(g_i, norm[self.ts.tables.edges.child, None])
diff --git a/tsdate/prior.py b/tsdate/prior.py
@@ -960,7 +960,7 @@ def fill_priors(
     Ne,
     *,
     prior_distr,
-    node_var_override=None,
+    nonfixed_sample_var=None,
     progress=False,
 ):
     """
@@ -969,7 +969,7 @@ def fill_priors(
     are ignored for these nodes) and fill out a NodeGridValues object with the prior
     values from the gamma or lognormal distribution with those parameters.
 
-    For a description of `node_var_override`, see the parameter description in
+    For a description of `nonfixed_sample_var`, see the parameter description in
     the `build_grid` function.
 
     TODO - what if there is an internal fixed node? Should we truncate
@@ -994,12 +994,16 @@ def shape_scale_from_mean_var(mean, var):
 
     else:
         raise ValueError("prior distribution must be lognorm or gamma")
-    if node_var_override is None:
-        node_var_override = {}
+    samples = ts.samples()
+    if nonfixed_sample_var is None:
+        nonfixed_sample_var = {}
+    for u in nonfixed_sample_var.keys():
+        if u not in samples:
+            raise ValueError(f"Node {u} in 'nonfixed_sample_var' is not a sample")
     datable_nodes = np.ones(ts.num_nodes, dtype=bool)
-    datable_nodes[ts.samples()] = False
-    # Mark all nodes in node_var_override as datable
-    datable_nodes[list(node_var_override.keys())] = True
+    datable_nodes[samples] = False
+    # Mark all nodes in nonfixed_sample_var as datable
+    datable_nodes[list(nonfixed_sample_var.keys())] = True
     datable_nodes = np.where(datable_nodes)[0]
 
     prior_times = base.NodeGridValues(
@@ -1012,10 +1016,10 @@ def shape_scale_from_mean_var(mean, var):
     for node in tqdm(
         datable_nodes, desc="Assign Prior to Each Node", disable=not progress
     ):
-        if node in node_var_override:
+        if node in nonfixed_sample_var:
             shape, scale = shape_scale_from_mean_var(
                 mean=ts.node(node).time,
-                var=node_var_override[node],
+                var=nonfixed_sample_var[node],
             )
         else:
             shape = shape_param[node]
@@ -1098,7 +1102,7 @@ def build_grid(
     prior_distribution="lognorm",
     allow_historical_samples=None,
     truncate_priors=None,
-    node_var_override=None,
+    nonfixed_sample_var=None,
     eps=1e-6,
     # Parameters below undocumented
     progress=False,
@@ -1127,20 +1131,21 @@ def build_grid(
         gamma distribution (slightly faster, but a poorer fit for recent nodes).
         Default: "lognorm"
     :param bool allow_historical_samples: should we allow historical samples (i.e. at
-        times > 0. This invalidates the assumptions of the conditional coalescent, but
+        times > 0). This invalidates the assumptions of the conditional coalescent, but
         may be acceptable if the historical samples are recent or if there are many
-        contemporaneous samples. Default: `False`
+        contemporaneous samples. Default: ``False``
     :param bool truncate_priors: If there are historical samples, should we truncate the
-        priors of their direct ancestor nodes so that the probability of being younger
-        than the oldest descendant sample is zero. If the tree sequence is trustworthy
-        this should give better restults. Default: `True`
-    :param dict node_var_override: is a dict mapping node IDs to a variance value.
-        Any nodes listed here will be treated as non-fixed nodes whose prior is not
-        calculated from the conditional coalescent but instead are allocated a prior
+        priors of all nodes which are their ancestors so that the probability of being
+        younger than the oldest descendant sample is zero. As long as historical
+        samples do not have ancestors that have been misassigned in the tree sequence
+        topology, this should give better results. Default: ``True``
+    :param dict nonfixed_sample_var: is a dict mapping sample node IDs to a variance
+        value. Any nodes listed here will be treated as non-fixed nodes whose prior is
+        not calculated from the conditional coalescent but instead are allocated a prior
         whose mean is the node time in the tree sequence and whose variance is the
         value in this dictionary. This allows sample nodes to be treated as nonfixed
         nodes, and therefore dated. If ``None`` (default) then all sample nodes are
-        treated as occurring ata  fixed time (as if this were an empty dict).
+        treated as occurring at a fixed time (as if this were an empty dict).
     :param float eps: Specify minimum distance separating points in the time grid. Also
         specifies the error factor in time difference calculations. Default: 1e-6
     :return: A prior object to pass to tsdate.date() containing prior values for
@@ -1201,7 +1206,7 @@ def build_grid(
         tree_sequence,
         Ne,
         prior_distr=prior_distribution,
-        node_var_override=node_var_override,
+        nonfixed_sample_var=nonfixed_sample_var,
         progress=progress,
     )
     if np.any(tree_sequence.nodes_time[tree_sequence.samples()] > 0):