Skip to content

Commit 14175c4

Browse files
committed
Set root time values to something reasonable
1 parent 6d22ac9 commit 14175c4

File tree

3 files changed

+43
-2
lines changed

3 files changed

+43
-2
lines changed

CHANGELOG.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
********************
2+
[0.2.2] - 2021-0X-XX
3+
********************
4+
5+
**Bugfixes**:
6+
7+
- Mutations at non-inference sites are now guaranteed to be fully parsimonious.
8+
Previous versions required a mutation above the root when the input ancestral state
9+
disagreed with the ancestral state produced by the parsimony algorithm. Now fixed by
10+
using the new map_mutations code from tskit 0.3.7 (:pr:`557`, :user:`hyanwong`)
11+
12+
**New Features**:
13+
14+
**Breaking changes**:
15+
16+
- Oldest nodes in a standard inferred tree sequence are no longer set to frequencies ~2
17+
and ~3 (i.e. 2 or 3 times as old as all the other nodes), but are spaced above the
18+
others by the mean time between unique ancestor ages (:pr:`485`, :user:`hyanwong`)
19+
120
********************
221
[0.2.1] - 2021-05-26
322
********************

tests/test_inference.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,6 +1515,26 @@ def test_random_data(self):
15151515
ancestor_data = tsinfer.generate_ancestors(sample_data)
15161516
self.verify_ancestors(sample_data, ancestor_data)
15171517

1518+
def test_oldest_ancestors(self):
1519+
m = 50
1520+
G, positions = get_random_data_example(20, m)
1521+
sample_data = tsinfer.SampleData(sequence_length=m)
1522+
for genotypes, position in zip(G, positions):
1523+
sample_data.add_site(position, genotypes)
1524+
sample_data.finalise()
1525+
ancestor_data = tsinfer.generate_ancestors(sample_data)
1526+
assert ancestor_data.num_ancestors > 2
1527+
times = ancestor_data.ancestors_time[:]
1528+
unique_times = np.unique(times)
1529+
ultimate_root_time = unique_times[-1]
1530+
root_time = unique_times[-2]
1531+
oldest_non_root = unique_times[-3]
1532+
assert np.sum(times == root_time) == 1 # root ancestor at unique time
1533+
assert np.sum(times == ultimate_root_time) == 1 # ultimate anc at unique time
1534+
expected_time_diff = oldest_non_root / len(unique_times[:-2])
1535+
assert np.isclose(ultimate_root_time - root_time, expected_time_diff)
1536+
assert np.isclose(root_time - oldest_non_root, expected_time_diff)
1537+
15181538

15191539
class TestAncestorsTreeSequence:
15201540
"""

tsinfer/inference.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -946,8 +946,10 @@ def run(self):
946946
logger.info(f"Starting build for {self.num_ancestors} ancestors")
947947
progress = self.progress_monitor.get("ga_generate", self.num_ancestors)
948948
a = np.zeros(self.num_sites, dtype=np.int8)
949-
root_time = max(self.timepoint_to_epoch.keys()) + 1
950-
ultimate_ancestor_time = root_time + 1
949+
root_time = max(self.timepoint_to_epoch.keys())
950+
av_timestep = root_time / len(self.timepoint_to_epoch)
951+
root_time += av_timestep # Add a root a bit older than the oldest ancestor
952+
ultimate_ancestor_time = root_time + av_timestep
951953
# Add the ultimate ancestor. This is an awkward hack really; we don't
952954
# ever insert this ancestor. The only reason to add it here is that
953955
# it makes sure that the ancestor IDs we have in the ancestor file are

0 commit comments

Comments
 (0)