|
28 | 28 | import itertools |
29 | 29 | import random |
30 | 30 | import json |
| 31 | +import sys |
31 | 32 |
|
32 | 33 | import numpy as np |
33 | 34 | import msprime |
@@ -4984,3 +4985,242 @@ def test_ts_single_tree_delete_middle(self): |
4984 | 4985 | ts_keep = ts.delete_intervals([[0.25, 0.5]], record_provenance=False) |
4985 | 4986 | ts_delete = ts.keep_intervals([[0, 0.25], [0.5, 1.0]], record_provenance=False) |
4986 | 4987 | self.assertTrue(ts_equal(ts_keep, ts_delete)) |
| 4988 | + |
| 4989 | + |
| 4990 | +class TestTrim(unittest.TestCase): |
| 4991 | + """ |
| 4992 | + Test the trimming functionality |
| 4993 | + """ |
| 4994 | + def add_mutations(self, ts, position, ancestral_state, derived_states, nodes): |
| 4995 | + """ |
| 4996 | + Create a site at the specified position and assign mutations to the specified |
| 4997 | + nodes (could be sequential mutations) |
| 4998 | + """ |
| 4999 | + tables = ts.dump_tables() |
| 5000 | + site = tables.sites.add_row(position, ancestral_state) |
| 5001 | + for state, node in zip(derived_states, nodes): |
| 5002 | + tables.mutations.add_row(site, node, state) |
| 5003 | + tables.sort() |
| 5004 | + tables.build_index() |
| 5005 | + tables.compute_mutation_parents() |
| 5006 | + return tables.tree_sequence() |
| 5007 | + |
| 5008 | + def verify_sites(self, source_tree, trimmed_tree, position_offset): |
| 5009 | + source_sites = list(source_tree.sites()) |
| 5010 | + trimmed_sites = list(trimmed_tree.sites()) |
| 5011 | + self.assertEqual(len(source_sites), len(trimmed_sites)) |
| 5012 | + for source_site, trimmed_site in zip(source_sites, trimmed_sites): |
| 5013 | + self.assertAlmostEqual( |
| 5014 | + source_site.position, position_offset + trimmed_site.position) |
| 5015 | + self.assertEqual( |
| 5016 | + source_site.ancestral_state, trimmed_site.ancestral_state) |
| 5017 | + self.assertEqual(source_site.metadata, trimmed_site.metadata) |
| 5018 | + self.assertEqual( |
| 5019 | + len(source_site.mutations), len(trimmed_site.mutations)) |
| 5020 | + for source_mut, trimmed_mut in zip( |
| 5021 | + source_site.mutations, trimmed_site.mutations): |
| 5022 | + self.assertEqual(source_mut.node, trimmed_mut.node) |
| 5023 | + self.assertEqual( |
| 5024 | + source_mut.derived_state, trimmed_mut.derived_state) |
| 5025 | + self.assertEqual( |
| 5026 | + source_mut.metadata, trimmed_mut.metadata) |
| 5027 | + # mutation.parent id may have changed after deleting redundant mutations |
| 5028 | + if source_mut.parent == trimmed_mut.parent == tskit.NULL: |
| 5029 | + pass |
| 5030 | + else: |
| 5031 | + self.assertEqual( |
| 5032 | + source_tree.tree_sequence.mutation(source_mut.parent).node, |
| 5033 | + trimmed_tree.tree_sequence.mutation(trimmed_mut.parent).node) |
| 5034 | + |
| 5035 | + def verify_ltrim(self, source_ts, trimmed_ts): |
| 5036 | + deleted_span = source_ts.first().span |
| 5037 | + self.assertAlmostEqual( |
| 5038 | + source_ts.sequence_length, trimmed_ts.sequence_length + deleted_span) |
| 5039 | + self.assertEqual(source_ts.num_trees, trimmed_ts.num_trees + 1) |
| 5040 | + for j in range(trimmed_ts.num_trees): |
| 5041 | + source_tree = source_ts.at_index(j + 1) |
| 5042 | + trimmed_tree = trimmed_ts.at_index(j) |
| 5043 | + self.assertEqual(source_tree.parent_dict, trimmed_tree.parent_dict) |
| 5044 | + self.assertAlmostEqual(source_tree.span, trimmed_tree.span) |
| 5045 | + self.assertAlmostEqual( |
| 5046 | + source_tree.interval[0], trimmed_tree.interval[0] + deleted_span) |
| 5047 | + self.verify_sites(source_tree, trimmed_tree, deleted_span) |
| 5048 | + |
| 5049 | + def verify_rtrim(self, source_ts, trimmed_ts): |
| 5050 | + deleted_span = source_ts.last().span |
| 5051 | + self.assertAlmostEqual( |
| 5052 | + source_ts.sequence_length, trimmed_ts.sequence_length + deleted_span) |
| 5053 | + self.assertEqual(source_ts.num_trees, trimmed_ts.num_trees + 1) |
| 5054 | + for j in range(trimmed_ts.num_trees): |
| 5055 | + source_tree = source_ts.at_index(j) |
| 5056 | + trimmed_tree = trimmed_ts.at_index(j) |
| 5057 | + self.assertEqual(source_tree.parent_dict, trimmed_tree.parent_dict) |
| 5058 | + self.assertEqual(source_tree.interval, trimmed_tree.interval) |
| 5059 | + self.verify_sites(source_tree, trimmed_tree, 0) |
| 5060 | + |
| 5061 | + def clear_left_mutate(self, ts, left, num_sites): |
| 5062 | + """ |
| 5063 | + Clear the edges from a tree sequence left of the specified coordinate |
| 5064 | + and add in num_sites regularly spaced sites into the cleared region. |
| 5065 | + """ |
| 5066 | + new_ts = ts.delete_intervals([[0.0, left]]) |
| 5067 | + for j, x in enumerate(np.linspace(0, left, num_sites, endpoint=False)): |
| 5068 | + new_ts = self.add_mutations(new_ts, x, 'A' * j, ['T'] * j, range(j+1)) |
| 5069 | + return new_ts |
| 5070 | + |
| 5071 | + def clear_right_mutate(self, ts, right, num_sites): |
| 5072 | + """ |
| 5073 | + Clear the edges from a tree sequence right of the specified coordinate |
| 5074 | + and add in num_sites regularly spaced sites into the cleared region. |
| 5075 | + """ |
| 5076 | + new_ts = ts.delete_intervals([[right, ts.sequence_length]]) |
| 5077 | + for j, x in enumerate( |
| 5078 | + np.linspace(right, ts.sequence_length, num_sites, endpoint=False)): |
| 5079 | + new_ts = self.add_mutations(new_ts, x, 'A' * j, ['T'] * j, range(j+1)) |
| 5080 | + return new_ts |
| 5081 | + |
| 5082 | + def clear_left_right_234(self, left, right): |
| 5083 | + """ |
| 5084 | + Clear edges to left and right and add 2 mutations at the same site into the left |
| 5085 | + cleared region, 3 at the same site into the untouched region, and 4 into the |
| 5086 | + right cleared region. |
| 5087 | + """ |
| 5088 | + assert 0.0 < left < right < 1.0 |
| 5089 | + ts = msprime.simulate(10, recombination_rate=10, random_seed=2) |
| 5090 | + left_pos = np.mean([0.0, left]) |
| 5091 | + left_root = ts.at(left_pos).root |
| 5092 | + mid_pos = np.mean([left, right]) |
| 5093 | + mid_root = ts.at(mid_pos).root |
| 5094 | + right_pos = np.mean([right, ts.sequence_length]) |
| 5095 | + right_root = ts.at(right_pos).root |
| 5096 | + # Clear |
| 5097 | + ts = ts.keep_intervals([[left, right]], simplify=False) |
| 5098 | + ts = self.add_mutations(ts, left_pos, 'A', ['T', 'C'], [left_root, 0]) |
| 5099 | + ts = self.add_mutations(ts, mid_pos, 'T', ['A', 'C', 'G'], [mid_root, 0, 1]) |
| 5100 | + ts = self.add_mutations( |
| 5101 | + ts, right_pos, 'X', ['T', 'C', 'G', 'A'], [right_root, 0, 1, 2]) |
| 5102 | + self.assertNotEqual(np.min(ts.tables.edges.left), 0) |
| 5103 | + self.assertEqual(ts.num_mutations, 9) |
| 5104 | + self.assertEqual(ts.num_sites, 3) |
| 5105 | + return ts |
| 5106 | + |
| 5107 | + def test_ltrim_single_tree(self): |
| 5108 | + ts = msprime.simulate(10, mutation_rate=12, random_seed=2) |
| 5109 | + ts = self.clear_left_mutate(ts, 0.5, 10) |
| 5110 | + self.verify_ltrim(ts, ts.ltrim()) |
| 5111 | + |
| 5112 | + def test_ltrim_single_tree_no_mutations(self): |
| 5113 | + ts = msprime.simulate(10, random_seed=2) |
| 5114 | + ts = self.clear_left_mutate(ts, 0.5, 0) |
| 5115 | + self.verify_ltrim(ts, ts.ltrim()) |
| 5116 | + |
| 5117 | + def test_ltrim_single_tree_tiny_left(self): |
| 5118 | + ts = msprime.simulate(10, mutation_rate=12, random_seed=2) |
| 5119 | + ts = self.clear_left_mutate(ts, 1e-200, 10) |
| 5120 | + self.verify_ltrim(ts, ts.ltrim()) |
| 5121 | + |
| 5122 | + def test_ltrim_many_trees(self): |
| 5123 | + ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2) |
| 5124 | + ts = self.clear_left_mutate(ts, 0.5, 10) |
| 5125 | + self.verify_ltrim(ts, ts.ltrim()) |
| 5126 | + |
| 5127 | + def test_ltrim_many_trees_left_min(self): |
| 5128 | + ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2) |
| 5129 | + ts = self.clear_left_mutate(ts, sys.float_info.min, 10) |
| 5130 | + self.verify_ltrim(ts, ts.ltrim()) |
| 5131 | + |
| 5132 | + def test_ltrim_many_trees_left_epsilon(self): |
| 5133 | + ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2) |
| 5134 | + ts = self.clear_left_mutate(ts, sys.float_info.epsilon, 0) |
| 5135 | + self.verify_ltrim(ts, ts.ltrim()) |
| 5136 | + |
| 5137 | + def test_ltrim_empty(self): |
| 5138 | + ts = msprime.simulate(2, random_seed=2) |
| 5139 | + ts = ts.delete_intervals([[0, 1]]) |
| 5140 | + self.assertRaises(ValueError, ts.ltrim) |
| 5141 | + |
| 5142 | + def test_ltrim_multiple_mutations(self): |
| 5143 | + ts = self.clear_left_right_234(0.1, 0.5) |
| 5144 | + trimmed_ts = ts.ltrim() |
| 5145 | + self.assertAlmostEqual(trimmed_ts.sequence_length, 0.9) |
| 5146 | + self.assertEqual(trimmed_ts.num_sites, 2) |
| 5147 | + self.assertEqual(trimmed_ts.num_mutations, 7) # We should have deleted 2 |
| 5148 | + self.assertEqual(np.min(trimmed_ts.tables.edges.left), 0) |
| 5149 | + self.verify_ltrim(ts, trimmed_ts) |
| 5150 | + |
| 5151 | + def test_rtrim_single_tree(self): |
| 5152 | + ts = msprime.simulate(10, mutation_rate=12, random_seed=2) |
| 5153 | + ts = self.clear_right_mutate(ts, 0.5, 10) |
| 5154 | + self.verify_rtrim(ts, ts.rtrim()) |
| 5155 | + |
| 5156 | + def test_rtrim_single_tree_no_mutations(self): |
| 5157 | + ts = msprime.simulate(10, random_seed=2) |
| 5158 | + ts = self.clear_right_mutate(ts, 0.5, 0) |
| 5159 | + self.verify_rtrim(ts, ts.rtrim()) |
| 5160 | + |
| 5161 | + def test_rtrim_single_tree_tiny_left(self): |
| 5162 | + ts = msprime.simulate(10, mutation_rate=12, random_seed=2) |
| 5163 | + ts = self.clear_right_mutate(ts, 1e-200, 10) |
| 5164 | + self.verify_rtrim(ts, ts.rtrim()) |
| 5165 | + |
| 5166 | + def test_rtrim_many_trees(self): |
| 5167 | + ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2) |
| 5168 | + ts = self.clear_right_mutate(ts, 0.5, 10) |
| 5169 | + self.verify_rtrim(ts, ts.rtrim()) |
| 5170 | + |
| 5171 | + def test_rtrim_many_trees_left_min(self): |
| 5172 | + ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2) |
| 5173 | + ts = self.clear_right_mutate(ts, sys.float_info.min, 10) |
| 5174 | + self.verify_rtrim(ts, ts.rtrim()) |
| 5175 | + |
| 5176 | + def test_rtrim_many_trees_left_epsilon(self): |
| 5177 | + ts = msprime.simulate(10, recombination_rate=10, mutation_rate=12, random_seed=2) |
| 5178 | + ts = self.clear_right_mutate(ts, sys.float_info.epsilon, 0) |
| 5179 | + self.verify_rtrim(ts, ts.rtrim()) |
| 5180 | + |
| 5181 | + def test_rtrim_empty(self): |
| 5182 | + ts = msprime.simulate(2, random_seed=2) |
| 5183 | + ts = ts.delete_intervals([[0, 1]]) |
| 5184 | + self.assertRaises(ValueError, ts.rtrim) |
| 5185 | + |
| 5186 | + def test_rtrim_multiple_mutations(self): |
| 5187 | + ts = self.clear_left_right_234(0.1, 0.5) |
| 5188 | + trimmed_ts = ts.rtrim() |
| 5189 | + self.assertAlmostEqual(trimmed_ts.sequence_length, 0.5) |
| 5190 | + self.assertEqual(trimmed_ts.num_sites, 2) |
| 5191 | + self.assertEqual(trimmed_ts.num_mutations, 5) # We should have deleted 4 |
| 5192 | + self.assertEqual( |
| 5193 | + np.max(trimmed_ts.tables.edges.right), trimmed_ts.tables.sequence_length) |
| 5194 | + self.verify_rtrim(ts, trimmed_ts) |
| 5195 | + |
| 5196 | + def test_trim_multiple_mutations(self): |
| 5197 | + ts = self.clear_left_right_234(0.1, 0.5) |
| 5198 | + trimmed_ts = ts.trim() |
| 5199 | + self.assertAlmostEqual(trimmed_ts.sequence_length, 0.4) |
| 5200 | + self.assertEqual(trimmed_ts.num_mutations, 3) |
| 5201 | + self.assertEqual(trimmed_ts.num_sites, 1) |
| 5202 | + self.assertEqual(np.min(trimmed_ts.tables.edges.left), 0) |
| 5203 | + self.assertEqual( |
| 5204 | + np.max(trimmed_ts.tables.edges.right), trimmed_ts.tables.sequence_length) |
| 5205 | + |
| 5206 | + def test_trims_no_effect(self): |
| 5207 | + # Deleting from middle should have no effect on any trim function |
| 5208 | + ts = msprime.simulate(10, recombination_rate=2, mutation_rate=50, random_seed=2) |
| 5209 | + ts = ts.delete_intervals([[0.1, 0.5]]) |
| 5210 | + trimmed_ts = ts.ltrim(record_provenance=False) |
| 5211 | + self.assertTrue(ts_equal(ts, trimmed_ts)) |
| 5212 | + trimmed_ts = ts.rtrim(record_provenance=False) |
| 5213 | + self.assertTrue(ts_equal(ts, trimmed_ts)) |
| 5214 | + trimmed_ts = ts.trim(record_provenance=False) |
| 5215 | + self.assertTrue(ts_equal(ts, trimmed_ts)) |
| 5216 | + |
| 5217 | + def test_failure_with_migrations(self): |
| 5218 | + # All trim functions fail if migrations present |
| 5219 | + ts = msprime.simulate(10, recombination_rate=2, random_seed=2) |
| 5220 | + ts = ts.keep_intervals([[0.1, 0.5]]) |
| 5221 | + tables = ts.dump_tables() |
| 5222 | + tables.migrations.add_row(0, 1, 0, 0, 0, 0) |
| 5223 | + ts = tables.tree_sequence() |
| 5224 | + self.assertRaises(ValueError, ts.ltrim) |
| 5225 | + self.assertRaises(ValueError, ts.rtrim) |
| 5226 | + self.assertRaises(ValueError, ts.trim) |
0 commit comments