Skip to content

Commit e80c00a

Browse files
committed
Add mutation.inherited_state
1 parent 3f92a90 commit e80c00a

File tree

10 files changed

+93
-8
lines changed

10 files changed

+93
-8
lines changed

c/tests/test_trees.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3998,24 +3998,31 @@ test_single_tree_good_mutations(void)
39983998
CU_ASSERT_EQUAL(other_mutations[0].id, 0);
39993999
CU_ASSERT_EQUAL(other_mutations[0].node, 2);
40004000
CU_ASSERT_NSTRING_EQUAL(other_mutations[0].derived_state, "1", 1);
4001+
CU_ASSERT_NSTRING_EQUAL(other_mutations[0].inherited_state, "0", 1);
40014002
CU_ASSERT_EQUAL(other_mutations[1].id, 1);
40024003
CU_ASSERT_EQUAL(other_mutations[1].node, 4);
40034004
CU_ASSERT_NSTRING_EQUAL(other_mutations[1].derived_state, "1", 1);
4005+
CU_ASSERT_NSTRING_EQUAL(other_mutations[1].inherited_state, "0", 1);
40044006
CU_ASSERT_EQUAL(other_mutations[2].id, 2);
40054007
CU_ASSERT_EQUAL(other_mutations[2].node, 0);
40064008
CU_ASSERT_NSTRING_EQUAL(other_mutations[2].derived_state, "0", 1);
4009+
CU_ASSERT_NSTRING_EQUAL(other_mutations[2].inherited_state, "1", 1);
40074010
CU_ASSERT_EQUAL(other_mutations[3].id, 3);
40084011
CU_ASSERT_EQUAL(other_mutations[3].node, 0);
40094012
CU_ASSERT_NSTRING_EQUAL(other_mutations[3].derived_state, "1", 1);
4013+
CU_ASSERT_NSTRING_EQUAL(other_mutations[3].inherited_state, "0", 1);
40104014
CU_ASSERT_EQUAL(other_mutations[4].id, 4);
40114015
CU_ASSERT_EQUAL(other_mutations[4].node, 1);
40124016
CU_ASSERT_NSTRING_EQUAL(other_mutations[4].derived_state, "1", 1);
4017+
CU_ASSERT_NSTRING_EQUAL(other_mutations[4].inherited_state, "0", 1);
40134018
CU_ASSERT_EQUAL(other_mutations[5].id, 5);
40144019
CU_ASSERT_EQUAL(other_mutations[5].node, 2);
40154020
CU_ASSERT_NSTRING_EQUAL(other_mutations[5].derived_state, "1", 1);
4021+
CU_ASSERT_NSTRING_EQUAL(other_mutations[5].inherited_state, "0", 1);
40164022
CU_ASSERT_EQUAL(other_mutations[6].id, 6);
40174023
CU_ASSERT_EQUAL(other_mutations[6].node, 3);
40184024
CU_ASSERT_NSTRING_EQUAL(other_mutations[6].derived_state, "1", 1);
4025+
CU_ASSERT_NSTRING_EQUAL(other_mutations[6].inherited_state, "0", 1);
40194026

40204027
tsk_treeseq_free(&ts);
40214028
}

c/tskit/tables.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ typedef struct {
158158
/** @brief The ID of the edge that this mutation lies on, or TSK_NULL
159159
if there is no corresponding edge.*/
160160
tsk_id_t edge;
161+
/** @brief Inherited state. */
162+
const char *inherited_state;
163+
/** @brief Size of the inherited state in bytes. */
164+
tsk_size_t inherited_state_length;
161165
} tsk_mutation_t;
162166

163167
/**

c/tskit/trees.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,13 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
253253
const tsk_size_t num_nodes = self->tables->nodes.num_rows;
254254
const double *restrict site_position = self->tables->sites.position;
255255
const tsk_id_t *restrict mutation_site = self->tables->mutations.site;
256+
const tsk_id_t *restrict mutation_parent = self->tables->mutations.parent;
257+
const char *restrict sites_ancestral_state = self->tables->sites.ancestral_state;
258+
const tsk_size_t *restrict sites_ancestral_state_offset
259+
= self->tables->sites.ancestral_state_offset;
260+
const char *restrict mutations_derived_state = self->tables->mutations.derived_state;
261+
const tsk_size_t *restrict mutations_derived_state_offset
262+
= self->tables->mutations.derived_state_offset;
256263
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
257264
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
258265
const double *restrict edge_right = self->tables->edges.right;
@@ -311,6 +318,26 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
311318
mutation_id < num_mutations && mutation_site[mutation_id] == site_id) {
312319
mutation = self->site_mutations_mem + mutation_id;
313320
mutation->edge = node_edge_map[mutation->node];
321+
322+
/* Compute inherited state */
323+
if (mutation_parent[mutation_id] == TSK_NULL) {
324+
/* No parent: inherited state is the site's ancestral state */
325+
mutation->inherited_state
326+
= sites_ancestral_state + sites_ancestral_state_offset[site_id];
327+
mutation->inherited_state_length
328+
= sites_ancestral_state_offset[site_id + 1]
329+
- sites_ancestral_state_offset[site_id];
330+
} else {
331+
/* Has parent: inherited state is parent's derived state */
332+
tsk_id_t parent_id = mutation_parent[mutation_id];
333+
mutation->inherited_state
334+
= mutations_derived_state
335+
+ mutations_derived_state_offset[parent_id];
336+
mutation->inherited_state_length
337+
= mutations_derived_state_offset[parent_id + 1]
338+
- mutations_derived_state_offset[parent_id];
339+
}
340+
314341
mutation_id++;
315342
}
316343
site_id++;
@@ -5228,6 +5255,9 @@ tsk_treeseq_get_mutation(
52285255
goto out;
52295256
}
52305257
mutation->edge = self->site_mutations_mem[index].edge;
5258+
mutation->inherited_state = self->site_mutations_mem[index].inherited_state;
5259+
mutation->inherited_state_length
5260+
= self->site_mutations_mem[index].inherited_state_length;
52315261
out:
52325262
return ret;
52335263
}

docs/c-api.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ arbitrarily between releases.
6565
stability, since the primary use-case is for tskit to be embedded
6666
within another application rather than used as a shared library. If you
6767
do intend to use tskit as a shared library and ABI stability is
68-
therefore imporant to you, please let us know and we can plan
68+
therefore important to you, please let us know and we can plan
6969
accordingly.
7070

7171
.. _sec_c_api_overview_structure:

python/CHANGELOG.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,14 @@
3030
- Add ``TreeSequence.mutations_edge`` which returns the edge ID for each mutation's
3131
edge. (:user:`benjeffery`, :pr:`3226`, :issue:`3189`)
3232

33-
- Add ``TreeSequence.mutations_inherited_state`` which returns the inherited state
33+
- Add which returns the inherited state
3434
for each mutation. (:user:`benjeffery`, :pr:`3276`, :issue:`2631`)
3535

36-
- Add ``TreeSequence.sites_ancestral_state`` and ``TreeSequence.mutations_derived_state`` properties
37-
to return the ancestral state of sites and derived state of mutations as NumPy arrays of
36+
- Add ``TreeSequence.sites_ancestral_state``, ``TreeSequence.mutations_derived_state`` and
37+
``TreeSequence.mutations_inherited_state`` properties to return the ancestral state of sites,
38+
derived state of mutations and inherited state of mutations as NumPy arrays of
3839
the new numpy 2.0 StringDType.
39-
(:user:`benjeffery`, :pr:`3228`, :issue:`2632`)
40+
(:user:`benjeffery`, :pr:`3228`, :issue:`2632`, :pr:`3276`, :issue:`2631`)
4041

4142
- Tskit now distributes with a requirement of numpy version 2 or greater. However, you can still use
4243
tskit with numpy 1.X by building tskit from source with numpy 1.X using ``pip install tskit --no-binary tskit``.
@@ -45,6 +46,9 @@
4546
the error "A module that was compiled using NumPy 1.x cannot be run in NumPy 2.0.0 as it may crash.".
4647
If no newer version of the module is available you will have to use the Numpy 1.X build as above.
4748

49+
- Add ``Mutation.inherited_state`` property which returns the inherited state
50+
for a single mutation. (:user:`benjeffery`, :pr:`3277`, :issue:`2631`)
51+
4852
**Bugfixes**
4953

5054
- In some tables with mutations out-of-order `TableCollection.sort` did not re-order

python/_tskitmodule.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,9 +363,10 @@ make_mutation_object(const tsk_mutation_t *mutation)
363363
if (metadata == NULL) {
364364
goto out;
365365
}
366-
ret = Py_BuildValue("iis#iOdi", mutation->site, mutation->node,
366+
ret = Py_BuildValue("iis#iOdis#", mutation->site, mutation->node,
367367
mutation->derived_state, (Py_ssize_t) mutation->derived_state_length,
368-
mutation->parent, metadata, mutation->time, mutation->edge);
368+
mutation->parent, metadata, mutation->time, mutation->edge,
369+
mutation->inherited_state, (Py_ssize_t) mutation->inherited_state_length);
369370
out:
370371
Py_XDECREF(metadata);
371372
return ret;

python/tests/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ def make_mutation(id_):
150150
metadata,
151151
time,
152152
edge,
153+
inherited_state,
153154
) = ll_ts.get_mutation(id_)
154155
return tskit.Mutation(
155156
id=id_,
@@ -160,6 +161,7 @@ def make_mutation(id_):
160161
parent=parent,
161162
metadata=metadata,
162163
edge=edge,
164+
inherited_state=inherited_state,
163165
metadata_decoder=tskit.metadata.parse_metadata_schema(
164166
ll_ts.get_table_metadata_schemas().mutation
165167
).decode_row,

python/tests/test_highlevel.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,20 @@ def test_trees(self, ts):
12181218
def test_mutations(self, ts):
12191219
self.verify_mutations(ts)
12201220

1221+
@pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
1222+
def test_mutation_inherited_state_property(self, ts):
1223+
inherited_states = ts.mutations_inherited_state
1224+
for mut in ts.mutations():
1225+
expected = inherited_states[mut.id]
1226+
actual = mut.inherited_state
1227+
assert actual == expected
1228+
1229+
if mut.parent == tskit.NULL:
1230+
expected_direct = ts.site(mut.site).ancestral_state
1231+
else:
1232+
expected_direct = ts.mutation(mut.parent).derived_state
1233+
assert actual == expected_direct
1234+
12211235
def verify_pairwise_diversity(self, ts):
12221236
haplotypes = ts.genotype_matrix(isolated_as_missing=False).T
12231237
if ts.num_samples == 0:

python/tests/test_lowlevel.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1336,14 +1336,24 @@ def test_get_mutation_interface(self):
13361336
assert len(mutations) == ts.get_num_mutations()
13371337
# Check the form of the mutations
13381338
for packed in mutations:
1339-
site, node, derived_state, parent, metadata, time, edge = packed
1339+
(
1340+
site,
1341+
node,
1342+
derived_state,
1343+
parent,
1344+
metadata,
1345+
time,
1346+
edge,
1347+
inherited_state,
1348+
) = packed
13401349
assert isinstance(site, int)
13411350
assert isinstance(node, int)
13421351
assert isinstance(derived_state, str)
13431352
assert isinstance(parent, int)
13441353
assert isinstance(metadata, bytes)
13451354
assert isinstance(time, float)
13461355
assert isinstance(edge, int)
1356+
assert isinstance(inherited_state, str)
13471357

13481358
def test_get_edge_interface(self):
13491359
for ts in self.get_example_tree_sequences():
@@ -3867,6 +3877,7 @@ def test_sites(self):
38673877
metadata,
38683878
time,
38693879
edge,
3880+
inherited_state,
38703881
) = ts.get_mutation(mut_id)
38713882
assert site == index
38723883
assert mutation_id == mut_id

python/tskit/trees.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ class Mutation(util.Dataclass):
425425
"metadata",
426426
"time",
427427
"edge",
428+
"inherited_state",
428429
]
429430
id: int # noqa A003
430431
"""
@@ -473,6 +474,13 @@ class Mutation(util.Dataclass):
473474
"""
474475
The ID of the edge that this mutation is on.
475476
"""
477+
inherited_state: str
478+
"""
479+
The inherited state for this mutation. This is the state that existed at the site
480+
before this mutation occurred. This is either the ancestral state of the site
481+
(if the mutation has no parent) or the derived state of the mutation's
482+
parent mutation (if it has a parent).
483+
"""
476484

477485
# To get default values on slots we define a custom init
478486
def __init__(
@@ -485,6 +493,7 @@ def __init__(
485493
parent=NULL,
486494
metadata=b"",
487495
edge=NULL,
496+
inherited_state=None,
488497
):
489498
self.id = id
490499
self.site = site
@@ -494,6 +503,7 @@ def __init__(
494503
self.parent = parent
495504
self.metadata = metadata
496505
self.edge = edge
506+
self.inherited_state = inherited_state
497507

498508
# We need a custom eq to compare unknown times.
499509
def __eq__(self, other):
@@ -6333,6 +6343,7 @@ def mutation(self, id_):
63336343
metadata,
63346344
time,
63356345
edge,
6346+
inherited_state,
63366347
) = self._ll_tree_sequence.get_mutation(id_)
63376348
return Mutation(
63386349
id=id_,
@@ -6343,6 +6354,7 @@ def mutation(self, id_):
63436354
metadata=metadata,
63446355
time=time,
63456356
edge=edge,
6357+
inherited_state=inherited_state,
63466358
metadata_decoder=self.table_metadata_schemas.mutation.decode_row,
63476359
)
63486360

0 commit comments

Comments
 (0)