Skip to content

Commit 240d8aa

Browse files
authored
Merge pull request #8 from hyanwong/ARG
Add the 2 "What is an ARG" workbooks
2 parents 07cfb78 + 2caa8ea commit 240d8aa

36 files changed

+3172
-0
lines changed

content/WhatIsAnARG_module.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
import collections
2+
import itertools
3+
import sys
4+
5+
import numpy as np
6+
from IPython.display import HTML
7+
8+
def remove_edges(ts, edge_id_remove_list):
9+
edges_to_remove_by_child = collections.defaultdict(list)
10+
edge_id_remove_list = set(edge_id_remove_list)
11+
for m in ts.mutations():
12+
if m.edge in edge_id_remove_list:
13+
# If we remove this edge, we will remove the associated mutation
14+
# as the child node won't have ancestral material in this region.
15+
# So we force the user to explicitly (re)move the mutations beforehand
16+
raise ValueError("Cannot remove edges that have associated mutations")
17+
for remove_edge in edge_id_remove_list:
18+
e = ts.edge(remove_edge)
19+
edges_to_remove_by_child[e.child].append(e)
20+
21+
# sort left-to-right for each child
22+
for k, v in edges_to_remove_by_child.items():
23+
edges_to_remove_by_child[k] = sorted(v, key=lambda e: e.left)
24+
# check no overlaps
25+
for e1, e2 in zip(edges_to_remove_by_child[k], edges_to_remove_by_child[k][1:]):
26+
assert e1.right <= e2.left
27+
28+
# Sanity check: this means the topmost node will deal with modified edges left at the end
29+
assert ts.edge(-1).parent not in edges_to_remove_by_child
30+
31+
new_edges = collections.defaultdict(list)
32+
tables = ts.dump_tables()
33+
tables.edges.clear()
34+
samples = set(ts.samples())
35+
# Edges are sorted by parent time, youngest first, so we can iterate over
36+
# nodes-as-parents visiting children before parents by using itertools.groupby
37+
for parent_id, ts_edges in itertools.groupby(ts.edges(), lambda e: e.parent):
38+
# Iterate through the ts edges *plus* the polytomy edges we created in previous steps.
39+
# This allows us to re-edit polytomy edges when the edges_to_remove are stacked
40+
edges = list(ts_edges)
41+
if parent_id in new_edges:
42+
edges += new_edges.pop(parent_id)
43+
if parent_id in edges_to_remove_by_child:
44+
for e in edges:
45+
assert parent_id == e.parent
46+
l = -1
47+
if e.id in edge_id_remove_list:
48+
continue
49+
# NB: we go left to right along the target edges, reducing edge e as required
50+
for target_edge in edges_to_remove_by_child[parent_id]:
51+
# As we go along the target_edges, gradually split e into chunks.
52+
# If edge e is in the target_edge region, change the edge parent
53+
assert target_edge.left > l
54+
l = target_edge.left
55+
if e.left >= target_edge.right:
56+
# This target edge is entirely to the LHS of edge e, with no overlap
57+
continue
58+
elif e.right <= target_edge.left:
59+
# This target edge is entirely to the RHS of edge e with no overlap.
60+
# Since target edges are sorted by left coord, all other target edges
61+
# are to RHS too, and we are finished dealing with edge e
62+
tables.edges.append(e)
63+
e = None
64+
break
65+
else:
66+
# Edge e must overlap with current target edge somehow
67+
if e.left < target_edge.left:
68+
# Edge had region to LHS of target
69+
# Add the left hand section (change the edge right coord)
70+
tables.edges.add_row(left=e.left, right=target_edge.left, parent=e.parent, child=e.child)
71+
e = e.replace(left=target_edge.left)
72+
if e.right > target_edge.right:
73+
# Edge continues after RHS of target
74+
assert e.left < target_edge.right
75+
new_edges[target_edge.parent].append(
76+
e.replace(right=target_edge.right, parent=target_edge.parent)
77+
)
78+
e = e.replace(left=target_edge.right)
79+
else:
80+
# No more of edge e to RHS
81+
assert e.left < e.right
82+
new_edges[target_edge.parent].append(e.replace(parent=target_edge.parent))
83+
e = None
84+
break
85+
if e is not None:
86+
# Need to add any remaining regions of edge back in
87+
tables.edges.append(e)
88+
else:
89+
# NB: sanity check at top means that the oldest node will have no edges above,
90+
# so the last iteration should hit this branch
91+
for e in edges:
92+
if e.id not in edge_id_remove_list:
93+
tables.edges.append(e)
94+
assert len(new_edges) == 0
95+
tables.sort()
96+
return tables.tree_sequence()
97+
98+
def unsupported_edges(ts, per_interval=False):
99+
"""
100+
Return the internal edges that are unsupported by a mutation.
101+
If ``per_interval`` is True, each interval needs to be supported,
102+
otherwise, a mutation on an edge (even if there are multiple intervals
103+
per edge) will result in all intervals on that edge being treated
104+
as supported.
105+
"""
106+
edges_to_remove = np.ones(ts.num_edges, dtype="bool")
107+
edges_to_remove[[m.edge for m in ts.mutations()]] = False
108+
# We don't remove edges above samples
109+
edges_to_remove[np.isin(ts.edges_child, ts.samples())] = False
110+
111+
if per_interval:
112+
return np.where(edges_to_remove)[0]
113+
else:
114+
keep = (edges_to_remove == False)
115+
for p, c in zip(ts.edges_parent[keep], ts.edges_child[keep]):
116+
edges_to_remove[np.logical_and(ts.edges_parent == p, ts.edges_child == c)] = False
117+
return np.where(edges_to_remove)[0]
118+
119+
120+
class Workbook:
121+
@staticmethod
122+
def setup():
123+
display(HTML(
124+
"<style type='text/css'>" +
125+
".exercise {background-color: yellow; color: black; font-family: 'serif'; font-size: 1.2em}" +
126+
".exercise code {font-size: 0.7em}" +
127+
"</style>" +
128+
"<h4>✅ Your notebook is ready to go!</h4>" +
129+
("This notebook is not running in JupyterLite: you may need to install tskit, tszip, etc."
130+
if sys.platform != 'emscripten' else '''
131+
To clear the notebook and reset,
132+
select "Clear Browser Data" from the JupyterLite help menu.
133+
''')
134+
))
135+
136+
def node_coalescence_status(arg):
137+
"""
138+
Uses the num_children_array attribute to find nodes that represent local coalescence.
139+
See https://tskit.dev/tskit/docs/latest/python-api.html#tskit.Tree.num_children_array
140+
Returns an array of length num_nodes containing 0 if a node never has any coalescent
141+
segments, 1 if some segments of the node are coalescent and some unary, and 2 if
142+
all node segments represent a local coalescence point.
143+
"""
144+
has_unary = np.zeros(arg.num_nodes, dtype=int)
145+
has_coal = np.zeros(arg.num_nodes, dtype=int)
146+
tree = arg.first()
147+
nca = tree.num_children_array
148+
for edge_diffs in arg.edge_diffs():
149+
for e in edge_diffs.edges_out:
150+
if nca[e.parent] == 0:
151+
has_unary[e.parent] = 1
152+
elif nca[e.parent] > 1:
153+
has_coal[e.parent] = 1
154+
for e in edge_diffs.edges_in:
155+
if nca[e.parent] == 0:
156+
has_unary[e.parent] = 1
157+
elif nca[e.parent] > 1:
158+
has_coal[e.parent] = 1
159+
tree.next()
160+
return np.where(has_coal, np.where(has_unary, 1, 2), 0)
161+
162+
def remove_unsupported_edges(ts, per_interval=True):
163+
"""
164+
Remove edges from the tree sequence that are unsupported by mutations.
165+
If ``per_interval`` is True, each interval needs to be supported,
166+
otherwise, a mutation on an edge (even if there are multiple intervals
167+
per edge) will result in all intervals on that edge being treated
168+
as supported.
169+
"""
170+
edges_to_remove = unsupported_edges(ts, per_interval=per_interval)
171+
tables = remove_edges(ts, edges_to_remove).dump_tables()
172+
tables.edges.squash()
173+
return tables.tree_sequence()
174+
175+
class Workbook1(Workbook):
176+
url = "json/WhatIsAnARG/Workbook1/" # could also put a full URL here, but a local one will work offline too
177+
178+
class Workbook2(Workbook):
179+
url = "json/WhatIsAnARG/Workbook2/" # could also put a full URL here, but a local one will work offline too

0 commit comments

Comments
 (0)