Skip to content

Commit a7f0e77

Browse files
szhan authored and
mergify[bot] committed
Add migrations argument to load_text and dump_text, and add parse_migrations.
1 parent e99c448 commit a7f0e77

File tree

5 files changed

+204
-7
lines changed

5 files changed

+204
-7
lines changed

docs/python-api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,7 @@ Move some or all of these examples into a suitable alternative chapter.
10411041
parse_mutations
10421042
parse_individuals
10431043
parse_populations
1044+
parse_migrations
10441045
pack_strings
10451046
unpack_strings
10461047
pack_bytes
@@ -1143,6 +1144,7 @@ The following constants are used throughout the `tskit` API.
11431144
.. autofunction:: parse_mutations
11441145
.. autofunction:: parse_nodes
11451146
.. autofunction:: parse_populations
1147+
.. autofunction:: parse_migrations
11461148
.. autofunction:: parse_sites
11471149
.. autofunction:: random_nucleotides
11481150
.. autofunction:: register_metadata_codec

python/tests/test_highlevel.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3093,6 +3093,38 @@ def convert(v):
30933093
else:
30943094
assert repr(population.metadata) == splits[1]
30953095

3096+
def verify_migrations_format(self, ts, migrations_file, precision, base64_metadata):
    """
    Verifies that the migrations we output have the correct form.

    Reads the whole of ``migrations_file`` and checks that it consists of
    the expected header line followed by one tab-separated row per
    migration in ``ts``, with each column equal to the corresponding
    migration attribute.

    :param ts: The tree sequence whose migrations were written out.
    :param migrations_file: A readable text stream positioned at the start
        of the dumped migrations.
    :param precision: Accepted so the signature matches the sibling
        ``verify_*_format`` helpers. It is not used: the numeric columns
        are compared against ``str()`` of each value.
    :param base64_metadata: If True, bytes metadata is expected to be
        base64-encoded in the file; otherwise its ``repr`` is expected.
    """
    output_migrations = migrations_file.read().splitlines()
    # One header line plus exactly one line per migration.
    assert len(output_migrations) - 1 == ts.num_migrations
    assert output_migrations[0].split() == [
        "left",
        "right",
        "node",
        "source",
        "dest",
        "time",
        "metadata",
    ]
    columns = ("left", "right", "node", "source", "dest", "time")
    for migration, line in zip(ts.migrations(), output_migrations[1:]):
        splits = line.split("\t")
        for index, name in enumerate(columns):
            assert str(getattr(migration, name)) == splits[index]
        if isinstance(migration.metadata, bytes) and base64_metadata:
            assert tests.base64_encode(migration.metadata) == splits[6]
        else:
            assert repr(migration.metadata) == splits[6]
3127+
30963128
@pytest.mark.parametrize(("precision", "base64_metadata"), [(2, True), (7, False)])
30973129
def test_output_format(self, precision, base64_metadata):
30983130
for ts in get_example_tree_sequences():
@@ -3102,6 +3134,7 @@ def test_output_format(self, precision, base64_metadata):
31023134
mutations_file = io.StringIO()
31033135
individuals_file = io.StringIO()
31043136
populations_file = io.StringIO()
3137+
migrations_file = io.StringIO()
31053138
provenances_file = io.StringIO()
31063139
ts.dump_text(
31073140
nodes=nodes_file,
@@ -3110,6 +3143,7 @@ def test_output_format(self, precision, base64_metadata):
31103143
mutations=mutations_file,
31113144
individuals=individuals_file,
31123145
populations=populations_file,
3146+
migrations=migrations_file,
31133147
provenances=provenances_file,
31143148
precision=precision,
31153149
base64_metadata=base64_metadata,
@@ -3120,6 +3154,7 @@ def test_output_format(self, precision, base64_metadata):
31203154
mutations_file.seek(0)
31213155
individuals_file.seek(0)
31223156
populations_file.seek(0)
3157+
migrations_file.seek(0)
31233158
self.verify_nodes_format(ts, nodes_file, precision, base64_metadata)
31243159
self.verify_edges_format(ts, edges_file, precision, base64_metadata)
31253160
self.verify_sites_format(ts, sites_file, precision, base64_metadata)
@@ -3130,6 +3165,9 @@ def test_output_format(self, precision, base64_metadata):
31303165
self.verify_populations_format(
31313166
ts, populations_file, precision, base64_metadata
31323167
)
3168+
self.verify_migrations_format(
3169+
ts, migrations_file, precision, base64_metadata
3170+
)
31333171

31343172
def verify_approximate_equality(self, ts1, ts2):
31353173
"""
@@ -3143,6 +3181,7 @@ def verify_approximate_equality(self, ts1, ts2):
31433181
assert ts1.num_sites == ts2.num_sites
31443182
assert ts1.num_mutations == ts2.num_mutations
31453183
assert ts1.num_populations == ts2.num_populations
3184+
assert ts1.num_migrations == ts2.num_migrations
31463185

31473186
checked = 0
31483187
for n1, n2 in zip(ts1.nodes(), ts2.nodes()):
@@ -3182,6 +3221,18 @@ def verify_approximate_equality(self, ts1, ts2):
31823221
assert s1.metadata == s2.metadata
31833222
assert ts1.num_mutations == checked
31843223

3224+
checked = 0
3225+
for s1, s2 in zip(ts1.migrations(), ts2.migrations()):
3226+
checked += 1
3227+
assert s1.left == s2.left
3228+
assert s1.right == s2.right
3229+
assert s1.node == s2.node
3230+
assert s1.source == s2.source
3231+
assert s1.dest == s2.dest
3232+
assert s1.time == s2.time
3233+
assert s1.metadata == s2.metadata
3234+
assert ts1.num_migrations == checked
3235+
31853236
# Check the trees
31863237
check = 0
31873238
for t1, t2 in zip(ts1.trees(), ts2.trees()):
@@ -3199,13 +3250,15 @@ def test_text_record_round_trip(self):
31993250
mutations_file = io.StringIO()
32003251
individuals_file = io.StringIO()
32013252
populations_file = io.StringIO()
3253+
migrations_file = io.StringIO()
32023254
ts1.dump_text(
32033255
nodes=nodes_file,
32043256
edges=edges_file,
32053257
sites=sites_file,
32063258
mutations=mutations_file,
32073259
individuals=individuals_file,
32083260
populations=populations_file,
3261+
migrations=migrations_file,
32093262
precision=16,
32103263
)
32113264
nodes_file.seek(0)
@@ -3214,13 +3267,15 @@ def test_text_record_round_trip(self):
32143267
mutations_file.seek(0)
32153268
individuals_file.seek(0)
32163269
populations_file.seek(0)
3270+
migrations_file.seek(0)
32173271
ts2 = tskit.load_text(
32183272
nodes=nodes_file,
32193273
edges=edges_file,
32203274
sites=sites_file,
32213275
mutations=mutations_file,
32223276
individuals=individuals_file,
32233277
populations=populations_file,
3278+
migrations=migrations_file,
32243279
sequence_length=ts1.sequence_length,
32253280
strict=True,
32263281
)
@@ -3231,31 +3286,36 @@ def test_empty_files(self):
32313286
edges_file = io.StringIO("left\tright\tparent\tchild\n")
32323287
sites_file = io.StringIO("position\tancestral_state\n")
32333288
mutations_file = io.StringIO("site\tnode\tderived_state\n")
3289+
migrations_file = io.StringIO("left\tright\tnode\tsource\tdest\ttime\n")
32343290
with pytest.raises(_tskit.LibraryError):
32353291
tskit.load_text(
32363292
nodes=nodes_file,
32373293
edges=edges_file,
32383294
sites=sites_file,
32393295
mutations=mutations_file,
3296+
migrations=migrations_file,
32403297
)
32413298

32423299
def test_empty_files_sequence_length(self):
    """
    Header-only text files load successfully when ``sequence_length`` is
    passed explicitly, giving a tree sequence of that length with no
    nodes, edges, sites, mutations or migrations.
    """
    nodes_file = io.StringIO("is_sample\ttime\n")
    edges_file = io.StringIO("left\tright\tparent\tchild\n")
    sites_file = io.StringIO("position\tancestral_state\n")
    mutations_file = io.StringIO("site\tnode\tderived_state\n")
    migrations_file = io.StringIO("left\tright\tnode\tsource\tdest\ttime\n")
    ts = tskit.load_text(
        nodes=nodes_file,
        edges=edges_file,
        sites=sites_file,
        mutations=mutations_file,
        migrations=migrations_file,
        sequence_length=100,
    )
    assert ts.sequence_length == 100
    assert ts.num_nodes == 0
    assert ts.num_edges == 0
    assert ts.num_sites == 0
    # Previously this line re-checked num_edges, leaving mutations unverified.
    assert ts.num_mutations == 0
    assert ts.num_migrations == 0
32593319

32603320
def test_load_text_no_populations(self):
32613321
nodes_file = io.StringIO("is_sample\ttime\tpopulation\n1\t0\t2\n")

python/tests/test_metadata.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,23 @@ def test_populations(self):
264264
for a, b in zip(expected, p):
265265
assert a.encode("utf8") == b.metadata
266266

267+
@pytest.mark.parametrize(
    "base64_metadata,expected", [(True, ["pop", "gen"]), (False, ["cG9w", "Z2Vu"])]
)
def test_migrations(self, base64_metadata, expected):
    """
    Checks the metadata column read by ``tskit.parse_migrations``.

    The input holds the base64 text ``cG9w`` and ``Z2Vu``. With
    ``base64_metadata=True`` the parsed metadata is expected to be the
    decoded bytes (``pop``, ``gen``); with ``False`` the column text is
    expected unchanged.
    """
    migrations = io.StringIO(
        """\
left right node source dest time metadata
10 100 0 3 4 123.0 cG9w
150 360 1 1 2 307.0 Z2Vu
"""
    )
    m = tskit.parse_migrations(
        migrations, strict=False, encoding="utf8", base64_metadata=base64_metadata
    )
    # zip() stops at the shorter input, so without this check the loop below
    # would pass vacuously if no rows were parsed.
    assert m.num_rows == len(expected)
    for a, b in zip(expected, m):
        assert a.encode("utf8") == b.metadata
283+
267284

268285
class TestMetadataModule:
269286
"""

python/tskit/text_formats.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# MIT License
22
#
3-
# Copyright (c) 2021 Tskit Developers
3+
# Copyright (c) 2021-2022 Tskit Developers
44
#
55
# Permission is hereby granted, free of charge, to any person obtaining a copy
66
# of this software and associated documentation files (the "Software"), to deal
@@ -261,6 +261,7 @@ def dump_text(
261261
mutations,
262262
individuals,
263263
populations,
264+
migrations,
264265
provenances,
265266
precision,
266267
encoding,
@@ -396,6 +397,39 @@ def dump_text(
396397
row = ("{id}\t" "{metadata}").format(id=population.id, metadata=metadata)
397398
print(row, file=populations)
398399

400+
if migrations is not None:
    # Header row: one tab-separated column name per migration field.
    print(
        "left",
        "right",
        "node",
        "source",
        "dest",
        "time",
        "metadata",
        sep="\t",
        file=migrations,
    )
    for migration in ts.migrations():
        metadata = text_metadata(base64_metadata, encoding, migration)
        # The last column has no trailing tab, so each row has exactly as
        # many fields as the header (as the populations rows already do).
        # NOTE: left, right and time are written with their default
        # formatting; ``precision`` is not applied to migration rows.
        row = (
            "{left}\t"
            "{right}\t"
            "{node}\t"
            "{source}\t"
            "{dest}\t"
            "{time}\t"
            "{metadata}"
        ).format(
            left=migration.left,
            right=migration.right,
            node=migration.node,
            source=migration.source,
            dest=migration.dest,
            time=migration.time,
            metadata=metadata,
        )
        print(row, file=migrations)
432+
399433
if provenances is not None:
400434
print("id", "timestamp", "record", sep="\t", file=provenances)
401435
for provenance in ts.provenances():

0 commit comments

Comments
 (0)