Skip to content

Commit f8e4253

Browse files
committed
Load tree sequence from text
1 parent ed95cac commit f8e4253

File tree

1 file changed

+81
-51
lines changed

1 file changed

+81
-51
lines changed

python/tests/test_imputation.py

Lines changed: 81 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,64 +2,94 @@
22

33
import pandas as pd
44

5+
import tskit
6+
57

68
"""
79
A tree sequence containing 3 diploid individuals with 5 sites and 5 mutations
810
(one per site). The first 2 individuals are used as the reference panel,
911
the last one is the target individual.
1012
"""
11-
_toy_ts_text = """
12-
left right parent child metadata
13-
0.000000 1000000.000000 6 0
14-
0.000000 1000000.000000 6 3
15-
0.000000 1000000.000000 7 2
16-
0.000000 1000000.000000 7 5
17-
0.000000 1000000.000000 8 1
18-
0.000000 1000000.000000 8 4
19-
0.000000 781157.000000 9 6
20-
0.000000 781157.000000 9 7
21-
0.000000 505438.000000 10 8
22-
0.000000 505438.000000 10 9
23-
505438.000000 549484.000000 11 8
24-
505438.000000 549484.000000 11 9
25-
781157.000000 1000000.000000 12 6
26-
781157.000000 1000000.000000 12 7
27-
549484.000000 1000000.000000 13 8
28-
549484.000000 781157.000000 13 9
29-
781157.000000 1000000.000000 13 12
30-
id flags location parents metadata
31-
0 0
32-
1 0
33-
2 0
34-
site node time derived_state parent metadata
35-
0 9 unknown G -1
36-
1 8 unknown A -1
37-
2 9 unknown T -1
38-
3 9 unknown C -1
39-
4 12 unknown C -1
40-
id is_sample time population individual metadata
41-
0 1 0.000000 0 0
42-
1 1 0.000000 0 0
43-
2 1 0.000000 0 1
44-
3 1 0.000000 0 1
45-
4 1 0.000000 0 2
46-
5 1 0.000000 0 2
47-
6 0 0.029768 0 -1
48-
7 0 0.133017 0 -1
49-
8 0 0.223233 0 -1
50-
9 0 0.651586 0 -1
51-
10 0 0.698831 0 -1
52-
11 0 2.114867 0 -1
53-
12 0 4.322031 0 -1
54-
13 0 7.432311 0 -1
55-
position ancestral_state metadata
56-
200000.000000 A
57-
300000.000000 C
58-
520000.000000 G
59-
600000.000000 T
60-
900000.000000 A
13+
_toy_ts_nodes_text = """\
14+
id is_sample time population individual metadata
15+
0 1 0.000000 0 0
16+
1 1 0.000000 0 0
17+
2 1 0.000000 0 1
18+
3 1 0.000000 0 1
19+
4 1 0.000000 0 2
20+
5 1 0.000000 0 2
21+
6 0 0.029768 0 -1
22+
7 0 0.133017 0 -1
23+
8 0 0.223233 0 -1
24+
9 0 0.651586 0 -1
25+
10 0 0.698831 0 -1
26+
11 0 2.114867 0 -1
27+
12 0 4.322031 0 -1
28+
13 0 7.432311 0 -1
29+
"""
30+
31+
_toy_ts_edges_text = """\
32+
left right parent child metadata
33+
0.000000 1000000.000000 6 0
34+
0.000000 1000000.000000 6 3
35+
0.000000 1000000.000000 7 2
36+
0.000000 1000000.000000 7 5
37+
0.000000 1000000.000000 8 1
38+
0.000000 1000000.000000 8 4
39+
0.000000 781157.000000 9 6
40+
0.000000 781157.000000 9 7
41+
0.000000 505438.000000 10 8
42+
0.000000 505438.000000 10 9
43+
505438.000000 549484.000000 11 8
44+
505438.000000 549484.000000 11 9
45+
781157.000000 1000000.000000 12 6
46+
781157.000000 1000000.000000 12 7
47+
549484.000000 1000000.000000 13 8
48+
549484.000000 781157.000000 13 9
49+
781157.000000 1000000.000000 13 12
50+
"""
51+
52+
_toy_ts_sites_text = """\
53+
position ancestral_state metadata
54+
200000.000000 A
55+
300000.000000 C
56+
520000.000000 G
57+
600000.000000 T
58+
900000.000000 A
59+
"""
60+
61+
_toy_ts_mutations_text = """\
62+
site node time derived_state parent metadata
63+
0 9 unknown G -1
64+
1 8 unknown A -1
65+
2 9 unknown T -1
66+
3 9 unknown C -1
67+
4 12 unknown C -1
68+
"""
69+
70+
_toy_ts_individuals_text = """\
71+
flags
72+
0
73+
0
74+
0
6175
"""
6276

77+
78+
def get_toy_ts():
    """
    Builds the toy tree sequence from the text tables defined above
    (nodes, edges, sites, mutations and individuals) and returns it.
    """
    # Each table's text is wrapped in its own in-memory text stream and
    # handed to tskit.load_text under the matching keyword argument.
    table_streams = {
        "nodes": StringIO(_toy_ts_nodes_text),
        "edges": StringIO(_toy_ts_edges_text),
        "sites": StringIO(_toy_ts_sites_text),
        "mutations": StringIO(_toy_ts_mutations_text),
        "individuals": StringIO(_toy_ts_individuals_text),
    }
    return tskit.load_text(strict=False, **table_streams)
91+
92+
6393
"""
6494
BEAGLE 4.1 was run on the toy data set above using default parameters.
6595
The following are the forward probability matrices and backward probability
@@ -141,7 +171,7 @@
141171

142172
def convert_to_pd_df(matrix_text):
143173
"""
144-
Convert a matrix in text to a Pandas dataframe.
174+
Converts a matrix in text to a Pandas dataframe and returns it.
145175
"""
146176
df = pd.read_csv(StringIO(matrix_text))
147177
# Check that switch and non-switch probabilities sum to 1

0 commit comments

Comments
 (0)