|
2 | 2 |
|
3 | 3 | import pandas as pd
|
4 | 4 |
|
| 5 | +import tskit |
| 6 | + |
5 | 7 |
|
6 | 8 | """
|
7 | 9 | A tree sequence containing 3 diploid individuals with 5 sites and 5 mutations
|
8 | 10 | (one per site). The first 2 individuals are used as the reference panel,
|
9 | 11 | and the last one is the target individual.
|
10 | 12 | """
|
11 |
| -_toy_ts_text = """ |
12 |
| - left right parent child metadata |
13 |
| - 0.000000 1000000.000000 6 0 |
14 |
| - 0.000000 1000000.000000 6 3 |
15 |
| - 0.000000 1000000.000000 7 2 |
16 |
| - 0.000000 1000000.000000 7 5 |
17 |
| - 0.000000 1000000.000000 8 1 |
18 |
| - 0.000000 1000000.000000 8 4 |
19 |
| - 0.000000 781157.000000 9 6 |
20 |
| - 0.000000 781157.000000 9 7 |
21 |
| - 0.000000 505438.000000 10 8 |
22 |
| - 0.000000 505438.000000 10 9 |
23 |
| - 505438.000000 549484.000000 11 8 |
24 |
| - 505438.000000 549484.000000 11 9 |
25 |
| - 781157.000000 1000000.000000 12 6 |
26 |
| - 781157.000000 1000000.000000 12 7 |
27 |
| - 549484.000000 1000000.000000 13 8 |
28 |
| - 549484.000000 781157.000000 13 9 |
29 |
| - 781157.000000 1000000.000000 13 12 |
30 |
| - id flags location parents metadata |
31 |
| - 0 0 |
32 |
| - 1 0 |
33 |
| - 2 0 |
34 |
| - site node time derived_state parent metadata |
35 |
| - 0 9 unknown G -1 |
36 |
| - 1 8 unknown A -1 |
37 |
| - 2 9 unknown T -1 |
38 |
| - 3 9 unknown C -1 |
39 |
| - 4 12 unknown C -1 |
40 |
| - id is_sample time population individual metadata |
41 |
| - 0 1 0.000000 0 0 |
42 |
| - 1 1 0.000000 0 0 |
43 |
| - 2 1 0.000000 0 1 |
44 |
| - 3 1 0.000000 0 1 |
45 |
| - 4 1 0.000000 0 2 |
46 |
| - 5 1 0.000000 0 2 |
47 |
| - 6 0 0.029768 0 -1 |
48 |
| - 7 0 0.133017 0 -1 |
49 |
| - 8 0 0.223233 0 -1 |
50 |
| - 9 0 0.651586 0 -1 |
51 |
| - 10 0 0.698831 0 -1 |
52 |
| - 11 0 2.114867 0 -1 |
53 |
| - 12 0 4.322031 0 -1 |
54 |
| - 13 0 7.432311 0 -1 |
55 |
| - position ancestral_state metadata |
56 |
| - 200000.000000 A |
57 |
| - 300000.000000 C |
58 |
| - 520000.000000 G |
59 |
| - 600000.000000 T |
60 |
| - 900000.000000 A |
| 13 | +_toy_ts_nodes_text = """\ |
| 14 | +id is_sample time population individual metadata |
| 15 | +0 1 0.000000 0 0 |
| 16 | +1 1 0.000000 0 0 |
| 17 | +2 1 0.000000 0 1 |
| 18 | +3 1 0.000000 0 1 |
| 19 | +4 1 0.000000 0 2 |
| 20 | +5 1 0.000000 0 2 |
| 21 | +6 0 0.029768 0 -1 |
| 22 | +7 0 0.133017 0 -1 |
| 23 | +8 0 0.223233 0 -1 |
| 24 | +9 0 0.651586 0 -1 |
| 25 | +10 0 0.698831 0 -1 |
| 26 | +11 0 2.114867 0 -1 |
| 27 | +12 0 4.322031 0 -1 |
| 28 | +13 0 7.432311 0 -1 |
| 29 | +""" |
| 30 | + |
| 31 | +_toy_ts_edges_text = """\ |
| 32 | +left right parent child metadata |
| 33 | +0.000000 1000000.000000 6 0 |
| 34 | +0.000000 1000000.000000 6 3 |
| 35 | +0.000000 1000000.000000 7 2 |
| 36 | +0.000000 1000000.000000 7 5 |
| 37 | +0.000000 1000000.000000 8 1 |
| 38 | +0.000000 1000000.000000 8 4 |
| 39 | +0.000000 781157.000000 9 6 |
| 40 | +0.000000 781157.000000 9 7 |
| 41 | +0.000000 505438.000000 10 8 |
| 42 | +0.000000 505438.000000 10 9 |
| 43 | +505438.000000 549484.000000 11 8 |
| 44 | +505438.000000 549484.000000 11 9 |
| 45 | +781157.000000 1000000.000000 12 6 |
| 46 | +781157.000000 1000000.000000 12 7 |
| 47 | +549484.000000 1000000.000000 13 8 |
| 48 | +549484.000000 781157.000000 13 9 |
| 49 | +781157.000000 1000000.000000 13 12 |
| 50 | +""" |
| 51 | + |
| 52 | +_toy_ts_sites_text = """\ |
| 53 | +position ancestral_state metadata |
| 54 | +200000.000000 A |
| 55 | +300000.000000 C |
| 56 | +520000.000000 G |
| 57 | +600000.000000 T |
| 58 | +900000.000000 A |
| 59 | +""" |
| 60 | + |
| 61 | +_toy_ts_mutations_text = """\ |
| 62 | +site node time derived_state parent metadata |
| 63 | +0 9 unknown G -1 |
| 64 | +1 8 unknown A -1 |
| 65 | +2 9 unknown T -1 |
| 66 | +3 9 unknown C -1 |
| 67 | +4 12 unknown C -1 |
| 68 | +""" |
| 69 | + |
| 70 | +_toy_ts_individuals_text = """\ |
| 71 | +flags |
| 72 | +0 |
| 73 | +0 |
| 74 | +0 |
61 | 75 | """
|
62 | 76 |
|
| 77 | + |
def get_toy_ts():
    """
    Builds the toy tree sequence from the text tables defined above
    (nodes, edges, sites, mutations and individuals) and returns it.
    """
    # Keyword name expected by tskit.load_text -> text of the matching table.
    table_text_by_kwarg = {
        "nodes": _toy_ts_nodes_text,
        "edges": _toy_ts_edges_text,
        "sites": _toy_ts_sites_text,
        "mutations": _toy_ts_mutations_text,
        "individuals": _toy_ts_individuals_text,
    }
    # Each table is wrapped in its own in-memory file object; strict=False is
    # passed through unchanged (NOTE(review): per the tskit docs this relaxes
    # the tab-delimiting requirement to whitespace splitting -- confirm).
    return tskit.load_text(
        **{kwarg: StringIO(text) for kwarg, text in table_text_by_kwarg.items()},
        strict=False,
    )
| 91 | + |
| 92 | + |
63 | 93 | """
|
64 | 94 | BEAGLE 4.1 was run on the toy data set above using default parameters.
|
65 | 95 | The following are the forward probability matrices and backward probability
|
|
141 | 171 |
|
142 | 172 | def convert_to_pd_df(matrix_text):
|
143 | 173 | """
|
144 |
| - Convert a matrix in text to a Pandas dataframe. |
| 174 | + Converts a matrix in text to a Pandas dataframe and returns it. |
145 | 175 | """
|
146 | 176 | df = pd.read_csv(StringIO(matrix_text))
|
147 | 177 | # Check that switch and non-switch probabilities sum to 1
|
|
0 commit comments