@@ -100,7 +100,7 @@ Here is python code that will write out a makefile from the information in ``df`
100100f = open("sims.make", "w")
101101print(f"all: {' '.join(df.outfile.to_list())}\n", file=f)
102102for i, row in df.iterrows():
103- print(f"{row.outfile}: {row.infile}", file=f)
103+ print(f"{row.outfile}: {row.infile} phylo_bgs.slim\n ", file=f)
104104 print(f"\tslim -d \"infile='{row.infile}'\" -d popsize={row.popsize} -d num_gens={row.edgelen} -d \"outfile='{row.child}.trees'\" phylo_bgs.slim\n", file=f)
105105f.close()
106106```
@@ -117,13 +117,15 @@ cat sims.make
117117With the makefile in hand,
118118we can now run make, specifying the maximum number of simulations
119119to be run simultaneously the `` -j `` .
120-
121- ``` python
120+ (Click on the "+" icon to see SLiM's output.)
121+ ``` {code-cell}
122+ :tags: ["hide-output"]
122123%%bash
123124make -f sims.make -j 3
124125```
125126
126- ``` {dropdown} Alternative to using make
127+
128+ ``` {dropdown} Click here for how to use python instead of make
127129
128130You would have to write a recursion over the branches in your tree (starting
129131from the root) and then parallelize the runs of sister branches somehow.
@@ -199,42 +201,46 @@ recursive function that goes through our data frame with the phylogeny and
199201returns a dictionary with the merged tree sequences from the tip to the root.
200202
201203``` {code-cell}
202- def union_recursion(df, focal="root", merged=None):
203- if merged is None:
204- merged = {
205- row.child : (
206- tskit.load(row.outfile),
207- row.edgelen,
208- [row.child]
209- ) for i, row in df[df.is_leaf].iterrows()
210- }
211- print(f"Going in: {focal}")
212- childs = df[df.parent == focal]
213- assert (len(childs) == 2) or (len(childs) == 0)
214- if len(childs) == 2:
215- for i, row in childs.iterrows():
216- merged = union_recursion(df, row.child, merged)
217- cname1 = childs.iloc[0,]["child"]
218- cname2 = childs.iloc[1,]["child"]
219- split_time = merged[cname1][1]
220- assert split_time == merged[cname2][1] # ultrametric
221- print(f'Unioning: {childs["child"].to_list()}, Split time: {split_time}')
222- ts1 = merged[cname1][0]
223- ts2 = merged[cname2][0]
224- node_map = match_nodes(ts2, ts1, split_time)
225- tsu = ts1.union(ts2, node_map, check_shared_equality=True)
204+ merged = {
205+ row.child : {
206+ "ts": tskit.load(row.outfile),
207+ "depth": row.edgelen,
208+ "children": [row.child]
209+ }
210+ for i, row in df[df.is_leaf].iterrows()
211+ }
212+
213+ def union_children(parent, df, merged):
214+ print(f"Going in: {parent}")
215+ child_rows = df[df.parent == parent]
216+ assert (len(child_rows) == 2) or (len(childs) == 0)
217+ if len(child_rows) == 2:
218+ children = [row.child for _, row in child_rows.iterrows()]
219+ for child in children:
220+ if child not in merged:
221+ union_children(child, df, merged)
222+ split_time = merged[children[0]]["depth"]
223+ assert split_time == merged[children[1]]["depth"] # ultrametric
224+ print(f'Unioning: {children}, Split time: {split_time}')
225+ ts0 = merged[children[0]]["ts"]
226+ ts1 = merged[children[1]]["ts"]
227+ node_map = match_nodes(ts1, ts0, split_time)
228+ tsu = pyslim.SlimTreeSequence(
229+ ts0.union(ts1, node_map, check_shared_equality=True)
230+ )
226231 # the time from tip to start of simulation is split_time plus the
227232 # length of the edge
228- merged[focal] = (
229- tsu,
230- split_time + df[df.child==focal].edgelen.item() ,
231- merged[cname1][2] + merged[cname2][2]
232- )
233- return merged
233+ parent_edgelength = df[df.child==parent].edgelen.item()
234+ merged[parent] = {
235+ "ts": tsu ,
236+ "depth": split_time + parent_edgelength,
237+ "children": merged[children[0]]["children"] + merged[children[1]]["children"]
238+ }
234239
235- merged = union_recursion(df )
240+ union_children("root", df, merged )
236241# union of all three species tree sequences is in the root.
237- tsu, _, pops = merged["root"]
242+ tsu = merged["root"]["ts"]
243+ pops = merged["root"]["children"]
238244```
239245
240246A slightly tricky thing we had to do there was to make sure we kept track of
@@ -245,19 +251,25 @@ contains the names of the populations
245251in the order in which they are indexed in the resulting tree sequence `` tsu `` .
246252(Since the population in SLiM is `` p1 `` , the zero-th population will be unused.)
247253
248- Let's make sure we have the right number of samples in each of the populations
249- we specified.
254+ Let's make sure we have the right number of present-day samples
255+ in each of the populations. To do this we need to make sure to get
256+ "alive" samples, because recall that we have saved the state of the
257+ population at each species split time.
250258
251259``` {code-cell}
260+ alive = np.where(np.isclose(tsu.tables.nodes.time, 0))[0]
252261for i, name in enumerate(pops):
253- n_samples = len(tsu.samples(i + 1)) // 2
262+ pop_samples = tsu.samples(i + 1)
263+ n_samples = sum(np.isin(pop_samples, alive)) // 2
254264 print(f"Union-ed tree sequence has {n_samples} samples in population {name},\n"
255265 f"\tand we specified {df[df.child==name].popsize.item()} individuals in our simulations.")
266+ assert n_samples == df[df.child==name].popsize.item()
256267```
257268
258269Finally, we should recapitate the result,
259270in case some trees on the root branch haven't coalesced,
260271and write out the result:
272+
261273``` {code-cell}
262274tsu = pyslim.SlimTreeSequence(tsu)
263275tsu = tsu.recapitate(recombination_rate=1e-8)
0 commit comments