tskit-dev
diff --git a/‎CHANGELOG.rst‎
Lines changed: 8 additions & 0 deletions b/‎CHANGELOG.rst‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/_static/pedigree01.png‎
0 Bytes b/‎docs/_static/pedigree01.png‎
0 Bytes
diff --git a/‎docs/metadata.md‎
Lines changed: 16 additions & 13 deletions b/‎docs/metadata.md‎
Lines changed: 16 additions & 13 deletions
diff --git a/‎docs/phylo_bgs.slim‎
Lines changed: 12 additions & 5 deletions b/‎docs/phylo_bgs.slim‎
Lines changed: 12 additions & 5 deletions
diff --git a/‎docs/python_api.md‎
Lines changed: 13 additions & 2 deletions b/‎docs/python_api.md‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎docs/tutorial.md‎
Lines changed: 44 additions & 22 deletions b/‎docs/tutorial.md‎
Lines changed: 44 additions & 22 deletions
diff --git a/‎docs/vignette_coalescent_diversity.md‎
Lines changed: 3 additions & 7 deletions b/‎docs/vignette_coalescent_diversity.md‎
Lines changed: 3 additions & 7 deletions
@@ -2,6 +2,14 @@
 [UPCOMING.X.X] - XXXX-XX-XX
 ***************************
 
+**Breaking changes**:
+
+- 
+
+**New features**:
+
+- Added `pyslim.population_size()` to compute an array giving numbers of
+    individuals across a grid of space and time bins. ({user}giliapatterson)
 
 ********************
 [0.600] - 2021-02-24
 
@@ -155,7 +155,7 @@ to SLiM time as follows:
 
 ```{code-cell}
 def slim_time(ts, time, stage):
-  slim_time = ts.slim_generation - time
+  slim_time = ts.metadata["SLiM"]["generation"] - time
   if ts.metadata['SLiM']['model_type'] == "WF":
     if (ts.metadata['SLiM']['stage'] == "early"
         and stage == "late"):
@@ -211,21 +211,24 @@ could be used to set spatial bounds on an annotated msprime simulation, for inst
 To modify the metadata that ``pyslim`` has introduced into
 the tree sequence produced by a coalescent simulation,
 or the metadata in a SLiM-produced tree sequence,
-what we do is (a) extract the metadata (as a list of dicts),
-(b) modify them, and then (c) write them back into the tables.
+we need to edit the TableCollection that forms the editable data behind the tree sequence.
 For instance, to set the ages of the individuals in the tree sequence to random numbers between 1 and 4,
-and write out the resulting tree sequence:
+we will extract a copy of the underlying tables, clear the individuals table,
+and then iterate over the individuals in the tree sequence,
+re-inserting each one into the table
+after replacing its metadata with a modified version:
 
 ```{code-cell}
-tables = ts.tables
-ind_md = [ind.metadata for ind in tables.individuals]
-for md in ind_md:
-   md["age"] = random.choice([1,2,3,4])
+tables = ts.dump_tables()
+tables.individuals.clear()
+for ind in ts.individuals():
+    md = ind.metadata
+    md["age"] = random.choice([1,2,3,4])
+    _ = tables.individuals.append(
+        ind.replace(metadata=md)
+    )
 
-ims = tables.individuals.metadata_schema
-tables.individuals.packset_metadata(
-  [ims.validate_and_encode_row(md) for md in ind_md])
-mod_ts = pyslim.load_tables(tables)
+mod_ts = tables.tree_sequence()
 
 # check that it worked:
 print("First ten ages:", [mod_ts.individual(i).metadata["age"] for i in range(10)])
@@ -342,7 +345,7 @@ These methods would set the metadata column of a table -
 for instance, if ``metadata`` is a list of NodeMetadata objects, then
 ``annotate_node_metadata(tables, metadata)`` would modify ``tables.nodes`` in place
 to contain the (encoded) metadata in the list ``metadata``.
-Now, this would be done as follows (where now ``metadata`` is a list of metadata dicts):
+Now, this could be done as follows (where now ``metadata`` is a list of metadata dicts):
 
 ```{code-cell}
 metadata = [ {'slim_id': k, 'is_null': False, 'genome_type': 0}
 
@@ -14,16 +14,23 @@ initialize()
     initializeRecombinationRate(1e-9);
 }
 
-1 early() {
+1 late() {
     // if no input tree sequence is provided, then start a subpopulation
     if (infile == "") {
-        sim.addSubpop("p1", popsize);
+        p = sim.addSubpop("p1", popsize);
     } else {
+        // reloading must happen in late()
         sim.readFromPopulationFile(infile);
-        p1.setSubpopulationSize(popsize);
+        parent = sim.subpopulations[0];
+        p = sim.addSubpopSplit(max(sim.subpopulations.id) + 1, popsize, parent);
+        parent.setSubpopulationSize(0);
     }
-    finalgen = num_gens + sim.generation - 1;
-    // scheduling the end of the simulation
+    p.name = popname;
+}
+
+// schedule the end of the simulation
+1 late() {
+    finalgen = num_gens + sim.generation;
     sim.rescheduleScriptBlock(s0, generations=finalgen);
 }
 
 
@@ -34,9 +34,9 @@ kernelspec:
 This page provides detailed documentation for the methods and classes
 available in pyslim.
 
-## Methods
+## Editing or adding to tree sequences
 
-pyslim provides tools for transforming tree sequences:
+``pyslim`` provides tools for transforming tree sequences:
 
 
 ```{eval-rst}
@@ -45,8 +45,19 @@ pyslim provides tools for transforming tree sequences:
   recapitate
   convert_alleles
   generate_nucleotides
+  annotate_defaults
+  update_tables
 ```
 
+## Summarizing tree sequences
+
+Additionally, ``pyslim`` contains the following summary methods:
+
+```{eval-rst}
+.. autosummary::
+
+  population_size
+```
 
 
 ## Additions to the tree sequence
 
@@ -108,11 +108,13 @@ and so our simulation would have less genetic variation than it should have
 Doing this is as simple as:
 
 ```{code-cell}
-orig_ts = pyslim.load("example_sim.trees")
-rts = orig_ts.recapitate(
+orig_ts = tskit.load("example_sim.trees")
+rts = pyslim.recapitate(orig_ts,
             recombination_rate=1e-8,
-            Ne=200, random_seed=5)
+            ancestral_Ne=200, random_seed=5)
 ```
+The warning is harmless; it is reminding us to think about generation time
+when recapitating a nonWF simulation (a topic we'll deal with later).
 
 We can check that this worked as expected, by verifying that after recapitation
 all trees have only one root:
@@ -124,14 +126,14 @@ print(f"Maximum number of roots before recapitation: {orig_max_roots}\n"
       f"After recapitation: {recap_max_roots}")
 ```
 
-The {meth}`.SlimTreeSequence.recapitate` method
+The {func}`.recapitate` function
 is just a thin wrapper around {func}`msprime.sim_ancestry`,
 and you need to set up demography explicitly - for instance, in the example above
 we've simulated from an ancestral population of ``Ne=200`` diploids.
 If you have more than one population,
 you must set migration rates or else coalescence will never happen
 (see {ref}`sec_recapitate_with_migration` for an example,
-and {meth}`.SlimTreeSequence.recapitate` for more).
+and {func}`.recapitate` for more).
 
 
 #### Recapitation with a nonuniform recombination map
@@ -223,9 +225,9 @@ positions[-1] += 1
 assert positions[-1] == orig_ts.sequence_length
 
 recomb_map = msprime.RateMap(position=positions, rate=rates)
-rts = orig_ts.recapitate(
-                recombination_map=recomb_map,
-                Ne=200, random_seed=7)
+rts = pyslim.recapitate(orig_ts,
+                recombination_rate=recomb_map,
+                ancestral_Ne=200, random_seed=7)
 assert(max([t.num_roots for t in rts.trees()]) == 1)
 ```
 (As before, you should *not* usually explicitly set
@@ -301,7 +303,7 @@ which would be inconsistent with the SLiM simulation.
 
 After recapitation,
 simplification to the history of 100 individuals alive today
-can be done with the {meth}`.SlimTreeSequence.simplify` method:
+can be done with the {meth}`tskit.TreeSequence.simplify` method:
 
 ```{code-cell}
 import numpy as np
@@ -423,7 +425,9 @@ and write their SNPs to a VCF is:
 ```{code-cell}
 np.random.seed(1)
 keep_indivs = np.random.choice(alive, 100, replace=False)
-ts = pyslim.SlimTreeSequence(msprime.mutate(orig_ts, rate=1e-8, random_seed=1))
+ts = pyslim.SlimTreeSequence(
+    msprime.sim_mutations(orig_ts, rate=1e-8, random_seed=1)
+)
 with open("example_snps.vcf", "w") as vcffile:
     ts.write_vcf(vcffile, individuals=keep_indivs)
 ```
@@ -452,7 +456,9 @@ keep_nodes = []
 for i in keep_indivs:
     keep_nodes.extend(orig_ts.individual(i).nodes)
 sts = rts.simplify(keep_nodes)
-ts = pyslim.SlimTreeSequence(msprime.mutate(sts, rate=1e-8, random_seed=1))
+ts = pyslim.SlimTreeSequence(
+    msprime.sim_mutations(sts, rate=1e-8, random_seed=1)
+)
 ```
 Individuals are retained by simplify if any of their nodes are,
 so we would get an alive individual without sample nodes if, for instance,
@@ -533,27 +539,43 @@ so there is an empty "population 0" in a SLiM-produced tree sequence.
 
 (sec_recapitate_with_migration)=
 
-## Recapitation with more than one population
+## Recapitation with migration between more than one population
 
 Following on the last example,
 let's recapitate and mutate the tree sequence.
-Recapitation takes a bit more thought, because we have to specify a migration matrix
-(or else it will run forever, unable to coalesce).
+Recall that this recipe had two populations, ``p1`` and ``p2``,
+each of size 1000.
+Recapitation takes a bit more thought, because if the two populations stay separate,
+it will run forever, unable to coalesce.
+By default, {func}`.recapitate` *merges* the two populations into a single
+one of size ``ancestral_Ne``.
+But, if we'd like them to stay separate, we need to include migration between them.
+Here's how we set up the demography using msprime's tools:
 
 ```{code-cell}
-pop_configs = [msprime.PopulationConfiguration(initial_size=1000)
-              for _ in range(orig_ts.num_populations)]
-rts = orig_ts.recapitate(population_configurations=pop_configs,
-                        migration_matrix=[[0.0, 0.0, 0.0],
-                                          [0.0, 0.0, 0.1],
-                                          [0.0, 0.1, 0.0]],
+demography = msprime.Demography.from_tree_sequence(orig_ts)
+for pop in demography.populations:
+    # must set their effective population sizes
+    pop.initial_size = 1000
+
+demography.add_migration_rate_change(
+    time=orig_ts.metadata['SLiM']['generation'],
+    rate=0.1, source="p1", dest="p2",
+)
+demography.add_migration_rate_change(
+    time=orig_ts.metadata['SLiM']['generation'],
+    rate=0.1, source="p2", dest="p1",
+)
+rts = pyslim.recapitate(orig_ts, demography=demography,
                         recombination_rate=1e-8,
-                        random_seed=4)
+                        random_seed=4
+)
 ts = pyslim.SlimTreeSequence(
         msprime.sim_mutations(
                     rts, rate=1e-8,
                     model=msprime.SLiMMutationModel(type=0),
-                    random_seed=7))
+                    random_seed=7)
+)
 ```
 
 Again, there are *three* populations because SLiM starts counting at 1;
 
@@ -201,13 +201,9 @@ for m in ots.mutations():
      if sid not in mut_map:
         mut_map[sid] = np.random.exponential(scale=0.04)
      md["selection_coeff"] = mut_map[sid]
-  _ = tables.mutations.add_row(
-     site=m.site,
-     node=m.node,
-     time=m.time,
-     derived_state=m.derived_state,
-     parent=m.parent,
-     metadata={"mutation_list": md_list})
+  _ = tables.mutations.append(
+          m.replace(metadata={"mutation_list": md_list})
+  )
 
 # check we didn't mess anything up
 assert tables.mutations.num_rows == ots.num_mutations