Skip to content

Commit b34a853

Browse files
committed
switched to new reference sequence: closes #147
new individual metadata schema: closes #207 test provenance: closes #217 bounds: closes #162 slim_gen: closes #172
1 parent e9d586c commit b34a853

14 files changed

+487
-502
lines changed

pyslim/_version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# coding: utf-8
22
pyslim_version = '0.600'
3-
slim_file_version = '0.6'
3+
slim_file_version = '0.7'
44
# other file versions that require no modification
5-
compatible_slim_file_versions = ['0.5', '0.6']
5+
compatible_slim_file_versions = ['0.7']

pyslim/methods.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,21 +57,21 @@ def recapitate(ts,
5757
derived_names = [pop.name for pop in demography.populations]
5858
while ancestral_name in derived_names:
5959
ancestral_name = (ancestral_name + "_ancestral")
60-
ancestral_metadata = default_slim_metadata('population')
60+
ancestral_metadata = {}
6161
ancestral_metadata['slim_id'] = ts.num_populations
6262
demography.add_population(
6363
name=ancestral_name,
6464
description="ancestral population simulated by msprime",
6565
initial_size=ancestral_Ne,
6666
extra_metadata=ancestral_metadata,
67-
)
67+
)
6868
# the split has to come slightly longer ago than slim_generation,
6969
# since that's when all the linages are at, and otherwise the event
7070
# won't apply to them
7171
demography.add_population_split(
7272
np.nextafter(
73-
ts.slim_generation,
74-
2 * ts.slim_generation,
73+
ts.metadata['SLiM']['generation'],
74+
2 * ts.metadata['SLiM']['generation'],
7575
),
7676
derived=derived_names,
7777
ancestral=ancestral_name,
@@ -83,7 +83,13 @@ def recapitate(ts,
8383
initial_state = ts,
8484
demography = demography,
8585
**kwargs)
86-
return SlimTreeSequence(recap, reference_sequence=ts.reference_sequence)
86+
# msprime does not maintain reference sequence yet:
87+
# https://github.com/tskit-dev/msprime/issues/1951
88+
if ts.has_reference_sequence() and len(ts.reference_sequence.data) > 0:
89+
t = recap.dump_tables()
90+
t.reference_sequence = ts.reference_sequence
91+
recap = t.tree_sequence()
92+
return SlimTreeSequence(recap)
8793

8894

8995
def convert_alleles(ts):
@@ -113,8 +119,8 @@ def convert_alleles(ts):
113119
"""
114120
tables = ts.dump_tables()
115121
has_refseq = (
116-
hasattr(ts, 'reference_sequence')
117-
and (ts.reference_sequence is not None)
122+
ts.has_reference_sequence()
123+
and len(ts.reference_sequence.data) > 0
118124
)
119125
# unfortunately, nucleotide mutations may be stacked (e.g., substitutions
120126
# will appear this way) and they don't appear in any particular order;
@@ -152,19 +158,17 @@ def convert_alleles(ts):
152158
raise ValueError("All mutations must be nucleotide mutations.")
153159
if not has_refseq:
154160
raise ValueError("Tree sequence must have a valid reference sequence.")
155-
aa = [ts.reference_sequence[k] for k in tables.sites.position.astype('int')]
161+
aa = [ts.reference_sequence.data[k] for k in tables.sites.position.astype('int')]
156162
da = np.array(NUCLEOTIDES)[nuc_inds]
157163
tables.sites.packset_ancestral_state(aa)
158164
tables.mutations.packset_derived_state(da)
159165

160166
out = SlimTreeSequence(tables.tree_sequence())
161-
out.reference_sequence = ts.reference_sequence
162167
return out
163168

164169

165170
def generate_nucleotides(ts, reference_sequence=None, keep=True, seed=None):
166171
"""
167-
168172
Returns a modified tree sequence in which mutations have been randomly assigned nucleotides
169173
and (optionally) a reference sequence has been randomly generated.
170174
@@ -206,6 +210,7 @@ def generate_nucleotides(ts, reference_sequence=None, keep=True, seed=None):
206210
raise ValueError("Reference sequence must be a string of A, C, G, and T only.")
207211

208212
tables = ts.dump_tables()
213+
tables.reference_sequence.data = reference_sequence
209214
tables.mutations.clear()
210215
sets = [[k for k in range(4) if k != i] for i in range(4)]
211216
states = np.full((ts.num_mutations,), -1)
@@ -242,5 +247,4 @@ def generate_nucleotides(ts, reference_sequence=None, keep=True, seed=None):
242247

243248
return SlimTreeSequence(
244249
tables.tree_sequence(),
245-
reference_sequence=reference_sequence
246250
)

pyslim/slim_metadata.py

Lines changed: 227 additions & 173 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)