@@ -57,21 +57,21 @@ def recapitate(ts,
5757 derived_names = [pop .name for pop in demography .populations ]
5858 while ancestral_name in derived_names :
5959 ancestral_name = (ancestral_name + "_ancestral" )
60- ancestral_metadata = default_slim_metadata ( 'population' )
60+ ancestral_metadata = {}
6161 ancestral_metadata ['slim_id' ] = ts .num_populations
6262 demography .add_population (
6363 name = ancestral_name ,
6464 description = "ancestral population simulated by msprime" ,
6565 initial_size = ancestral_Ne ,
6666 extra_metadata = ancestral_metadata ,
67- )
67+ )
6868 # the split has to come slightly longer ago than slim_generation,
6969 # since that's when all the linages are at, and otherwise the event
7070 # won't apply to them
7171 demography .add_population_split (
7272 np .nextafter (
73- ts .slim_generation ,
74- 2 * ts .slim_generation ,
73+ ts .metadata [ 'SLiM' ][ 'generation' ] ,
74+ 2 * ts .metadata [ 'SLiM' ][ 'generation' ] ,
7575 ),
7676 derived = derived_names ,
7777 ancestral = ancestral_name ,
@@ -83,7 +83,13 @@ def recapitate(ts,
8383 initial_state = ts ,
8484 demography = demography ,
8585 ** kwargs )
86- return SlimTreeSequence (recap , reference_sequence = ts .reference_sequence )
86+ # msprime does not maintain reference sequence yet:
87+ # https://github.com/tskit-dev/msprime/issues/1951
88+ if ts .has_reference_sequence () and len (ts .reference_sequence .data ) > 0 :
89+ t = recap .dump_tables ()
90+ t .reference_sequence = ts .reference_sequence
91+ recap = t .tree_sequence ()
92+ return SlimTreeSequence (recap )
8793
8894
8995def convert_alleles (ts ):
@@ -113,8 +119,8 @@ def convert_alleles(ts):
113119 """
114120 tables = ts .dump_tables ()
115121 has_refseq = (
116- hasattr ( ts , 'reference_sequence' )
117- and (ts .reference_sequence is not None )
122+ ts . has_reference_sequence ( )
123+ and len (ts .reference_sequence . data ) > 0
118124 )
119125 # unfortunately, nucleotide mutations may be stacked (e.g., substitutions
120126 # will appear this way) and they don't appear in any particular order;
@@ -152,19 +158,17 @@ def convert_alleles(ts):
152158 raise ValueError ("All mutations must be nucleotide mutations." )
153159 if not has_refseq :
154160 raise ValueError ("Tree sequence must have a valid reference sequence." )
155- aa = [ts .reference_sequence [k ] for k in tables .sites .position .astype ('int' )]
161+ aa = [ts .reference_sequence . data [k ] for k in tables .sites .position .astype ('int' )]
156162 da = np .array (NUCLEOTIDES )[nuc_inds ]
157163 tables .sites .packset_ancestral_state (aa )
158164 tables .mutations .packset_derived_state (da )
159165
160166 out = SlimTreeSequence (tables .tree_sequence ())
161- out .reference_sequence = ts .reference_sequence
162167 return out
163168
164169
165170def generate_nucleotides (ts , reference_sequence = None , keep = True , seed = None ):
166171 """
167-
168172 Returns a modified tree sequence in which mutations have been randomly assigned nucleotides
169173 and (optionally) a reference sequence has been randomly generated.
170174
@@ -206,6 +210,7 @@ def generate_nucleotides(ts, reference_sequence=None, keep=True, seed=None):
206210 raise ValueError ("Reference sequence must be a string of A, C, G, and T only." )
207211
208212 tables = ts .dump_tables ()
213+ tables .reference_sequence .data = reference_sequence
209214 tables .mutations .clear ()
210215 sets = [[k for k in range (4 ) if k != i ] for i in range (4 )]
211216 states = np .full ((ts .num_mutations ,), - 1 )
@@ -242,5 +247,4 @@ def generate_nucleotides(ts, reference_sequence=None, keep=True, seed=None):
242247
243248 return SlimTreeSequence (
244249 tables .tree_sequence (),
245- reference_sequence = reference_sequence
246250 )
0 commit comments