11
11
import datetime
12
12
import pickle
13
13
14
+ import numpy as np
14
15
import tqdm
15
16
import tskit
16
17
import tszip
22
23
from . import core
23
24
from . import inference
24
25
26
+ logger = logging .getLogger (__name__ )
27
+
25
28
26
29
def get_environment ():
27
30
"""
@@ -230,6 +233,12 @@ def dump_samples(samples, output_file):
230
233
@click .option ("--num-threads" , default = 0 , type = int , help = "Number of match threads" )
231
234
@click .option ("--random-seed" , default = 42 , type = int , help = "Random seed for subsampling" )
232
235
@click .option ("--stop-date" , default = "2030-01-01" , type = str , help = "Stopping date" )
236
+ @click .option (
237
+ "--additional-problematic-sites" ,
238
+ default = None ,
239
+ type = str ,
240
+ help = "File containing the list of additional problematic sites to exclude." ,
241
+ )
233
242
@click .option ("-p" , "--precision" , default = None , type = int , help = "Match precision" )
234
243
@click .option ("--no-progress" , default = False , type = bool , help = "Don't show progress" )
235
244
@click .option ("-v" , "--verbose" , count = True )
@@ -248,6 +257,7 @@ def daily_extend(
248
257
num_threads ,
249
258
random_seed ,
250
259
stop_date ,
260
+ additional_problematic_sites ,
251
261
precision ,
252
262
no_progress ,
253
263
verbose ,
@@ -259,13 +269,27 @@ def daily_extend(
259
269
setup_logging (verbose , log_file )
260
270
rng = random .Random (random_seed )
261
271
272
+ additional_problematic = []
273
+ if additional_problematic_sites is not None :
274
+ additional_problematic = (
275
+ np .loadtxt (additional_problematic_sites ).astype (int ).tolist ()
276
+ )
277
+ logger .info (
278
+ f"Excluding additional { len (additional_problematic )} problematic sites"
279
+ )
280
+
262
281
match_db_path = f"{ output_prefix } match.db"
263
282
if base is None :
264
- base_ts = inference .initial_ts ()
283
+ base_ts = inference .initial_ts (additional_problematic )
265
284
match_db = inference .MatchDb .initialise (match_db_path )
266
285
else :
267
286
base_ts = tskit .load (base )
268
287
288
+ assert (
289
+ base_ts .metadata ["sc2ts" ]["additional_problematic_sites" ]
290
+ == additional_problematic
291
+ )
292
+
269
293
with contextlib .ExitStack () as exit_stack :
270
294
alignment_store = exit_stack .enter_context (sc2ts .AlignmentStore (alignments ))
271
295
metadata_db = exit_stack .enter_context (sc2ts .MetadataDb (metadata ))
0 commit comments