@@ -360,139 +360,6 @@ def extend(
360
360
add_provenance (ts_out , output_ts )
361
361
362
362
363
def _load_additional_problematic_sites(path):
    # Read the optional site-list file into a plain list of ints.
    if path is None:
        return []
    # ndmin=1 keeps a single-entry file as a one-element array; without it
    # loadtxt yields a 0-d array whose tolist() is a bare int, not a list.
    return np.loadtxt(path, ndmin=1).astype(int).tolist()


@click.command()
@click.argument("alignments", type=click.Path(exists=True, dir_okay=False))
@click.argument("metadata", type=click.Path(exists=True, dir_okay=False))
@click.argument("output-prefix")
@click.option(
    "-b",
    "--base",
    type=click.Path(dir_okay=False, exists=True),
    default=None,
    help=(
        "The base tree sequence to match against. If not specified, create "
        "a new initial base containing the reference. "
    ),
)
@click.option("--num-mismatches", default=None, type=float, help="num-mismatches")
@click.option("--max-hmm-cost", default=5, type=float, help="max-hmm-cost")
@click.option(
    "--min-group-size",
    default=10,
    type=int,
    help="Minimum size of groups of reconsidered samples",
)
@click.option(
    "--num-past-days",
    default=None,
    type=int,
    help="Number of past days to retrieve filtered samples",
)
@click.option(
    "--max-submission-delay",
    default=None,
    type=int,
    help=(
        "The maximum number of days between the sample and its submission date "
        "for it to be included in the inference"
    ),
)
@click.option(
    "--max-daily-samples",
    default=None,
    type=int,
    help=(
        "The maximum number of samples to match in a single day. If the total "
        "is greater than this, randomly subsample."
    ),
)
@click.option("--num-threads", default=0, type=int, help="Number of match threads")
@click.option("--random-seed", default=42, type=int, help="Random seed for subsampling")
@click.option("--stop-date", default="2030-01-01", type=str, help="Stopping date")
@click.option(
    "--additional-problematic-sites",
    default=None,
    type=str,
    help="File containing the list of additional problematic sites to exclude.",
)
@click.option("-p", "--precision", default=None, type=int, help="Match precision")
# NOTE(review): declared as a value-taking bool option rather than a flag, so it
# appears to require an explicit value (e.g. ``--no-progress true``). Left
# unchanged to keep existing invocations working - confirm whether a plain
# flag was intended.
@click.option("--no-progress", default=False, type=bool, help="Don't show progress")
@click.option("-v", "--verbose", count=True)
@click.option("-l", "--log-file", default=None, type=click.Path(dir_okay=False))
def daily_extend(
    alignments,
    metadata,
    output_prefix,
    base,
    num_mismatches,
    max_hmm_cost,
    min_group_size,
    num_past_days,
    max_submission_delay,
    max_daily_samples,
    num_threads,
    random_seed,
    stop_date,
    additional_problematic_sites,
    precision,
    no_progress,
    verbose,
    log_file,
):
    """
    Sequentially extend the trees by adding samples in daily batches.
    """
    # The docstring above is kept short on purpose: click uses it as the
    # command's --help text.
    setup_logging(verbose, log_file)
    rng = random.Random(random_seed)

    additional_problematic = _load_additional_problematic_sites(
        additional_problematic_sites
    )
    if additional_problematic_sites is not None:
        logger.info(
            f"Excluding additional {len(additional_problematic)} problematic sites"
        )

    match_db_path = f"{output_prefix}match.db"
    if base is None:
        # No base supplied: start from a fresh initial tree sequence and a
        # freshly initialised match DB at the derived path. The DB is opened
        # again below under the exit stack, so the return value is not kept.
        base_ts = inference.initial_ts(additional_problematic)
        inference.MatchDb.initialise(match_db_path)
    else:
        base_ts = tskit.load(base)

    # The sites recorded in the base must agree with the ones supplied here.
    # Raised explicitly (not asserted) so the check survives ``python -O``
    # and the user gets a readable message.
    recorded_sites = base_ts.metadata["sc2ts"]["additional_problematic_sites"]
    if recorded_sites != additional_problematic:
        raise click.ClickException(
            "additional problematic sites recorded in the base tree sequence "
            "do not match those supplied via --additional-problematic-sites"
        )

    with contextlib.ExitStack() as exit_stack:
        alignment_store = exit_stack.enter_context(sc2ts.AlignmentStore(alignments))
        metadata_db = exit_stack.enter_context(sc2ts.MetadataDb(metadata))
        match_db = exit_stack.enter_context(inference.MatchDb(match_db_path))
        ts_iter = inference.daily_extend(
            alignment_store=alignment_store,
            metadata_db=metadata_db,
            base_ts=base_ts,
            match_db=match_db,
            num_mismatches=num_mismatches,
            max_hmm_cost=max_hmm_cost,
            min_group_size=min_group_size,
            num_past_days=num_past_days,
            max_submission_delay=max_submission_delay,
            max_daily_samples=max_daily_samples,
            rng=rng,
            precision=precision,
            num_threads=num_threads,
            show_progress=not no_progress,
        )
        for ts, date in ts_iter:
            # One output file per yielded date: <prefix><date>.ts
            output_ts = f"{output_prefix}{date}.ts"
            add_provenance(ts, output_ts)
            # The result for the stop date itself is written before stopping.
            # Dates are compared as strings - presumably ISO formatted so that
            # lexicographic order matches chronological order; TODO confirm.
            if date >= stop_date:
                break
494
-
495
-
496
363
@click .command ()
497
364
@click .argument ("alignment_db" )
498
365
@click .argument ("ts_file" )
@@ -658,6 +525,5 @@ def cli():
658
525
cli .add_command (initialise )
659
526
cli .add_command (list_dates )
660
527
cli .add_command (extend )
661
- cli .add_command (daily_extend )
662
528
cli .add_command (validate )
663
529
cli .add_command (annotate_recombinants )
0 commit comments