@@ -404,35 +404,36 @@ def match_samples(
404
404
# Default to no recombination
405
405
num_mismatches = 1000
406
406
407
- remaining_samples = samples
408
407
# FIXME Something wrong here, we don't seem to get precisely the same
409
408
# ARG for some reason. Need to track it down
410
409
# Also: should only run the things at low precision that have that HMM cost.
411
410
# Start out by setting everything to have 0 mutations and work up from there.
412
411
413
- for cost , precision in [(0 , 0 ), (1 , 2 )]: #, (2, 3)]:
414
- match_tsinfer (
415
- samples = remaining_samples ,
416
- ts = base_ts ,
417
- num_mismatches = num_mismatches ,
418
- precision = precision ,
419
- num_threads = num_threads ,
420
- show_progress = show_progress ,
421
- mirror_coordinates = mirror_coordinates ,
422
- )
423
- samples_to_rerun = []
424
- for sample in remaining_samples :
425
- hmm_cost = sample .get_hmm_cost (num_mismatches )
426
- # print(f"HMM@p={precision}: {sample.strain} hmm_cost={hmm_cost} path={sample.path}")
427
- logger .debug (
428
- f"HMM@p={ precision } : { sample .strain } hmm_cost={ hmm_cost } path={ sample .path } "
429
- )
430
- if hmm_cost > cost :
431
- sample .path .clear ()
432
- sample .mutations .clear ()
433
- samples_to_rerun .append (sample )
434
- remaining_samples = samples_to_rerun
435
-
412
+ # remaining_samples = samples
413
+ # for cost, precision in [(0, 0), (1, 2)]: #, (2, 3)]:
414
+ # match_tsinfer(
415
+ # samples=remaining_samples,
416
+ # ts=base_ts,
417
+ # num_mismatches=num_mismatches,
418
+ # precision=precision,
419
+ # num_threads=num_threads,
420
+ # show_progress=show_progress,
421
+ # mirror_coordinates=mirror_coordinates,
422
+ # )
423
+ # samples_to_rerun = []
424
+ # for sample in remaining_samples:
425
+ # hmm_cost = sample.get_hmm_cost(num_mismatches)
426
+ # # print(f"HMM@p={precision}: {sample.strain} hmm_cost={hmm_cost} path={sample.path}")
427
+ # logger.debug(
428
+ # f"HMM@p={precision}: {sample.strain} hmm_cost={hmm_cost} path={sample.path}"
429
+ # )
430
+ # if hmm_cost > cost:
431
+ # sample.path.clear()
432
+ # sample.mutations.clear()
433
+ # samples_to_rerun.append(sample)
434
+ # remaining_samples = samples_to_rerun
435
+
436
+ samples_to_rerun = samples
436
437
match_tsinfer (
437
438
samples = samples_to_rerun ,
438
439
ts = base_ts ,
@@ -605,6 +606,18 @@ def update_top_level_metadata(ts, date):
605
606
return tables .tree_sequence ()
606
607
607
608
609
def add_sample_to_tables(sample, tables, flags=tskit.NODE_IS_SAMPLE, time=0):
    """
    Append one node row for ``sample`` to ``tables.nodes`` and return the
    value produced by ``add_row`` (callers in this file use it as the node ID).

    The row's metadata is a copy of ``sample.metadata`` with an "sc2ts" entry
    set on top of it. That entry holds the sample's ``alignment_qc`` together
    with its ``path`` and ``mutations``, each element converted via
    ``asdict()``. ``flags`` and ``time`` are forwarded to ``add_row`` unchanged.
    """
    # Copy first so the sample's own metadata mapping is never modified; an
    # existing "sc2ts" key in the copy is replaced by the entry built here.
    node_metadata = dict(sample.metadata)
    node_metadata["sc2ts"] = {
        "qc": sample.alignment_qc,
        "path": [segment.asdict() for segment in sample.path],
        "mutations": [mutation.asdict() for mutation in sample.mutations],
    }
    return tables.nodes.add_row(flags=flags, time=time, metadata=node_metadata)
619
+
620
+
608
621
def match_path_ts (samples , ts , path , reversions ):
609
622
"""
610
623
Given the specified list of samples with equal copying paths,
@@ -623,17 +636,7 @@ def match_path_ts(samples, ts, path, reversions):
623
636
)
624
637
for sample in samples :
625
638
assert sample .path == path
626
- metadata = {
627
- ** sample .metadata ,
628
- "sc2ts" : {
629
- "qc" : sample .alignment_qc ,
630
- "path" : [x .asdict () for x in sample .path ],
631
- "mutations" : [x .asdict () for x in sample .mutations ],
632
- },
633
- }
634
- node_id = tables .nodes .add_row (
635
- flags = tskit .NODE_IS_SAMPLE , time = 0 , metadata = metadata
636
- )
639
+ node_id = add_sample_to_tables (sample , tables )
637
640
tables .edges .add_row (0 , ts .sequence_length , parent = 0 , child = node_id )
638
641
for mut in sample .mutations :
639
642
if mut .site_id not in site_id_map :
@@ -671,10 +674,10 @@ def add_exact_matches(match_db, ts, date):
671
674
for sample in samples :
672
675
assert len (sample .path ) == 1
673
676
assert len (sample .mutations ) == 0
674
- node_id = tables .nodes .add_row (
677
+ node_id = add_sample_to_tables (
678
+ sample ,
679
+ tables ,
675
680
flags = tskit .NODE_IS_SAMPLE | core .NODE_IS_EXACT_MATCH ,
676
- time = 0 ,
677
- metadata = sample .metadata ,
678
681
)
679
682
parent = sample .path [0 ].parent
680
683
logger .debug (f"ARG add exact match { sample .strain } :{ node_id } ->{ parent } " )
0 commit comments