@@ -209,7 +209,9 @@ def initial_ts(additional_problematic_sites=list()):
209
209
reference = core .get_reference_sequence ()
210
210
L = core .REFERENCE_SEQUENCE_LENGTH
211
211
assert L == len (reference )
212
- problematic_sites = set (core .get_problematic_sites ()) | set (additional_problematic_sites )
212
+ problematic_sites = set (core .get_problematic_sites ()) | set (
213
+ additional_problematic_sites
214
+ )
213
215
214
216
tables = tskit .TableCollection (L )
215
217
tables .time_units = core .TIME_UNITS
@@ -452,7 +454,6 @@ def preprocess(
452
454
) as bar :
453
455
for md in bar :
454
456
strain = md ["strain" ]
455
- logger .debug (f"Getting alignment for { strain } " )
456
457
try :
457
458
alignment = alignment_store [strain ]
458
459
except KeyError :
@@ -470,6 +471,15 @@ def preprocess(
470
471
sample .masked_sites = ma .masked_sites
471
472
sample .alignment = ma .alignment [keep_sites ]
472
473
samples .append (sample )
474
+ num_Ns = ma .original_base_composition .get ("N" , 0 )
475
+ non_nuc_counts = dict (ma .original_base_composition )
476
+ for nuc in "ACGT" :
477
+ del non_nuc_counts [nuc ]
478
+ counts = "," .join (
479
+ f"{ key } ={ count } " for key , count in sorted (non_nuc_counts .items ())
480
+ )
481
+ num_masked = len (ma .masked_sites )
482
+ logger .debug (f"Mask { strain } : masked={ num_masked } { counts } " )
473
483
474
484
logger .info (
475
485
f"Got alignments for { len (samples )} of { len (metadata_matches )} in metadata"
@@ -830,7 +840,6 @@ def add_matching_results(
830
840
return ts # , excluded_samples, added_samples
831
841
832
842
833
-
834
843
def solve_num_mismatches (ts , k ):
835
844
"""
836
845
Return the low-level LS parameters corresponding to accepting
0 commit comments