Skip to content

Commit e38737b

Browse files
Improve alignment quality logging
Closes #221
1 parent 0eb0ef4 commit e38737b

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

sc2ts/inference.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,9 @@ def initial_ts(additional_problematic_sites=list()):
209209
reference = core.get_reference_sequence()
210210
L = core.REFERENCE_SEQUENCE_LENGTH
211211
assert L == len(reference)
212-
problematic_sites = set(core.get_problematic_sites()) | set(additional_problematic_sites)
212+
problematic_sites = set(core.get_problematic_sites()) | set(
213+
additional_problematic_sites
214+
)
213215

214216
tables = tskit.TableCollection(L)
215217
tables.time_units = core.TIME_UNITS
@@ -452,7 +454,6 @@ def preprocess(
452454
) as bar:
453455
for md in bar:
454456
strain = md["strain"]
455-
logger.debug(f"Getting alignment for {strain}")
456457
try:
457458
alignment = alignment_store[strain]
458459
except KeyError:
@@ -470,6 +471,15 @@ def preprocess(
470471
sample.masked_sites = ma.masked_sites
471472
sample.alignment = ma.alignment[keep_sites]
472473
samples.append(sample)
474+
num_Ns = ma.original_base_composition.get("N", 0)
475+
non_nuc_counts = dict(ma.original_base_composition)
476+
for nuc in "ACGT":
477+
del non_nuc_counts[nuc]
478+
counts = ",".join(
479+
f"{key}={count}" for key, count in sorted(non_nuc_counts.items())
480+
)
481+
num_masked = len(ma.masked_sites)
482+
logger.debug(f"Mask {strain}: masked={num_masked} {counts}")
473483

474484
logger.info(
475485
f"Got alignments for {len(samples)} of {len(metadata_matches)} in metadata"
@@ -830,7 +840,6 @@ def add_matching_results(
830840
return ts # , excluded_samples, added_samples
831841

832842

833-
834843
def solve_num_mismatches(ts, k):
835844
"""
836845
Return the low-level LS parameters corresponding to accepting

0 commit comments

Comments
 (0)