Skip to content

Commit 3debb78

Browse files
Merge pull request #237 from jeromekelleher/direct-attach-zero-mutation-samples
Add special case for inserting exact matches
2 parents 23ef308 + 2344b99 commit 3debb78

File tree

3 files changed

+33
-3
lines changed

3 files changed

+33
-3
lines changed

sc2ts/core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
NODE_IS_MUTATION_OVERLAP = 1 << 21
1818
NODE_IS_REVERSION_PUSH = 1 << 22
1919
NODE_IS_RECOMBINANT = 1 << 23
20+
NODE_IS_EXACT_MATCH = 1 << 24
2021

2122

2223
__version__ = "undefined"

sc2ts/inference.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,9 +593,11 @@ def extend(
593593
match_db.create_mask_table(base_ts)
594594
ts = increment_time(date, base_ts)
595595

596+
ts = add_exact_matches(ts=ts, match_db=match_db, date=date)
597+
596598
logger.info(f"Update ARG with low-cost samples for {date}")
597599
ts = add_matching_results(
598-
f"match_date=='{date}' and hmm_cost<={max_hmm_cost}",
600+
f"match_date=='{date}' and hmm_cost>0 and hmm_cost<={max_hmm_cost}",
599601
ts=ts,
600602
match_db=match_db,
601603
date=date,
@@ -674,6 +676,31 @@ def match_path_ts(samples, ts, path, reversions):
674676
# print(tables)
675677

676678

679+
def add_exact_matches(match_db, ts, date):
680+
where_clause = f"match_date=='{date}' AND hmm_cost==0"
681+
logger.info(f"Querying match DB WHERE: {where_clause}")
682+
samples = list(match_db.get(where_clause))
683+
if len(samples) == 0:
684+
logger.info(f"No exact matches on {date}")
685+
return ts
686+
logger.info(f"Update ARG with {len(samples)} exact matches for {date}")
687+
tables = ts.dump_tables()
688+
for sample in samples:
689+
assert len(sample.path) == 1
690+
assert len(sample.mutations) == 0
691+
node_id = tables.nodes.add_row(
692+
flags=tskit.NODE_IS_SAMPLE | core.NODE_IS_EXACT_MATCH,
693+
time=0,
694+
metadata=sample.metadata,
695+
)
696+
parent = sample.path[0].parent
697+
logger.debug(f"ARG add exact match {sample.strain}:{node_id}->{parent}")
698+
tables.edges.add_row(0, ts.sequence_length, parent=parent, child=node_id)
699+
tables.sort()
700+
tables.build_index()
701+
return tables.tree_sequence()
702+
703+
677704
def add_matching_results(
678705
where_clause,
679706
match_db,

sc2ts/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -494,13 +494,14 @@ def summary(self):
494494
mc_nodes = np.sum(self.ts.nodes_flags == sc2ts.NODE_IS_MUTATION_OVERLAP)
495495
pr_nodes = np.sum(self.ts.nodes_flags == sc2ts.NODE_IS_REVERSION_PUSH)
496496
re_nodes = np.sum(self.ts.nodes_flags == sc2ts.NODE_IS_RECOMBINANT)
497+
exact_matches = np.sum((self.ts.nodes_flags & sc2ts.NODE_IS_EXACT_MATCH) > 0)
497498

498499
samples = self.ts.samples()
499500
nodes_with_zero_muts = np.sum(self.nodes_num_mutations == 0)
500501
sites_with_zero_muts = np.sum(self.sites_num_mutations == 0)
501502
latest_sample = self.nodes_date[samples[-1]]
502503
masked_sites_per_sample = self.nodes_num_masked_sites[samples]
503-
non_samples = self.ts.nodes_flags != tskit.NODE_IS_SAMPLE
504+
non_samples = (self.ts.nodes_flags & tskit.NODE_IS_SAMPLE) == 0
504505
max_non_sample_mutations = np.max(self.nodes_num_mutations[non_samples])
505506
insertions = np.sum(self.mutations_inherited_state == "-")
506507
deletions = np.sum(self.mutations_derived_state == "-")
@@ -509,6 +510,7 @@ def summary(self):
509510
("latest_sample", latest_sample),
510511
("samples", self.ts.num_samples),
511512
("nodes", self.ts.num_nodes),
513+
("exact_matches", exact_matches),
512514
("mc_nodes", mc_nodes),
513515
("pr_nodes", pr_nodes),
514516
("re_nodes", re_nodes),
@@ -584,7 +586,7 @@ def _node_summary(self, u, child_mutations=True):
584586
qc += status
585587
flags = self.ts.nodes_flags[u]
586588
strain = ""
587-
if flags == tskit.NODE_IS_SAMPLE:
589+
if (flags & tskit.NODE_IS_SAMPLE) != 0:
588590
strain = md["strain"]
589591
elif flags == 1 << 21:
590592
if "overlap" in md:

0 commit comments

Comments
 (0)