Skip to content

Commit 4724742

Browse files
szhan authored and jeromekelleher committed
Remove samples added back after reconsidering
1 parent d774510 commit 4724742

File tree

1 file changed

+23
-6
lines changed

1 file changed

+23
-6
lines changed

sc2ts/inference.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -284,7 +284,7 @@ def daily_extend(
284284

285285
last_ts = base_ts
286286
for date in metadata_db.get_days(start_day):
287-
ts, excluded_samples = extend(
287+
ts, excluded_samples, added_back_samples = extend(
288288
alignment_store=alignment_store,
289289
metadata_db=metadata_db,
290290
date=date,
@@ -303,10 +303,24 @@ def daily_extend(
303303
yield ts, excluded_samples, date
304304

305305
# Update list of reconsidered samples.
306-
reconsidered_samples.extend(excluded_samples)
306+
# Remove oldest reconsidered samples.
307307
if len(reconsidered_samples) > 0:
308308
while reconsidered_samples[0].date == earliest_date:
309309
reconsidered_samples.popleft()
310+
# Remove samples just added back.
311+
if len(added_back_samples) > 0:
312+
# TODO: Horrible. This needs to be reworked after
313+
# storing pickled Samples in a SQLite db.
314+
samples_to_remove = []
315+
for sample_added_back in added_back_samples:
316+
for sample_reconsidered in reconsidered_samples:
317+
if sample_added_back.strain == sample_reconsidered.strain:
318+
samples_to_remove.append(sample_added_back)
319+
continue
320+
for sample in samples_to_remove:
321+
reconsidered_samples.remove(sample)
322+
# Add new excluded samples.
323+
reconsidered_samples.extend(excluded_samples)
310324

311325
earliest_date += datetime.timedelta(days=1)
312326

@@ -414,7 +428,7 @@ def extend(
414428
)
415429
ts = increment_time(date, base_ts)
416430

417-
ts, excluded_samples = add_matching_results(
431+
ts, excluded_samples, _ = add_matching_results(
418432
samples=samples,
419433
ts=ts,
420434
date=date,
@@ -424,7 +438,7 @@ def extend(
424438
show_progress=show_progress,
425439
)
426440

427-
ts, _ = add_matching_results(
441+
ts, _, added_back_samples = add_matching_results(
428442
samples=reconsidered_samples,
429443
ts=ts,
430444
date=date,
@@ -434,7 +448,7 @@ def extend(
434448
show_progress=show_progress,
435449
)
436450

437-
return ts, excluded_samples
451+
return ts, excluded_samples, added_back_samples
438452

439453

440454
def match_path_ts(samples, ts, path, reversions):
@@ -524,6 +538,7 @@ def add_matching_results(
524538
logger.info(f"Got {len(grouped_matches)} distinct paths")
525539

526540
attach_nodes = []
541+
added_samples = []
527542

528543
with tqdm.tqdm(
529544
grouped_matches.items(),
@@ -535,6 +550,8 @@ def add_matching_results(
535550
if len(match_samples) < min_group_size:
536551
continue
537552

553+
added_samples.extend(match_samples)
554+
538555
# print(path, reversions, len(match_samples))
539556
# Delete the reversions from these samples so that we don't
540557
# build them into the trees
@@ -594,7 +611,7 @@ def add_matching_results(
594611
# print(ts.draw_text())
595612
ts = coalesce_mutations(ts, attach_nodes)
596613

597-
return ts, excluded_samples
614+
return ts, excluded_samples, added_samples
598615

599616

600617
def fetch_samples_from_pickle_file(date, num_past_days=None, in_dir=None):

0 commit comments

Comments
 (0)