@@ -32,6 +32,7 @@ def __init__(self, path):
        self.uri = uri
        self.conn = sqlite3.connect(uri, uri=True)
        self.conn.row_factory = metadata.dict_factory
+        logger.debug(f"Opened MatchDb at {path} mode=rw")

    def __len__(self):
        sql = "SELECT COUNT(*) FROM samples"
@@ -52,6 +53,17 @@ def last_date(self):
        row = self.conn.execute(sql).fetchone()
        return row["MAX(match_date)"]

+    def count_newer(self, date):
+        with self.conn:
+            sql = "SELECT COUNT(*) FROM samples WHERE match_date >= ?"
+            row = self.conn.execute(sql, (date,)).fetchone()
+            return row["COUNT(*)"]
+
+    def delete_newer(self, date):
+        sql = "DELETE FROM samples WHERE match_date >= ?"
+        with self.conn:
+            self.conn.execute(sql, (date,))
+
    def __str__(self):
        return f"MatchDb at {self.uri} has {len(self)} samples"

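The two helpers added above allow a MatchDb to be rolled back to a given day, for example before re-running matching for dates that were only partially processed. A minimal usage sketch, assuming a MatchDb opened from a path as in __init__ above; the path and cutoff values are purely illustrative:

db = MatchDb("match.db")   # hypothetical path
cutoff = "2021-06-01"      # assumed ISO date string, comparable to the match_date column
if db.count_newer(cutoff) > 0:
    # Remove every sample matched on or after the cutoff so those days can be re-run.
    db.delete_newer(cutoff)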
@@ -376,49 +388,49 @@ def asdict(self):
        }


-def daily_extend(
-    *,
-    alignment_store,
-    metadata_db,
-    base_ts,
-    match_db,
-    num_mismatches=None,
-    max_hmm_cost=None,
-    min_group_size=None,
-    num_past_days=None,
-    show_progress=False,
-    max_submission_delay=None,
-    max_daily_samples=None,
-    num_threads=None,
-    precision=None,
-    rng=None,
-    excluded_sample_dir=None,
-):
-    assert num_past_days is None
-    assert max_submission_delay is None
-
-    start_day = last_date(base_ts)
-
-    last_ts = base_ts
-    for date in metadata_db.get_days(start_day):
-        ts = extend(
-            alignment_store=alignment_store,
-            metadata_db=metadata_db,
-            date=date,
-            base_ts=last_ts,
-            match_db=match_db,
-            num_mismatches=num_mismatches,
-            max_hmm_cost=max_hmm_cost,
-            min_group_size=min_group_size,
-            show_progress=show_progress,
-            max_submission_delay=max_submission_delay,
-            max_daily_samples=max_daily_samples,
-            num_threads=num_threads,
-            precision=precision,
-        )
-        yield ts, date
-
-        last_ts = ts
+# def daily_extend(
+#     *,
+#     alignment_store,
+#     metadata_db,
+#     base_ts,
+#     match_db,
+#     num_mismatches=None,
+#     max_hmm_cost=None,
+#     min_group_size=None,
+#     num_past_days=None,
+#     show_progress=False,
+#     max_submission_delay=None,
+#     max_daily_samples=None,
+#     num_threads=None,
+#     precision=None,
+#     rng=None,
+#     excluded_sample_dir=None,
+# ):
+#     assert num_past_days is None
+#     assert max_submission_delay is None
+
+#     start_day = last_date(base_ts)
+
+#     last_ts = base_ts
+#     for date in metadata_db.get_days(start_day):
+#         ts = extend(
+#             alignment_store=alignment_store,
+#             metadata_db=metadata_db,
+#             date=date,
+#             base_ts=last_ts,
+#             match_db=match_db,
+#             num_mismatches=num_mismatches,
+#             max_hmm_cost=max_hmm_cost,
+#             min_group_size=min_group_size,
+#             show_progress=show_progress,
+#             max_submission_delay=max_submission_delay,
+#             max_daily_samples=max_daily_samples,
+#             num_threads=num_threads,
+#             precision=precision,
+#         )
+#         yield ts, date
+
+#         last_ts = ts


def preprocess(
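With daily_extend commented out above, the per-day driver loop becomes the caller's responsibility. A rough sketch of an equivalent loop, based on the commented-out body and the updated extend() signature further down; last_date and metadata_db.get_days come from the old code, and the keyword arguments shown are assumptions, since only part of the new signature is visible in this diff:

last_ts = base_ts
for date in metadata_db.get_days(last_date(base_ts)):
    last_ts = extend(
        alignment_store=alignment_store,
        metadata_db=metadata_db,
        date=date,
        base_ts=last_ts,
        match_db=match_db,
        hmm_cost_threshold=5,   # replaces the old max_hmm_cost argument
    )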
@@ -559,30 +571,28 @@ def extend(
    base_ts,
    match_db,
    num_mismatches=None,
-    max_hmm_cost=None,
+    hmm_cost_threshold=None,
    min_group_size=None,
-    show_progress=False,
-    max_submission_delay=None,
    max_daily_samples=None,
+    show_progress=False,
+    retrospective_window=None,
+    random_seed=42,
    num_threads=0,
-    precision=None,
-    rng=None,
):
    if num_mismatches is None:
        num_mismatches = 3
-    if max_hmm_cost is None:
-        max_hmm_cost = 5
+    if hmm_cost_threshold is None:
+        hmm_cost_threshold = 5
    if min_group_size is None:
        min_group_size = 10

+    # TMP
+    precision = 6
    check_base_ts(base_ts)
    logger.info(
        f"Extend {date}; ts:nodes={base_ts.num_nodes};samples={base_ts.num_samples};"
        f"mutations={base_ts.num_mutations};date={base_ts.metadata['sc2ts']['date']}"
    )
-    # TODO not sure whether we'll keep these params. Making sure they're not
-    # used for now
-    assert max_submission_delay is None

    samples = preprocess(
        date,
@@ -615,7 +625,7 @@ def extend(

    logger.info(f"Update ARG with low-cost samples for {date}")
    ts = add_matching_results(
-        f"match_date=='{date}' and hmm_cost>0 and hmm_cost<={max_hmm_cost}",
+        f"match_date=='{date}' and hmm_cost>0 and hmm_cost<={hmm_cost_threshold}",
        ts=ts,
        match_db=match_db,
        date=date,