Skip to content

Commit 45548cd

Browse files
Add info-matches CLI command
1 parent 3debb78 commit 45548cd

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

sc2ts/cli.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import collections
23
import concurrent
34
import logging
45
import platform
@@ -145,6 +146,28 @@ def info_metadata(metadata, verbose, log_file):
145146
print(metadata_db)
146147

147148

149+
@click.command()
@click.argument("match_db", type=click.Path(exists=True, dir_okay=False))
@click.option("-v", "--verbose", count=True)
@click.option("-l", "--log-file", default=None, type=click.Path(dir_okay=False))
def info_matches(match_db, verbose, log_file):
    """
    Information about a match DB
    """
    setup_logging(verbose, log_file)
    with sc2ts.MatchDb(match_db) as db:
        print(db)
        print("last date = ", db.last_date())
        # Tab-separated table: one row per distinct integer HMM cost.
        print("cost\tpercent\tcount")
        total = len(db)
        if total == 0:
            # Nothing to tabulate; returning here also avoids indexing the
            # hmm_cost column of an empty frame and dividing by zero.
            return
        df = db.as_dataframe()
        hmm_cost_counter = collections.Counter(df["hmm_cost"].astype(int))
        # Costs are unique Counter keys, so sorting the items orders by cost.
        for cost, count in sorted(hmm_cost_counter.items()):
            percent = count / total * 100
            print(f"{cost}\t{percent:.1f}\t{count}")
169+
170+
148171
def add_provenance(ts, output_file):
149172
# Record provenance here because this is where the arguments are provided.
150173
provenance = get_provenance_dict()
@@ -389,6 +412,7 @@ def cli():
389412
cli.add_command(import_metadata)
390413
cli.add_command(info_alignments)
391414
cli.add_command(info_metadata)
415+
cli.add_command(info_matches)
392416

393417
cli.add_command(daily_extend)
394418
cli.add_command(validate)

sc2ts/inference.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import scipy.cluster.hierarchy
1818
import zarr
1919
import numba
20+
import pandas as pd
2021

2122
from . import core
2223
from . import alignments
@@ -38,8 +39,21 @@ def __len__(self):
3839
row = self.conn.execute(sql).fetchone()
3940
return row["COUNT(*)"]
4041

42+
def as_dataframe(self):
    """
    Return the ``strain``, ``match_date`` and ``hmm_cost`` columns of the
    ``samples`` table as a pandas DataFrame, one row per stored sample.

    Column labels are taken from the cursor description rather than from
    the fetched rows, so an empty table still yields a frame with all
    three columns, and the labels do not depend on the connection's row
    factory.
    """
    with self.conn:
        cursor = self.conn.execute(
            "SELECT strain, match_date, hmm_cost FROM samples"
        )
        # description holds one 7-tuple per result column; item 0 is its name.
        columns = [col[0] for col in cursor.description]
        return pd.DataFrame(cursor.fetchall(), columns=columns)
48+
49+
def last_date(self):
    """
    Return the largest ``match_date`` value stored in the ``samples``
    table, as reported by SQL ``MAX``.
    """
    with self.conn:
        cursor = self.conn.execute("SELECT MAX(match_date) FROM samples")
        record = cursor.fetchone()
    # The un-aliased aggregate is keyed by its own expression text.
    return record["MAX(match_date)"]
54+
4155
def __str__(self):
    """Summarise the database as its URI plus the number of stored samples."""
    num_samples = len(self)
    return f"MatchDb at {self.uri} has {num_samples} samples"
4357

4458
def __enter__(self):
4559
return self

0 commit comments

Comments
 (0)