Skip to content

Commit 49956e8

Browse files
Db status check (#124)
* Add `pyard-status` command to check the status of all tables in the databases. This helps to see whether any tables are missing and to compare the number of rows in each table between versions.

```
-------------------------------------------
IMGT DB Version: 3450
-------------------------------------------
|Table Name          |Rows                |
|-----------------------------------------|
|dup_g               |                  50|
|dup_lg              |                   2|
|dup_lgx             |                   2|
|g_group             |               10841|
|lg_group            |               10841|
|lgx_group           |               10841|
|exon_group          |                9724|
|p_group             |                9724|
|alleles             |               33525|
|xx_codes            |                1690|
|who_alleles         |               31552|
|who_group           |               31930|
-------------------------------------------
```

The missing tables are noted, and the database can be rebuilt with `pyard-import --re-install`.

```
-------------------------------------------
IMGT DB Version: 3290
-------------------------------------------
|Table Name          |Rows                |
|-----------------------------------------|
|dup_g               |                  17|
|dup_lg              |                   0|
|dup_lgx             |                   0|
|g_group             |                2786|
|lg_group            |                2786|
|lgx_group           |                2786|
MISSING: exon_group table
MISSING: p_group table
|alleles             |               18451|
|xx_codes            |                 946|
MISSING: who_alleles table
MISSING: who_group table
-------------------------------------------
```

* Bump version: 0.6.8 → 0.6.9
1 parent 16d18e6 commit 49956e8

File tree

6 files changed

+104
-3
lines changed

6 files changed

+104
-3
lines changed

README.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,7 @@ Command Line Tools
135135
136136
$ pyard -v 3290 --gl 'A1' -r lgx
137137
A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ...
138+
139+
# Show the status of all py-ard databases
140+
$ pyard-status
141+

pyard/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424
from .pyard import ARD
2525

2626
__author__ = """NMDP Bioinformatics"""
27-
__version__ = '0.6.8'
27+
__version__ = '0.6.9'

pyard/db.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,21 @@ def tables_exist(connection: sqlite3.Connection, table_names: List[str]):
9090
return all([table_exists(connection, table_name) for table_name in table_names])
9191

9292

93+
def count_rows(connection: sqlite3.Connection, table_name: str) -> int:
    """
    Count number of rows in the table.

    :param connection: db connection of type sqlite.Connection
    :param table_name: table in the sqlite db
    :return: number of rows in table_name
    :raises sqlite3.OperationalError: if table_name is not a table in the db
    """
    # Table names cannot be bound as SQL parameters, so the name has to be
    # placed in the statement text. Quote it as an identifier (double quotes,
    # with embedded double quotes doubled) so that any table name is safe.
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    cursor = connection.execute(f"SELECT count(*) from {quoted_name}")
    try:
        # count(*) always yields exactly one row with one column
        return cursor.fetchone()[0]
    finally:
        cursor.close()
106+
107+
93108
def mac_code_to_alleles(connection: sqlite3.Connection, code: str) -> List[str]:
94109
"""
95110
Look up the MAC code in the database and return corresponding list

scripts/pyard-status

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# py-ard
5+
# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
6+
#
7+
# This library is free software; you can redistribute it and/or modify it
8+
# under the terms of the GNU Lesser General Public License as published
9+
# by the Free Software Foundation; either version 3 of the License, or (at
10+
# your option) any later version.
11+
#
12+
# This library is distributed in the hope that it will be useful, but WITHOUT
13+
#    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15+
# License for more details.
16+
#
17+
# You should have received a copy of the GNU Lesser General Public License
18+
# along with this library; if not, write to the Free Software Foundation,
19+
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
20+
#
21+
# > http://www.fsf.org/licensing/licenses/lgpl.html
22+
# > http://www.opensource.org/licenses/lgpl-license.php
23+
#
24+
import argparse
25+
import os
26+
import pathlib
27+
import re
28+
29+
from pyard import db, data_repository
30+
31+
32+
def get_data_dir(data_dir):
    """
    Resolve the directory that holds the py-ard SQLite databases.

    :param data_dir: directory given by the user; may be None or empty
    :return: pathlib.Path for data_dir when one is given, otherwise the
        value of db.get_pyard_db_install_directory()
    :raises RuntimeError: if data_dir is given but is not an existing directory
    """
    # Nothing supplied: fall back to the default install location
    if not data_dir:
        return db.get_pyard_db_install_directory()

    directory = pathlib.Path(data_dir)
    if not (directory.exists() and directory.is_dir()):
        raise RuntimeError(f"{data_dir} is not a valid directory")
    return directory
41+
42+
43+
if __name__ == '__main__':
    # Print a status report for every py-ard SQLite database found in the
    # data directory: the row count of each expected table, or a MISSING
    # line when the table is absent from that database.
    parser = argparse.ArgumentParser(
        usage="""
        [--data-dir <directory for db file>]\n
        """,
        description="""
        py-ard tool to provide a status report for reference SQLite databases.
        """
    )
    parser.add_argument(
        "--data-dir",
        dest="data_dir"
    )
    args = parser.parse_args()
    data_dir = get_data_dir(args.data_dir)

    imgt_regex = re.compile(r'pyard-(.+)\.sqlite3')
    # The list of expected tables is the same for every database
    expected_tables = data_repository.ars_mapping_tables + \
        data_repository.code_mapping_tables

    # NOTE(review): os.walk also descends into sub-directories, while the
    # connection below is always opened relative to data_dir. Confirm that
    # databases are only expected at the top level.
    for _, _, filenames in os.walk(data_dir):
        for filename in filenames:
            # Get imgt version from the filename
            # eg: get 3440 from 'pyard-3440.sqlite3'
            # fullmatch skips unrelated files and look-alikes such as
            # 'pyard-3440.sqlite3-journal' instead of failing on them.
            match = imgt_regex.fullmatch(filename)
            if match is None:
                continue
            imgt_version = match.group(1)  # Get first group
            db_connection = db.create_db_connection(data_dir, imgt_version)
            try:
                print('-' * 43)
                print(f"IMGT DB Version: {imgt_version}")
                print('-' * 43)
                print(f"|{'Table Name':20}|{'Rows':20}|")
                print(f"|{'-' * 41}|")
                for table in expected_tables:
                    if db.table_exists(db_connection, table):
                        total_rows = db.count_rows(db_connection, table)
                        print(f"|{table:20}|{total_rows:20}|")
                    else:
                        print(f"MISSING: {table} table")
                print('-' * 43)
            finally:
                # Release the connection even if a query above fails
                db_connection.close()

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.6.8
2+
current_version = 0.6.9
33
commit = True
44
tag = True
55

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242

4343
setup(
4444
name='py-ard',
45-
version='0.6.8',
45+
version='0.6.9',
4646
description="ARD reduction for HLA with Python",
4747
long_description=readme + '\n\n' + history,
4848
author="CIBMTR",
@@ -55,6 +55,7 @@
5555
scripts=[
5656
'scripts/pyard',
5757
'scripts/pyard-import',
58+
'scripts/pyard-status',
5859
'scripts/pyard-reduce-csv'
5960
],
6061
install_requires=requirements,

0 commit comments

Comments
 (0)