Skip to content

Commit e5baf42

Browse files
committed
- check the IMGT db version is valid when creating/importing a new database
- add `--list` option in `pyard-import` to show the list of available IMGT versions
- `pyard-status` shows the `Latest` version number and compares it with the most recent one from IMGT. Suggests re-installing if a newer one is available.
1 parent 7a527cc commit e5baf42

File tree

6 files changed

+131
-43
lines changed

6 files changed

+131
-43
lines changed

pyard/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from .pyard import ARD
2525
from .blender import blender as dr_blender
2626
from .broad_splits import find_splits as find_broad_splits
27+
from .misc import get_imgt_db_versions as db_versions
2728

2829
__author__ = """NMDP Bioinformatics"""
2930
__version__ = "0.9.1"

pyard/db.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,11 @@
2222
#
2323
import pathlib
2424
import sqlite3
25+
import sys
2526
from typing import Tuple, Dict, Set, List
2627

28+
from pyard.misc import get_imgt_db_versions
29+
2730

2831
def get_pyard_db_install_directory():
2932
return pathlib.Path.home() / ".pyard"
@@ -49,17 +52,25 @@ def create_db_connection(data_dir, imgt_version, ro=False):
4952
# If in read-only mode, make sure the db file exists
5053
if not pathlib.Path(db_filename).exists():
5154
raise RuntimeError(f"Reference Database {db_filename} not available.")
52-
53-
# Create the data directory if it doesn't exist
54-
if not pathlib.Path(data_dir).exists():
55-
pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)
56-
57-
if ro:
5855
# Open the database in read-only mode
5956
file_uri = f"file:{db_filename}?mode=ro"
6057
# Multiple threads can access the same connection since it's only ro
6158
return sqlite3.connect(file_uri, check_same_thread=False, uri=True)
6259

60+
# Check the imgt_version is a valid IMGT DB Version
61+
# by querying the IMGT site
62+
if imgt_version != "Latest":
63+
if not pathlib.Path(db_filename).exists():
64+
all_imgt_versions = get_imgt_db_versions()
65+
if imgt_version not in all_imgt_versions:
66+
raise ValueError(
67+
f"{imgt_version} is not a valid IMGT database version."
68+
)
69+
70+
# Create the data directory if it doesn't exist
71+
if not pathlib.Path(data_dir).exists():
72+
pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)
73+
6374
# Open the database for read/write
6475
file_uri = f"file:{db_filename}"
6576
return sqlite3.connect(file_uri, uri=True)

pyard/misc.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# List of expression characters
2+
import pathlib
3+
from typing import List
4+
5+
from pyard import db
6+
27
expression_chars = ["N", "Q", "L", "S"]
38
# List of P and G characters
49
PandG_chars = ["P", "G"]
@@ -64,3 +69,28 @@ def get_P_name(a: str) -> str:
6469
if last_char in PandG_chars + expression_chars:
6570
a = a[:-1]
6671
return ":".join(a.split(":")[0:2]) + "P"
72+
73+
74+
def get_imgt_db_versions() -> List[str]:
    """
    Fetch the branch names of the ANHIG/IMGTHLA GitHub repository.

    Each branch name is treated by callers as an available IMGT database
    version.

    :return: list of branch names, in the order returned by the GitHub API
    :raises RuntimeError: if the API answers with a status other than 200
    """
    import urllib.request
    import json

    req = urllib.request.Request(
        url="https://api.github.com/repos/ANHIG/IMGTHLA/branches?per_page=100"
    )
    # Errors raised by urllib (connection failures, HTTP error statuses)
    # propagate to the caller unchanged.
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(req, timeout=5) as res:
        if res.status != 200:
            # Fail loudly here instead of implicitly returning None, which
            # made callers break later with an unrelated TypeError.
            raise RuntimeError(
                f"Unable to retrieve IMGT database versions: HTTP {res.status}"
            )
        json_body = json.loads(res.read())
    # NOTE(review): only one page (at most 100 branches) is requested;
    # confirm the repository cannot exceed that, or follow pagination.
    return [branch["name"] for branch in json_body]
86+
87+
88+
def get_data_dir(data_dir) -> pathlib.Path:
    """
    Resolve the directory that holds the py-ard database files.

    :param data_dir: user supplied directory; when falsy (None or empty)
        the default py-ard install directory is used instead
    :return: the directory as a pathlib.Path
    :raises RuntimeError: if data_dir is supplied but is not an existing
        directory
    """
    if not data_dir:
        return db.get_pyard_db_install_directory()

    path = pathlib.Path(data_dir)
    # is_dir() is already False for a path that does not exist, so a
    # separate exists() check is not needed.
    if not path.is_dir():
        raise RuntimeError(f"{data_dir} is not a valid directory")
    return path

pyard/pyard.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ def __del__(self):
147147
Close the db connection, when ARD instance goes away
148148
:return:
149149
"""
150-
self.db_connection.close()
150+
if hasattr(self, "db_connection") and self.db_connection:
151+
self.db_connection.close()
151152

152153
@functools.lru_cache(maxsize=max_cache_size)
153154
def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES, reping=True) -> str:

scripts/pyard-import

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323
#
2424
import argparse
2525
import pathlib
26+
import sys
2627

2728
import pyard
2829
from pyard import db, data_repository
29-
import pandas as pd
30+
from pyard.misc import get_data_dir
3031

3132

3233
def get_imgt_version(imgt_version):
@@ -40,18 +41,10 @@ def get_imgt_version(imgt_version):
4041
return "Latest"
4142

4243

43-
def get_data_dir(data_dir):
44-
if data_dir:
45-
path = pathlib.Path(data_dir)
46-
if not path.exists() or not path.is_dir():
47-
raise RuntimeError(f"{data_dir} is not a valid directory")
48-
else:
49-
data_dir = db.get_pyard_db_install_directory()
50-
return data_dir
51-
52-
5344
def get_v2_v3_mapping(v2_v3_mapping):
5445
if v2_v3_mapping:
46+
import pandas as pd
47+
5548
path = pathlib.Path(v2_v3_mapping)
5649
if not path.exists() or not path.is_file():
5750
raise RuntimeError(f"{data_dir} is not a valid file")
@@ -62,21 +55,52 @@ def get_v2_v3_mapping(v2_v3_mapping):
6255

6356
if __name__ == "__main__":
6457
parser = argparse.ArgumentParser(
65-
usage="""[--db-version <IMGT DB Version>]\n
66-
[--data-dir <directory for db file>]\n
67-
[--v2-to-v3-mapping <V2 to V3 mapping CSV file>]""",
6858
description="""
6959
py-ard tool to generate reference SQLite database.
7060
Allows updating db with custom V2 to V3 mappings.
61+
Displays the list of available IMGT database versions.
7162
""",
7263
)
73-
parser.add_argument("--db-version", dest="imgt_version")
74-
parser.add_argument("--data-dir", dest="data_dir")
75-
parser.add_argument("--v2-to-v3-mapping", dest="v2_v3_mapping")
76-
parser.add_argument("--refresh-mac", dest="refresh_mac", action="store_true")
77-
parser.add_argument("--re-install", dest="reinstall", action="store_true")
64+
parser.add_argument(
65+
"--list",
66+
dest="show_versions",
67+
action="store_true",
68+
help="Show Versions of available IMGT Databases",
69+
)
70+
parser.add_argument(
71+
"--db-version",
72+
dest="imgt_version",
73+
help="Import supplied IMGT_VERSION DB Version",
74+
)
75+
parser.add_argument(
76+
"--data-dir",
77+
dest="data_dir",
78+
help="Data directory to store imported data",
79+
)
80+
parser.add_argument(
81+
"--v2-to-v3-mapping", dest="v2_v3_mapping", help="V2 to V3 mapping CSV file"
82+
)
83+
parser.add_argument(
84+
"--refresh-mac",
85+
dest="refresh_mac",
86+
action="store_true",
87+
help="Only refresh MAC data",
88+
)
89+
parser.add_argument(
90+
"--re-install",
91+
dest="reinstall",
92+
action="store_true",
93+
help="reinstall a fresh version of database",
94+
)
7895
args = parser.parse_args()
7996

97+
if args.show_versions:
98+
versions = pyard.db_versions()
99+
print("Available IMGT Versions:")
100+
for version in versions:
101+
print(f" {version}")
102+
sys.exit(0)
103+
80104
imgt_version = get_imgt_version(args.imgt_version)
81105
# print(imgt_version)
82106

@@ -94,8 +118,13 @@ if __name__ == "__main__":
94118
db_fullname.unlink(missing_ok=True)
95119

96120
print(f"Importing IMGT database version: {imgt_version}")
97-
ard = pyard.ARD(imgt_version=imgt_version, data_dir=data_dir)
121+
try:
122+
ard = pyard.ARD(imgt_version=imgt_version, data_dir=data_dir)
123+
except ValueError as e:
124+
print(f"Error importing version {imgt_version}:", e)
125+
sys.exit(1)
98126
print(f"Import complete for database version: {imgt_version}")
127+
# We don't need ard object anymore
99128
del ard
100129

101130
if v2_to_v3_dict:

scripts/pyard-status

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,33 +23,34 @@
2323
#
2424
import argparse
2525
import os
26-
import pathlib
2726
import re
2827

28+
import pyard
2929
from pyard import db, data_repository
30+
from pyard.misc import get_data_dir
3031

3132

32-
def get_data_dir(data_dir):
33-
if data_dir:
34-
path = pathlib.Path(data_dir)
35-
if not path.exists() or not path.is_dir():
36-
raise RuntimeError(f"{data_dir} is not a valid directory")
37-
data_dir = path
38-
else:
39-
data_dir = db.get_pyard_db_install_directory()
40-
return data_dir
33+
def get_latest_imgt_version() -> int:
    """
    Gets the list of db versions and returns the maximum
    version numbered db.

    Version names that are not purely numeric are ignored wherever they
    appear in the list, rather than assuming the single non-numeric name
    is the last element.

    @return: int
    @raise ValueError: if no numeric version name is available
    """
    numeric_versions = [
        int(version) for version in pyard.db_versions() if version.isdecimal()
    ]
    if not numeric_versions:
        raise ValueError("No numeric IMGT database versions are available")
    return max(numeric_versions)
4140

4241

4342
if __name__ == "__main__":
4443
parser = argparse.ArgumentParser(
45-
usage="""
46-
[--data-dir <directory for db file>]\n
47-
""",
4844
description="""
4945
py-ard tool to provide a status report for reference SQLite databases.
5046
""",
5147
)
52-
parser.add_argument("--data-dir", dest="data_dir")
48+
parser.add_argument(
49+
"--data-dir",
50+
dest="data_dir",
51+
help="Data directory to store imported data",
52+
)
53+
5354
args = parser.parse_args()
5455
data_dir = get_data_dir(args.data_dir)
5556
# print(data_dir)
@@ -61,9 +62,24 @@ if __name__ == "__main__":
6162
# eg: get 3440 from 'pyard-3440.sqlite3'
6263
match = imgt_regex.match(filename)
6364
imgt_version = match.group(1) # Get first group
64-
db_connection = db.create_db_connection(data_dir, imgt_version)
65+
db_connection = db.create_db_connection(data_dir, imgt_version, ro=True)
6566
print("-" * 43)
66-
print(f"IMGT DB Version: {imgt_version}")
67+
if imgt_version == "Latest":
68+
db_version = data_repository.get_db_version(db_connection)
69+
print(f"IMGT DB Version: {imgt_version} ({db_version})")
70+
latest_version = get_latest_imgt_version()
71+
if latest_version == db_version:
72+
print(
73+
f"You're up to date. {db_version} is the most recent version."
74+
)
75+
else:
76+
print(f"There is a newer IMGT release than version {db_version}")
77+
print(
78+
f"Upgrade to latest version '{latest_version}'",
79+
"with 'pyard-import --re-install'",
80+
)
81+
else:
82+
print(f"IMGT DB Version: {imgt_version}")
6783
print("-" * 43)
6884
print(f"|{'Table Name':20}|{'Rows':20}|")
6985
print(f"|{'-' * 41}|")

0 commit comments

Comments
 (0)