Skip to content

Commit dee2d1f

Browse files
committed
Allow refreshing MAC data
- Adds a `refresh_mac` flag to `ARD()` so that previously downloaded MAC data can be updated
1 parent e06cea7 commit dee2d1f

File tree

4 files changed

+31
-16
lines changed

4 files changed

+31
-16
lines changed

README.rst

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,21 @@ Example
5454

5555
.. code-block:: python3
5656
57-
from pyard import ARD
57+
import pyard
5858
59-
# Initialize ARD object
60-
ard = ARD('3290')
59+
# Initialize ARD object with a version of IMGT HLA database
60+
ard = pyard.ARD(3290)
6161
6262
# You can specify a data directory for temp files
63-
# ard = ARD('3290', data_dir='/tmp/py-ard')
63+
# ard = pyard.ARD('3290', data_dir='/tmp/py-ard')
6464
65-
# Initialize with latest DB
66-
ard = ARD()
65+
# Initialize with latest IMGT HLA database
66+
ard = pyard.ARD()
6767
68+
# You can choose to refresh the MAC codes for previously used versions
69+
# ard = pyard.ARD(3290, refresh_mac=True)
70+
71+
# Allele to reduce
6872
allele = "A*01:01:01"
6973
7074
ard.redux(allele, 'G')

pyard/data_repository.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
9797
return dup_g, g_group, lg_group, lgx_group
9898

9999

100-
def generate_mac_codes(db_connection: sqlite3.Connection):
100+
def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):
101101
"""
102102
MAC files come in 2 different versions:
103103
@@ -144,14 +144,16 @@ def generate_mac_codes(db_connection: sqlite3.Connection):
144144
:return:
145145
"""
146146
mac_table_name = 'mac_codes'
147-
if not db.table_exists(db_connection, mac_table_name):
147+
if refresh_mac or not db.table_exists(db_connection, mac_table_name):
148148
# Load the MAC file to a DataFrame
149149
mac_url = 'https://hml.nmdp.org/mac/files/numer.v3.zip'
150-
df_mac = pd.read_csv(mac_url, sep='\t', compression='zip', skiprows=3, names=['Code', 'Alleles'])
150+
df_mac = pd.read_csv(mac_url, sep='\t', compression='zip',
151+
skiprows=3, names=['Code', 'Alleles'])
151152
# Create a dict from code to alleles
152153
mac = df_mac.set_index("Code")["Alleles"].to_dict()
153154
# Save the mac dict to db
154-
db.save_dict(db_connection, table_name=mac_table_name, dictionary=mac, columns=('code', 'alleles'))
155+
db.save_dict(db_connection, table_name=mac_table_name,
156+
dictionary=mac, columns=('code', 'alleles'))
155157

156158

157159
def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_version):

pyard/db.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,16 @@ def save_dict(connection: sqlite3.Connection, table_name: str,
110110
:return: success status
111111
"""
112112
cursor = connection.cursor()
113+
114+
# Drop the table first
115+
drop_table_sql = f"DROP TABLE IF EXISTS {table_name}"
116+
cursor.execute(drop_table_sql)
117+
118+
# Create table
113119
create_table_sql = f"""CREATE TABLE {table_name} (
114120
{columns[0]} TEXT PRIMARY KEY,
115121
{columns[1]} TEXT NOT NULL
116122
)"""
117-
118-
# Create table
119123
cursor.execute(create_table_sql)
120124

121125
# insert
@@ -140,11 +144,15 @@ def save_set(connection: sqlite3.Connection, table_name: str, rows: Set, column:
140144
:return: success status
141145
"""
142146
cursor = connection.cursor()
147+
148+
# Drop the table first
149+
drop_table_sql = f"DROP TABLE IF EXISTS {table_name}"
150+
cursor.execute(drop_table_sql)
151+
152+
# Create table
143153
create_table_sql = f"""CREATE TABLE {table_name} (
144154
{column} TEXT PRIMARY KEY
145155
)"""
146-
147-
# Create table
148156
cursor.execute(create_table_sql)
149157

150158
# insert

pyard/pyard.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class ARD(object):
4141

4242
def __init__(self, imgt_version: str = 'Latest',
4343
remove_invalid: bool = True,
44-
data_dir: str = None):
44+
data_dir: str = None,
45+
refresh_mac: bool = False) -> None:
4546
"""
4647
ARD will load valid alleles, xx codes and MAC mappings for the given
4748
version of IMGT database, downloading and generating the database if
@@ -57,7 +58,7 @@ def __init__(self, imgt_version: str = 'Latest',
5758
self.db_connection = db.create_db_connection(data_dir, imgt_version)
5859

5960
# Load MAC codes
60-
generate_mac_codes(self.db_connection)
61+
generate_mac_codes(self.db_connection, refresh_mac)
6162
# Load Alleles and XX Codes
6263
self.valid_alleles, self.xx_codes = generate_alleles_and_xx_codes(self.db_connection, imgt_version)
6364
# Load ARS mappings

0 commit comments

Comments
 (0)