Skip to content

Commit 2807cd7

Browse files
committed
- Skip Building MAC tables
1 parent b8717e2 commit 2807cd7

File tree

4 files changed

+51
-29
lines changed

4 files changed

+51
-29
lines changed

pyard/data_repository.py

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -486,7 +486,9 @@ def generate_short_nulls(db_connection, who_group):
486486
return shortnulls
487487

488488

489-
def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):
489+
def generate_mac_codes(
490+
db_connection: sqlite3.Connection, refresh_mac: bool = False, load_mac: bool = True
491+
):
490492
"""
491493
MAC files come in 2 different versions:
492494
@@ -530,29 +532,31 @@ def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):
530532
531533
:param db_connection: Database connection to the sqlite database
532534
:param refresh_mac: Refresh the database with newer MAC data ?
535+
:param load_mac: Should MAC be loaded at all
533536
:return: None
534537
"""
535-
mac_table_name = "mac_codes"
536-
if refresh_mac or not db.table_exists(db_connection, mac_table_name):
537-
# Load the MAC file to a DataFrame
538-
mac_url = "https://hml.nmdp.org/mac/files/numer.v3.zip"
539-
df_mac = pd.read_csv(
540-
mac_url,
541-
sep="\t",
542-
compression="zip",
543-
skiprows=3,
544-
names=["Code", "Alleles"],
545-
keep_default_na=False,
546-
)
547-
# Create a dict from code to alleles
548-
mac = df_mac.set_index("Code")["Alleles"].to_dict()
549-
# Save the mac dict to db
550-
db.save_dict(
551-
db_connection,
552-
table_name=mac_table_name,
553-
dictionary=mac,
554-
columns=("code", "alleles"),
555-
)
538+
if load_mac:
539+
mac_table_name = "mac_codes"
540+
if refresh_mac or not db.table_exists(db_connection, mac_table_name):
541+
# Load the MAC file to a DataFrame
542+
mac_url = "https://hml.nmdp.org/mac/files/numer.v3.zip"
543+
df_mac = pd.read_csv(
544+
mac_url,
545+
sep="\t",
546+
compression="zip",
547+
skiprows=3,
548+
names=["Code", "Alleles"],
549+
keep_default_na=False,
550+
)
551+
# Create a dict from code to alleles
552+
mac = df_mac.set_index("Code")["Alleles"].to_dict()
553+
# Save the mac dict to db
554+
db.save_dict(
555+
db_connection,
556+
table_name=mac_table_name,
557+
dictionary=mac,
558+
columns=("code", "alleles"),
559+
)
556560

557561

558562
def to_serological_name(locus_name: str):

pyard/pyard.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,11 @@ class ARD(object):
8080
"""
8181

8282
def __init__(
83-
self, imgt_version: str = "Latest", data_dir: str = None, config: dict = None
83+
self,
84+
imgt_version: str = "Latest",
85+
data_dir: str = None,
86+
load_mac: bool = True,
87+
config: dict = None,
8488
):
8589
"""
8690
ARD will load valid alleles, xx codes and MAC mappings for the given
@@ -100,7 +104,7 @@ def __init__(
100104
self.db_connection = db.create_db_connection(data_dir, imgt_version)
101105

102106
# Load MAC codes
103-
dr.generate_mac_codes(self.db_connection, False)
107+
dr.generate_mac_codes(self.db_connection, refresh_mac=False, load_mac=load_mac)
104108
# Load ARS mappings
105109
self.ars_mappings, p_group = dr.generate_ars_mapping(
106110
self.db_connection, imgt_version
@@ -393,7 +397,7 @@ def validate(self, glstring):
393397
except InvalidAlleleError as e:
394398
raise InvalidTypingError(
395399
f"{glstring} is not valid GL String. \n {e.message}", e
396-
)
400+
) from None
397401

398402
def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool:
399403
if loc_antigen is None or code is None:
@@ -718,7 +722,7 @@ def refresh_mac_codes(self) -> None:
718722
Refreshes MAC code for the current IMGT db version.
719723
:return: None
720724
"""
721-
dr.generate_mac_codes(self.db_connection, True)
725+
dr.generate_mac_codes(self.db_connection, refresh_mac=True)
722726

723727
def get_db_version(self) -> str:
724728
"""

scripts/pyard-import

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,12 @@ if __name__ == "__main__":
9292
action="store_true",
9393
help="reinstall a fresh version of database",
9494
)
95+
parser.add_argument(
96+
"--skip-mac",
97+
dest="skip_mac",
98+
action="store_true",
99+
help="Skip creating MAC mapping",
100+
)
95101
args = parser.parse_args()
96102

97103
if args.show_versions:
@@ -118,8 +124,14 @@ if __name__ == "__main__":
118124
db_fullname.unlink(missing_ok=True)
119125

120126
print(f"Importing IMGT database version: {imgt_version}")
127+
if args.skip_mac:
128+
load_mac = False
129+
print(f"Skipping MAC tables creation")
130+
else:
131+
load_mac = True
132+
121133
try:
122-
ard = pyard.ARD(imgt_version=imgt_version, data_dir=data_dir)
134+
ard = pyard.ARD(imgt_version=imgt_version, data_dir=data_dir, load_mac=load_mac)
123135
except ValueError as e:
124136
print(f"Error importing version {imgt_version}:", e)
125137
sys.exit(1)
@@ -142,5 +154,5 @@ if __name__ == "__main__":
142154
if args.refresh_mac:
143155
print(f"Updating MACs")
144156
db_connection = db.create_db_connection(data_dir, imgt_version, ro=False)
145-
data_repository.generate_mac_codes(db_connection, True)
157+
data_repository.generate_mac_codes(db_connection, refresh_mac=True)
146158
print(f"Updated MACs for {imgt_version} IMGT database.")

scripts/pyard-status

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,9 @@ if __name__ == "__main__":
8484
print(f"|{'Table Name':20}|{'Rows':20}|")
8585
print(f"|{'-' * 41}|")
8686
for table in (
87-
data_repository.ars_mapping_tables + data_repository.code_mapping_tables
87+
data_repository.ars_mapping_tables
88+
+ data_repository.code_mapping_tables
89+
+ ["mac_codes"]
8890
):
8991
if db.table_exists(db_connection, table):
9092
total_rows = db.count_rows(db_connection, table)

0 commit comments

Comments
 (0)