Skip to content

Commit 737ea60

Browse files
committed
Remove Pandas dependency
- Use urllib for downloading files - Use simple_table.py for all pandas-like operations - Move file-load operations to the loader/ directory - Add tests for simple_table.py
1 parent 3371b9a commit 737ea60

17 files changed

+1115
-478
lines changed

pyard/data_repository.py

Lines changed: 160 additions & 156 deletions
Large diffs are not rendered by default.

pyard/db.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -641,12 +641,10 @@ def load_serology_associated_mappings(db_connection):
641641

642642

643643
def save_serology_broad_split_mappings(db_connection, sero_mapping):
    """Persist the serology broad -> splits mapping.

    Writes `sero_mapping` to the `serology_broad_split_mapping` table with
    a (broad, splits) column pair. The mapping values are stored as-is;
    any serialization (e.g. "/"-joining split lists) is expected to have
    been done by the caller.

    :param db_connection: open database connection handle
    :param sero_mapping: dict keyed by broad serology, valued by its splits
    """
    save_dict(
        db_connection,
        table_name="serology_broad_split_mapping",
        dictionary=sero_mapping,
        columns=("broad", "splits"),
    )
652650

pyard/load.py

Lines changed: 0 additions & 317 deletions
This file was deleted.
File renamed without changes.

pyard/loader/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Base URL of the ANHIG/IMGTHLA GitHub repository (raw content), from which
# IMGT HLA data files are downloaded; callers append "<version>/<path>".
IMGT_HLA_URL = "https://raw.githubusercontent.com/ANHIG/IMGTHLA/"

pyard/loader/cwd.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import os
2+
import csv
3+
4+
5+
def load_cwd2():
    """Load the bundled CWD2.csv into an allele -> locus mapping.

    Reads the CWD2.csv file that ships alongside this module and returns
    a dict keyed by the ALLELE column, valued by the LOCUS column.

    :return: dict mapping allele name to its locus
    """
    csv_path = os.path.join(os.path.dirname(__file__), "CWD2.csv")
    with open(csv_path, "r") as csv_file:
        # DictReader keys rows by the header line; one entry per data row.
        return {row["ALLELE"]: row["LOCUS"] for row in csv.DictReader(csv_file)}

pyard/loader/g_group.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import sys
2+
from urllib.error import URLError
3+
from urllib.request import urlopen
4+
5+
from ..loader import IMGT_HLA_URL
6+
from ..misc import get_G_name, get_2field_allele, get_3field_allele
7+
from ..simple_table import Table
8+
9+
10+
def load_g_group(imgt_version):
    """Download and parse WMDA hla_nom_g.txt into a G-group Table.

    Fetches hla_nom_g.txt for the given IMGT/HLA release, skips the
    6-line file header, and expands each ";"-separated record
    (locus;allele-list;g-code) into one row per slash-delimited allele.

    :param imgt_version: IMGT/HLA release directory name (e.g. "3440")
    :return: Table with columns Locus, A, G, 2d, 3d, lgx
    """
    ars_g_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt"
    try:
        response = urlopen(ars_g_url)
        raw_lines = [raw.decode("utf-8").strip() for raw in response]

        rows = []
        # First 6 lines of the WMDA file are header commentary.
        for record in raw_lines[6:]:
            if not record:
                continue
            parts = record.split(";")
            # Require locus, a non-empty allele list, and a G designation.
            if len(parts) < 3 or not parts[1] or not parts[2]:
                continue
            locus, allele_list, g_code = parts[0], parts[1], parts[2]
            g_name = get_G_name(allele_list)
            full_g = locus + g_name
            lgx = get_2field_allele(full_g)
            # One output row per allele in the "/"-delimited list.
            for allele in allele_list.split("/"):
                full_allele = locus + allele
                rows.append(
                    (
                        locus,
                        full_allele,
                        full_g,
                        get_2field_allele(full_allele),
                        get_3field_allele(full_allele),
                        lgx,
                    )
                )

        return Table(rows, ["Locus", "A", "G", "2d", "3d", "lgx"])

    except URLError as e:
        print(f"Error downloading {ars_g_url}", e, file=sys.stderr)
        sys.exit(1)

0 commit comments

Comments
 (0)