Skip to content

Commit 737ea60

Browse files
committed
Remove Pandas dependency
- Use urllib for downloading files - Use simple_table.py for all pandas-like operations - Move file-load operations to the loader/ directory - Add tests for simple_table.py
1 parent 3371b9a commit 737ea60

17 files changed

+1115
-478
lines changed

pyard/data_repository.py

Lines changed: 160 additions & 156 deletions
Large diffs are not rendered by default.

pyard/db.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -641,12 +641,10 @@ def load_serology_associated_mappings(db_connection):
641641

642642

643643
def save_serology_broad_split_mappings(db_connection, sero_mapping):
    """Persist the serology broad -> splits mapping.

    Writes `sero_mapping` to the `serology_broad_split_mapping` table with
    a (broad, splits) column pair. The mapping values are stored as-is;
    any serialization (e.g. "/"-joining split lists) is expected to have
    been done by the caller.

    :param db_connection: open database connection handle
    :param sero_mapping: dict keyed by broad serology, valued by its splits
    """
    save_dict(
        db_connection,
        table_name="serology_broad_split_mapping",
        dictionary=sero_mapping,
        columns=("broad", "splits"),
    )
652650

pyard/load.py

Lines changed: 0 additions & 317 deletions
This file was deleted.
File renamed without changes.

pyard/loader/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Base URL of the ANHIG/IMGTHLA GitHub repository (raw content), from which
# IMGT HLA data files are downloaded; callers append "<version>/<path>".
IMGT_HLA_URL = "https://raw.githubusercontent.com/ANHIG/IMGTHLA/"

pyard/loader/cwd.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import os
2+
import csv
3+
4+
5+
def load_cwd2():
    """Load the bundled CWD2.csv into an allele -> locus mapping.

    Reads the CWD2.csv file that ships alongside this module and returns
    a dict keyed by the ALLELE column, valued by the LOCUS column.

    :return: dict mapping allele name to its locus
    """
    csv_path = os.path.join(os.path.dirname(__file__), "CWD2.csv")
    with open(csv_path, "r") as csv_file:
        # DictReader keys rows by the header line; one entry per data row.
        return {row["ALLELE"]: row["LOCUS"] for row in csv.DictReader(csv_file)}

pyard/loader/g_group.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import sys
2+
from urllib.error import URLError
3+
from urllib.request import urlopen
4+
5+
from ..loader import IMGT_HLA_URL
6+
from ..misc import get_G_name, get_2field_allele, get_3field_allele
7+
from ..simple_table import Table
8+
9+
10+
def load_g_group(imgt_version):
    """Download and parse WMDA hla_nom_g.txt into a G-group Table.

    Fetches hla_nom_g.txt for the given IMGT/HLA release, skips the
    6-line file header, and expands each ";"-separated record
    (locus;allele-list;g-code) into one row per slash-delimited allele.

    :param imgt_version: IMGT/HLA release directory name (e.g. "3440")
    :return: Table with columns Locus, A, G, 2d, 3d, lgx
    """
    ars_g_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt"
    try:
        response = urlopen(ars_g_url)
        raw_lines = [raw.decode("utf-8").strip() for raw in response]

        rows = []
        # First 6 lines of the WMDA file are header commentary.
        for record in raw_lines[6:]:
            if not record:
                continue
            parts = record.split(";")
            # Require locus, a non-empty allele list, and a G designation.
            if len(parts) < 3 or not parts[1] or not parts[2]:
                continue
            locus, allele_list, g_code = parts[0], parts[1], parts[2]
            g_name = get_G_name(allele_list)
            full_g = locus + g_name
            lgx = get_2field_allele(full_g)
            # One output row per allele in the "/"-delimited list.
            for allele in allele_list.split("/"):
                full_allele = locus + allele
                rows.append(
                    (
                        locus,
                        full_allele,
                        full_g,
                        get_2field_allele(full_allele),
                        get_3field_allele(full_allele),
                        lgx,
                    )
                )

        return Table(rows, ["Locus", "A", "G", "2d", "3d", "lgx"])

    except URLError as e:
        print(f"Error downloading {ars_g_url}", e, file=sys.stderr)
        sys.exit(1)

0 commit comments

Comments
 (0)