Skip to content

Commit 85b7c11

Browse files
authored
Merge pull request #239 from pbashyal-nmdp/fix_p_group_mappings
Fix 2 and 3 field P group redux
2 parents 95a4ee9 + b598cd7 commit 85b7c11

File tree

4 files changed

+23
-8
lines changed

4 files changed

+23
-8
lines changed

pyard/data_repository.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,6 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS
7979
df_g_group = load_g_group(imgt_version)
8080
df_p_group = load_p_group(imgt_version)
8181

82-
# Extract p group mapping
83-
p_group = df_p_group.set_index("A")["P"].to_dict()
84-
8582
# compare df_p_group["2d"] with df_g_group["2d"] to find 2-field alleles in the
8683
# P-group that aren't in the G-group
8784
p_not_in_g = set(df_p_group["2d"]) - set(df_g_group["2d"])
@@ -124,7 +121,7 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS
124121
.to_dict()["lgx"]
125122
)
126123

127-
# Creating dictionaries with mac_code->ARD group mapping
124+
# Extract G group mapping
128125
df_g = pd.concat(
129126
[
130127
df_g_group[["2d", "G"]].rename(columns={"2d": "A"}),
@@ -135,6 +132,18 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS
135132
)
136133
g_group = df_g.set_index("A")["G"].to_dict()
137134

135+
# Extract P group mapping
136+
df_p = pd.concat(
137+
[
138+
df_p_group[["2d", "P"]].rename(columns={"2d": "A"}),
139+
df_p_group[["3d", "P"]].rename(columns={"3d": "A"}),
140+
df_p_group[["A", "P"]],
141+
],
142+
ignore_index=True,
143+
)
144+
p_group = df_p.set_index("A")["P"].to_dict()
145+
146+
# Extract lgx group mapping
138147
df_lgx = pd.concat(
139148
[
140149
df_g_group[["2d", "lgx"]].rename(columns={"2d": "A"}),
@@ -144,7 +153,7 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS
144153
)
145154
lgx_group = df_lgx.set_index("A")["lgx"].to_dict()
146155

147-
# exon
156+
# Extract exon mapping
148157
df_exon = pd.concat(
149158
[
150159
df_g_group[["A", "3d"]].rename(columns={"3d": "exon"}),

pyard/db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def create_db_connection(data_dir, imgt_version, ro=False):
5858
if imgt_version != "Latest":
5959
if not pathlib.Path(db_filename).exists():
6060
all_imgt_versions = get_imgt_db_versions()
61-
if imgt_version not in all_imgt_versions:
61+
if str(imgt_version) not in all_imgt_versions:
6262
raise ValueError(
6363
f"{imgt_version} is not a valid IMGT database version."
6464
)

pyard/load.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ def load_p_group(imgt_version):
110110
sys.exit(1)
111111

112112
# the P-group is named for its first allele
113-
df_p["P"] = df_p["A"].apply(get_P_name)
113+
# The P column is already present in the file
114+
# df_p["P"] = df_p["A"].apply(get_P_name)
114115
# convert slash delimited string to a list
115116
df_p["A"] = df_p["A"].apply(lambda a: a.split("/"))
116117
df_p = df_p.explode("A")
@@ -121,6 +122,7 @@ def load_p_group(imgt_version):
121122
# C* 06:06:01:02 06:06P
122123
# C* 06:271 06:06P
123124
df_p["2d"] = df_p["A"].apply(get_2field_allele)
125+
df_p["3d"] = df_p["A"].apply(get_3field_allele)
124126
# lgx has the P-group name without the P for comparison
125127
df_p["lgx"] = df_p["P"].apply(get_2field_allele)
126128
return df_p

tests/features/p_group.feature

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,8 @@ Feature: P Groups
99
Examples:
1010
| Allele | Level | Redux Allele |
1111
| B*44:15:01:01 | P | B*44:15P |
12-
| A*02:01:01 | P | A*02:01:01 |
12+
| A*02:01:01 | P | A*02:01P |
13+
| B*07:02 | P | B*07:02P |
14+
| B*07:02:01 | P | B*07:02P |
15+
| B*07:02:01:01 | P | B*07:02P |
16+
| B*15:14 | P | B*15:14P |

0 commit comments

Comments
 (0)