Skip to content

Commit 5be407b

Browse files
committed
Only single lgx reductions
- For `lgx` reduction, tracking of duplicate alleles is removed. Duplicate alleles are instead mapped to the lowest-numbered allele in the list.
1 parent 3caf90f commit 5be407b

File tree

5 files changed

+28
-31
lines changed

5 files changed

+28
-31
lines changed

pyard/ard.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,7 @@ def _redux_allele(
246246
elif redux_type == "P" and allele in self.ars_mappings.p_group:
247247
return self.ars_mappings.p_group[allele]
248248
elif redux_type in ["lgx", "lg"]:
249-
if allele in self.ars_mappings.dup_lgx:
250-
redux_allele = self.ars_mappings.dup_lgx[allele]
251-
elif allele in self.ars_mappings.lgx_group:
249+
if allele in self.ars_mappings.lgx_group:
252250
redux_allele = self.ars_mappings.lgx_group[allele]
253251
else:
254252
# for 'lgx' or 'lg' mode when allele is not in G group,

pyard/data_repository.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
get_1field_allele,
5151
)
5252
from .serology import broad_splits_dna_mapping, SerologyMapping
53+
from .smart_sort import smart_sort_comparator
5354

5455

5556
def expression_reduce(df):
@@ -113,15 +114,6 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS
113114
mlgx = df_g_group.drop_duplicates(["2d", "lgx"])["2d"].value_counts()
114115
multiple_lgx_list = mlgx[mlgx > 1].index.to_list()
115116

116-
# Keep only the alleles that have more than 1 mapping
117-
dup_lgx = (
118-
df_g_group[df_g_group["2d"].isin(multiple_lgx_list)][["lgx", "2d"]]
119-
.drop_duplicates()
120-
.groupby("2d", as_index=True)
121-
.agg("/".join)
122-
.to_dict()["lgx"]
123-
)
124-
125117
# Extract G group mapping
126118
df_g = pd.concat(
127119
[
@@ -154,6 +146,25 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS
154146
)
155147
lgx_group = df_lgx.set_index("A")["lgx"].to_dict()
156148

149+
# Find the alleles that have more than 1 mapping
150+
dup_lgx = (
151+
df_g_group[df_g_group["2d"].isin(multiple_lgx_list)][["lgx", "2d"]]
152+
.drop_duplicates()
153+
.groupby("2d", as_index=True)
154+
.agg(list)
155+
.to_dict()["lgx"]
156+
)
157+
print(dup_lgx)
158+
# Do not keep duplicate alleles for lgx. Issue #333
159+
# DPA1*02:02/DPA1*02:07 ==> DPA1*02:02
160+
#
161+
lowest_numbered_dup_lgx = {
162+
k: sorted(v, key=functools.cmp_to_key(smart_sort_comparator))[0]
163+
for k, v in dup_lgx.items()
164+
}
165+
# Update the lgx_group with the allele with the lowest number
166+
lgx_group.update(lowest_numbered_dup_lgx)
167+
157168
# Extract exon mapping
158169
df_exon = pd.concat(
159170
[
@@ -164,7 +175,6 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS
164175

165176
ars_mapping = ARSMapping(
166177
dup_g=dup_g,
167-
dup_lgx=dup_lgx,
168178
g_group=g_group,
169179
p_group=p_group,
170180
lgx_group=lgx_group,

pyard/db.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -461,9 +461,6 @@ def set_user_version(connection: sqlite3.Connection, version: int):
461461

462462
def load_ars_mappings(db_connection):
463463
dup_g = load_dict(db_connection, table_name="dup_g", columns=("allele", "g_group"))
464-
dup_lgx = load_dict(
465-
db_connection, table_name="dup_lgx", columns=("allele", "lgx_group")
466-
)
467464
g_group = load_dict(db_connection, table_name="g_group", columns=("allele", "g"))
468465
p_group = load_dict(db_connection, table_name="p_group", columns=("allele", "p"))
469466
lgx_group = load_dict(
@@ -475,7 +472,6 @@ def load_ars_mappings(db_connection):
475472
p_not_g = load_dict(db_connection, table_name="p_not_g", columns=("allele", "lgx"))
476473
return ARSMapping(
477474
dup_g=dup_g,
478-
dup_lgx=dup_lgx,
479475
g_group=g_group,
480476
p_group=p_group,
481477
lgx_group=lgx_group,
@@ -497,12 +493,6 @@ def save_ars_mappings(db_connection: sqlite3.Connection, ars_mapping: ARSMapping
497493
dictionary=ars_mapping.dup_g,
498494
columns=("allele", "g_group"),
499495
)
500-
save_dict(
501-
db_connection,
502-
table_name="dup_lgx",
503-
dictionary=ars_mapping.dup_lgx,
504-
columns=("allele", "lgx_group"),
505-
)
506496
save_dict(
507497
db_connection,
508498
table_name="g_group",

pyard/mappings.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
ars_mapping_tables = [
2525
"dup_g",
26-
"dup_lgx",
2726
"g_group",
2827
"p_group",
2928
"lgx_group",

tests/features/p_g_group.feature

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ Feature: P and G Groups
9090
| C*02:10 | lg | C*02:02g |
9191
| C*02:10 | lgx | C*02:02 |
9292

93-
Examples: lgx with duplicates
94-
| Allele | Level | Redux Allele |
95-
| DPA1*02:12 | lgx | DPA1*02:02/DPA1*02:07 |
96-
| DPA1*02:12 | lg | DPA1*02:02g/DPA1*02:07g |
97-
| DQA1*03:03 | lgx | DQA1*03:01 |
98-
| DQA1*03:03 | lg | DQA1*03:01g |
99-
| DQA1*03:03:09 | lg | DQA1*03:03g |
93+
Examples: lgx redux with duplicate G groups
94+
| Allele | Level | Redux Allele |
95+
| DPA1*02:12 | lgx | DPA1*02:02 |
96+
| DPA1*02:12 | lg | DPA1*02:02g |
97+
| DQA1*03:03 | lgx | DQA1*03:01 |
98+
| DQA1*03:03 | lg | DQA1*03:01g |
99+
| DQA1*03:03:09 | lg | DQA1*03:03g |

0 commit comments

Comments
 (0)