Skip to content

Commit 2917c0e

Browse files
authored
Merge pull request #188 from pbashyal-nmdp/remove_lg_table
Remove lg tables and corresponding dicts
2 parents 7544e83 + 53d7522 commit 2917c0e

File tree

2 files changed

+10
-62
lines changed

2 files changed

+10
-62
lines changed

pyard/data_repository.py

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,8 @@
3838

3939
ars_mapping_tables = [
4040
"dup_g",
41-
"dup_lg",
4241
"dup_lgx",
4342
"g_group",
44-
"lg_group",
4543
"lgx_group",
4644
"exon_group",
4745
"p_group",
@@ -81,18 +79,12 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
8179
dup_g = db.load_dict(
8280
db_connection, table_name="dup_g", columns=("allele", "g_group")
8381
)
84-
dup_lg = db.load_dict(
85-
db_connection, table_name="dup_lg", columns=("allele", "lg_group")
86-
)
8782
dup_lgx = db.load_dict(
8883
db_connection, table_name="dup_lgx", columns=("allele", "lgx_group")
8984
)
9085
g_group = db.load_dict(
9186
db_connection, table_name="g_group", columns=("allele", "g")
9287
)
93-
lg_group = db.load_dict(
94-
db_connection, table_name="lg_group", columns=("allele", "lg")
95-
)
9688
lgx_group = db.load_dict(
9789
db_connection, table_name="lgx_group", columns=("allele", "lgx")
9890
)
@@ -107,10 +99,8 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
10799
)
108100
return ARSMapping(
109101
dup_g=dup_g,
110-
dup_lg=dup_lg,
111102
dup_lgx=dup_lgx,
112103
g_group=g_group,
113-
lg_group=lg_group,
114104
lgx_group=lgx_group,
115105
exon_group=exon_group,
116106
p_group=p_group,
@@ -159,7 +149,6 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
159149

160150
df["2d"] = df["A"].apply(get_2field_allele)
161151
df["3d"] = df["A"].apply(get_3field_allele)
162-
df["lg"] = df["G"].apply(lambda a: ":".join(a.split(":")[0:2]) + "g")
163152
df["lgx"] = df["G"].apply(lambda a: ":".join(a.split(":")[0:2]))
164153

165154
# compare df_P["2d"] with df["2d"] to find 2-field alleles in the
@@ -191,19 +180,6 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
191180
.to_dict()["G"]
192181
)
193182

194-
# multiple lg
195-
mlg = df.drop_duplicates(["2d", "lg"])["2d"].value_counts()
196-
multiple_lg_list = mlg[mlg > 1].reset_index()["index"].to_list()
197-
198-
# Keep only the alleles that have more than 1 mapping
199-
dup_lg = (
200-
df[df["2d"].isin(multiple_lg_list)][["lg", "2d"]]
201-
.drop_duplicates()
202-
.groupby("2d", as_index=True)
203-
.agg("/".join)
204-
.to_dict()["lg"]
205-
)
206-
207183
# multiple lgx
208184
mlgx = df.drop_duplicates(["2d", "lgx"])["2d"].value_counts()
209185
multiple_lgx_list = mlgx[mlgx > 1].reset_index()["index"].to_list()
@@ -228,15 +204,6 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
228204
)
229205
g_group = df_g.set_index("A")["G"].to_dict()
230206

231-
df_lg = pd.concat(
232-
[
233-
df[["2d", "lg"]].rename(columns={"2d": "A"}),
234-
df[["3d", "lg"]].rename(columns={"3d": "A"}),
235-
df[["A", "lg"]],
236-
]
237-
)
238-
lg_group = df_lg.set_index("A")["lg"].to_dict()
239-
240207
df_lgx = pd.concat(
241208
[
242209
df[["2d", "lgx"]].rename(columns={"2d": "A"}),
@@ -267,12 +234,6 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
267234
dictionary=dup_g,
268235
columns=("allele", "g_group"),
269236
)
270-
db.save_dict(
271-
db_connection,
272-
table_name="dup_lg",
273-
dictionary=dup_lg,
274-
columns=("allele", "lg_group"),
275-
)
276237
db.save_dict(
277238
db_connection,
278239
table_name="dup_lgx",
@@ -282,12 +243,6 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
282243
db.save_dict(
283244
db_connection, table_name="g_group", dictionary=g_group, columns=("allele", "g")
284245
)
285-
db.save_dict(
286-
db_connection,
287-
table_name="lg_group",
288-
dictionary=lg_group,
289-
columns=("allele", "lg"),
290-
)
291246
db.save_dict(
292247
db_connection,
293248
table_name="lgx_group",
@@ -309,10 +264,8 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
309264

310265
return ARSMapping(
311266
dup_g=dup_g,
312-
dup_lg=dup_lg,
313267
dup_lgx=dup_lgx,
314268
g_group=g_group,
315-
lg_group=lg_group,
316269
lgx_group=lgx_group,
317270
exon_group=exon_group,
318271
p_group=p_group,
@@ -434,7 +387,7 @@ def generate_alleles_and_xx_codes_and_who(
434387

435388
# W H O
436389
who_alleles = set(allele_df["Allele"])
437-
# Save this version of the who alleles
390+
# Save this version of the WHO alleles
438391
db.save_set(db_connection, "who_alleles", who_alleles, "allele")
439392
# Create WHO mapping from the unique alleles in the 1-field column
440393
unique_alleles = allele_df["Allele"].unique()

pyard/pyard.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -187,24 +187,19 @@ def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES, reping=True) ->
187187
return self.ars_mappings.dup_g[allele]
188188
else:
189189
return self.ars_mappings.g_group[allele]
190-
elif redux_type == "lg":
191-
if allele in self.ars_mappings.dup_lg:
192-
return self.ars_mappings.dup_lg[allele]
193-
elif allele in self.ars_mappings.lg_group:
194-
return self.ars_mappings.lg_group[allele]
195-
else:
196-
# for 'lg' when allele is not in G group,
197-
# return allele with only first 2 field
198-
return ":".join(allele.split(":")[0:2]) + "g"
199-
elif redux_type == "lgx":
190+
elif redux_type in ["lgx", "lg"]:
200191
if allele in self.ars_mappings.dup_lgx:
201-
return self.ars_mappings.dup_lgx[allele]
192+
redux_allele = self.ars_mappings.dup_lgx[allele]
202193
elif allele in self.ars_mappings.lgx_group:
203-
return self.ars_mappings.lgx_group[allele]
194+
redux_allele = self.ars_mappings.lgx_group[allele]
204195
else:
205-
# for 'lgx' when allele is not in G group,
196+
# for 'lgx' or 'lg' mode when allele is not in G group,
206197
# return the allele with only its first 2 fields
207-
return ":".join(allele.split(":")[0:2])
198+
redux_allele = ":".join(allele.split(":")[0:2])
199+
if redux_type == "lg":
200+
# lg mode is the lgx reduction with "g" appended
201+
return redux_allele + "g"
202+
return redux_allele
208203
elif redux_type == "W":
209204
# new redux_type which is full WHO expansion
210205
if self._is_who_allele(allele):

0 commit comments

Comments
 (0)