|
28 | 28 | from . import db |
29 | 29 | from .broad_splits import broad_splits_dna_mapping |
30 | 30 | from .broad_splits import broad_splits_ser_mapping |
31 | | -from .misc import get_2field_allele, get_3field_allele, number_of_fields |
| 31 | +from .misc import ( |
| 32 | + get_2field_allele, |
| 33 | + get_3field_allele, |
| 34 | + number_of_fields, |
| 35 | + get_1field_allele, |
| 36 | +) |
32 | 37 | from .misc import expression_chars, get_G_name, get_P_name |
33 | 38 |
|
34 | 39 | # GitHub URL where IMGT HLA files are downloaded. |
@@ -241,9 +246,12 @@ def load_g_group(imgt_version): |
241 | 246 | # A* + 02:01 = A*02:01 |
242 | 247 | df["A"] = df["Locus"] + df["A"] |
243 | 248 | df["G"] = df["Locus"] + df["G"] |
| 249 | + # Create 2- and 3-field versions of the alleles |
244 | 250 | df["2d"] = df["A"].apply(get_2field_allele) |
245 | 251 | df["3d"] = df["A"].apply(get_3field_allele) |
| 252 | + # lgx is the 2-field version of the G group allele |
246 | 253 | df["lgx"] = df["G"].apply(get_2field_allele) |
| 254 | + |
247 | 255 | return df |
248 | 256 |
|
249 | 257 |
|
@@ -384,28 +392,23 @@ def generate_alleles_and_xx_codes_and_who( |
384 | 392 | db.save_dict(db_connection, "xx_codes", flat_xx_codes, ("allele_1d", "allele_list")) |
385 | 393 |
|
386 | 394 | # W H O |
387 | | - who_alleles = set(allele_df["Allele"]) |
| 395 | + who_alleles = allele_df["Allele"].to_list() |
388 | 396 | # Save this version of the WHO alleles |
389 | 397 | db.save_set(db_connection, "who_alleles", who_alleles, "allele") |
| 398 | + |
390 | 399 | # Create WHO mapping from the unique alleles in the 1-field column |
391 | | - unique_alleles = allele_df["Allele"].unique() |
392 | | - who_df1 = pd.DataFrame(unique_alleles, columns=["Allele"]) |
393 | | - who_df1["nd"] = allele_df["Allele"].apply(lambda x: x.split(":")[0]) |
394 | | - # Create WHO mapping from the unique alleles in the 2-field column |
395 | | - who_df2 = pd.DataFrame(unique_alleles, columns=["Allele"]) |
396 | | - who_df2["nd"] = allele_df["Allele"].apply(get_2field_allele) |
397 | | - # Create WHO mapping from the unique alleles in the 3-field column |
398 | | - who_df3 = pd.DataFrame(unique_alleles, columns=["Allele"]) |
399 | | - who_df3["nd"] = allele_df["Allele"].apply(get_3field_allele) |
400 | | - # Combine n-field dataframes in 1 |
401 | | - |
402 | | - # Create g_codes expansion mapping from the same tables used to reduce to G |
403 | | - g_df = pd.DataFrame(list(ars_mappings.g_group.items()), columns=["Allele", "nd"]) |
404 | | - |
405 | | - # Create p_codes expansion mapping from the p_group table |
406 | | - p_df = pd.DataFrame(list(p_group.items()), columns=["Allele", "nd"]) |
407 | | - |
408 | | - who_codes = pd.concat([who_df1, who_df2, who_df3, g_df, p_df]) |
| 400 | + allele_df["1d"] = allele_df["Allele"].apply(get_1field_allele) |
| 401 | + |
| 402 | + who_codes = pd.concat( |
| 403 | + [ |
| 404 | + allele_df[["Allele", "1d"]].rename(columns={"1d": "nd"}), |
| 405 | + allele_df[["Allele", "2d"]].rename(columns={"2d": "nd"}), |
| 406 | + allele_df[["Allele", "3d"]].rename(columns={"3d": "nd"}), |
| 407 | + pd.DataFrame(ars_mappings.g_group.items(), columns=["Allele", "nd"]), |
| 408 | + pd.DataFrame(p_group.items(), columns=["Allele", "nd"]), |
| 409 | + ], |
| 410 | + ignore_index=True, |
| 411 | + ) |
409 | 412 |
|
410 | 413 | # remove valid alleles from who_codes to avoid recursion |
411 | 414 | for k in who_alleles: |
|
0 commit comments