2020# > http://www.fsf.org/licensing/licenses/lgpl.html
2121# > http://www.opensource.org/licenses/lgpl-license.php
2222#
23+ from collections import namedtuple
2324import functools
2425import sqlite3
2526
# List of expression characters
expression_chars = ['N', 'Q', 'L', 'S']

# Names of the database tables that together hold the ARS mapping.
# generate_ars_mapping() passes this list to db.tables_exists() to decide
# whether the mapping can be loaded from the database, and the same names
# are used as the field names of ARSMapping.
ars_mapping_tables = ['dup_g', 'dup_lg', 'dup_lgx', 'g_group', 'lg_group', 'lgx_group']

# Return type of generate_ars_mapping(): one field per table above.
# The field order (dup_g, dup_lg, dup_lgx, g_group, lg_group, lgx_group)
# matches the plain tuple that used to be returned, so positional
# unpacking of the result keeps working alongside attribute access.
ARSMapping = namedtuple("ARSMapping", ars_mapping_tables)
43+
4044
4145def get_n_field_allele (allele : str , n : int ) -> str :
4246 """
@@ -64,12 +68,15 @@ def get_2field_allele(a: str) -> str:
6468
6569
6670def generate_ars_mapping (db_connection : sqlite3 .Connection , imgt_version ):
67- if db .tables_exists (db_connection , [ 'dup_g' , 'g_group' , 'lg_group' , 'lgx_group' ] ):
71+ if db .tables_exists (db_connection , ars_mapping_tables ):
6872 dup_g = db .load_dict (db_connection , table_name = 'dup_g' , columns = ('allele' , 'g_group' ))
73+ dup_lg = db .load_dict (db_connection , table_name = 'dup_lg' , columns = ('allele' , 'lg_group' ))
74+ dup_lgx = db .load_dict (db_connection , table_name = 'dup_lgx' , columns = ('allele' , 'lgx_group' ))
6975 g_group = db .load_dict (db_connection , table_name = 'g_group' , columns = ('allele' , 'g' ))
7076 lg_group = db .load_dict (db_connection , table_name = 'lg_group' , columns = ('allele' , 'lg' ))
7177 lgx_group = db .load_dict (db_connection , table_name = 'lgx_group' , columns = ('allele' , 'lgx' ))
72- return dup_g , g_group , lg_group , lgx_group
78+ return ARSMapping (dup_g = dup_g , dup_lg = dup_lg , dup_lgx = dup_lgx ,
79+ g_group = g_group , lg_group = lg_group , lgx_group = lgx_group )
7380
7481 ars_url = f'{ IMGT_HLA_URL } { imgt_version } /wmda/hla_nom_g.txt'
7582 df = pd .read_csv (ars_url , skiprows = 6 , names = ["Locus" , "A" , "G" ], sep = ";" ).dropna ()
@@ -84,32 +91,36 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
8491 df ['lg' ] = df ['G' ].apply (lambda a : ":" .join (a .split (":" )[0 :2 ]) + "g" )
8592 df ['lgx' ] = df ['G' ].apply (lambda a : ":" .join (a .split (":" )[0 :2 ]))
8693
94+ # multiple Gs
8795 mg = df .drop_duplicates (['2d' , 'G' ])['2d' ].value_counts ()
8896 multiple_g_list = mg [mg > 1 ].reset_index ()['index' ].to_list ()
8997
98+ # Keep only the alleles that have more than 1 mapping
9099 dup_g = df [df ['2d' ].isin (multiple_g_list )][['G' , '2d' ]] \
91100 .drop_duplicates () \
92101 .groupby ('2d' , as_index = True ).agg ("/" .join ) \
93102 .to_dict ()['G' ]
94103
104+ # multiple lg
95105 mlg = df .drop_duplicates (['2d' , 'lg' ])['2d' ].value_counts ()
96106 multiple_lg_list = mlg [mlg > 1 ].reset_index ()['index' ].to_list ()
97107
108+ # Keep only the alleles that have more than 1 mapping
98109 dup_lg = df [df ['2d' ].isin (multiple_lg_list )][['lg' , '2d' ]] \
99110 .drop_duplicates () \
100111 .groupby ('2d' , as_index = True ).agg ("/" .join ) \
101112 .to_dict ()['lg' ]
102113
114+ # multiple lgx
103115 mlgx = df .drop_duplicates (['2d' , 'lgx' ])['2d' ].value_counts ()
104116 multiple_lgx_list = mlgx [mlgx > 1 ].reset_index ()['index' ].to_list ()
105117
118+ # Keep only the alleles that have more than 1 mapping
106119 dup_lgx = df [df ['2d' ].isin (multiple_lgx_list )][['lgx' , '2d' ]] \
107120 .drop_duplicates () \
108121 .groupby ('2d' , as_index = True ).agg ("/" .join ) \
109122 .to_dict ()['lgx' ]
110123
111-
112-
113124 # Creating dictionaries with mac_code->ARS group mapping
114125 df_g = pd .concat ([
115126 df [['2d' , 'G' ]].rename (columns = {'2d' : 'A' }),
@@ -139,7 +150,8 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
139150 db .save_dict (db_connection , table_name = 'lg_group' , dictionary = lg_group , columns = ('allele' , 'lg' ))
140151 db .save_dict (db_connection , table_name = 'lgx_group' , dictionary = lgx_group , columns = ('allele' , 'lgx' ))
141152
142- return dup_g , dup_lg , dup_lgx , g_group , lg_group , lgx_group
153+ return ARSMapping (dup_g = dup_g , dup_lg = dup_lg , dup_lgx = dup_lgx ,
154+ g_group = g_group , lg_group = lg_group , lgx_group = lgx_group )
143155
144156
145157def generate_alleles_and_xx_codes (db_connection : sqlite3 .Connection , imgt_version ):
@@ -350,7 +362,8 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
350362
351363 # re-sort allele lists into smartsort order
352364 for sero in sero_mapping .keys ():
353- sero_mapping [sero ] = '/' .join (sorted (sero_mapping [sero ].split ('/' ), key = functools .cmp_to_key (smart_sort_comparator )))
365+ sero_mapping [sero ] = '/' .join (
366+ sorted (sero_mapping [sero ].split ('/' ), key = functools .cmp_to_key (smart_sort_comparator )))
354367
355368 # Save the serology mapping to db
356369 db .save_dict (db_connection , table_name = 'serology_mapping' ,
0 commit comments