@@ -81,6 +81,8 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
8181
8282 df ['2d' ] = df ['A' ].apply (get_2field_allele )
8383 df ['3d' ] = df ['A' ].apply (get_3field_allele )
84+ df ['lg' ] = df ['G' ].apply (lambda a : ":" .join (a .split (":" )[0 :2 ]) + "g" )
85+ df ['lgx' ] = df ['G' ].apply (lambda a : ":" .join (a .split (":" )[0 :2 ]))
8486
8587 mg = df .drop_duplicates (['2d' , 'G' ])['2d' ].value_counts ()
8688 multiple_g_list = mg [mg > 1 ].reset_index ()['index' ].to_list ()
@@ -90,8 +92,23 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
9092 .groupby ('2d' , as_index = True ).agg ("/" .join ) \
9193 .to_dict ()['G' ]
9294
93- df ['lg' ] = df ['G' ].apply (lambda a : ":" .join (a .split (":" )[0 :2 ]) + "g" )
94- df ['lgx' ] = df ['G' ].apply (lambda a : ":" .join (a .split (":" )[0 :2 ]))
95+ mlg = df .drop_duplicates (['2d' , 'lg' ])['2d' ].value_counts ()
96+ multiple_lg_list = mlg [mlg > 1 ].reset_index ()['index' ].to_list ()
97+
98+ dup_lg = df [df ['2d' ].isin (multiple_lg_list )][['lg' , '2d' ]] \
99+ .drop_duplicates () \
100+ .groupby ('2d' , as_index = True ).agg ("/" .join ) \
101+ .to_dict ()['lg' ]
102+
103+ mlgx = df .drop_duplicates (['2d' , 'lgx' ])['2d' ].value_counts ()
104+ multiple_lgx_list = mlgx [mlgx > 1 ].reset_index ()['index' ].to_list ()
105+
106+ dup_lgx = df [df ['2d' ].isin (multiple_lgx_list )][['lgx' , '2d' ]] \
107+ .drop_duplicates () \
108+ .groupby ('2d' , as_index = True ).agg ("/" .join ) \
109+ .to_dict ()['lgx' ]
110+
111+
95112
96113 # Creating dictionaries with mac_code->ARS group mapping
97114 df_g = pd .concat ([
@@ -116,11 +133,13 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
116133 lgx_group = df_lgx .set_index ('A' )['lgx' ].to_dict ()
117134
118135 db .save_dict (db_connection , table_name = 'dup_g' , dictionary = dup_g , columns = ('allele' , 'g_group' ))
136+ db .save_dict (db_connection , table_name = 'dup_lg' , dictionary = dup_lg , columns = ('allele' , 'lg_group' ))
137+ db .save_dict (db_connection , table_name = 'dup_lgx' , dictionary = dup_lgx , columns = ('allele' , 'lgx_group' ))
119138 db .save_dict (db_connection , table_name = 'g_group' , dictionary = g_group , columns = ('allele' , 'g' ))
120139 db .save_dict (db_connection , table_name = 'lg_group' , dictionary = lg_group , columns = ('allele' , 'lg' ))
121140 db .save_dict (db_connection , table_name = 'lgx_group' , dictionary = lgx_group , columns = ('allele' , 'lgx' ))
122141
123- return dup_g , g_group , lg_group , lgx_group
142+ return dup_g , dup_lg , dup_lgx , g_group , lg_group , lgx_group
124143
125144
126145def generate_alleles_and_xx_codes (db_connection : sqlite3 .Connection , imgt_version ):
0 commit comments