@@ -110,7 +110,7 @@ def reduce(allele, locus, column_name):
110110 return allele
111111 if "*" in allele :
112112 locus_allele = allele
113- elif ard_config [ "locus_in_allele_name" ] :
113+ elif ard_config . get ( "locus_in_allele_name" ) :
114114 locus_allele = allele
115115 else :
116116 locus_allele = f"{ locus } *{ allele } "
@@ -129,7 +129,7 @@ def reduce(allele, locus, column_name):
129129 return allele
130130 # print(f"reduced to '{reduced_allele}'")
131131 if reduced_allele :
132- if ard_config [ "keep_locus_in_allele_name" ] :
132+ if ard_config . get ( "keep_locus_in_allele_name" ) :
133133 allele = reduced_allele
134134 else :
135135 allele = remove_locus_name (reduced_allele )
@@ -139,16 +139,16 @@ def reduce(allele, locus, column_name):
139139 if verbose :
140140 print (f"\t { locus_allele } => { allele } " )
141141 else :
142- if ard_config [ "convert_v2_to_v3" ] :
142+ if ard_config . get ( "convert_v2_to_v3" ) :
143143 if ard .is_v2 (locus_allele ):
144144 v3_allele = ard .v2_to_v3 (locus_allele )
145- if not ard_config [ "keep_locus_in_allele_name" ] :
145+ if not ard_config . get ( "keep_locus_in_allele_name" ) :
146146 allele = remove_locus_name (v3_allele )
147147 else :
148148 allele = v3_allele
149149 if verbose :
150150 print (f"\t { locus_allele } => { allele } " )
151- elif ard_config [ "keep_locus_in_allele_name" ] :
151+ elif ard_config . get ( "keep_locus_in_allele_name" ) :
152152 allele = locus_allele
153153
154154 return allele
@@ -186,17 +186,29 @@ if __name__ == "__main__":
186186 dest = "imgt_version" ,
187187 help = "IPD-IMGT/HLA db to use for redux" ,
188188 )
189+ parser .add_argument (
190+ "-q" ,
191+ "--quiet" ,
192+ dest = "quiet" ,
193+ action = "store_true" ,
194+ default = False ,
195+ help = "Don't print verbose log" ,
196+ )
189197 args = parser .parse_args ()
190198 config_filename = args .config
191199
192200 print ("Using config file:" , config_filename )
193201 with open (config_filename ) as conf_file :
194202 ard_config = json .load (conf_file )
195203
196- verbose = ard_config ["verbose_log" ]
204+ if not args .quiet :
205+ verbose = ard_config .get ("verbose_log" )
206+ else :
207+ verbose = False
208+
197209 white_space_regex = re .compile (r"\s+" )
198210
199- if ard_config [ "output_file_format" ] == "xlsx" :
211+ if ard_config . get ( "output_file_format" ) == "xlsx" :
200212 try :
201213 import openpyxl
202214 except ImportError :
@@ -224,24 +236,20 @@ if __name__ == "__main__":
224236 keep_default_na = False ,
225237 )
226238 except FileNotFoundError as e :
227- print (f"File not found { ard_config [ 'in_csv_filename' ] } " , file = sys .stderr )
239+ print (f"File not found { ard_config . get ( 'in_csv_filename' ) } " , file = sys .stderr )
228240 sys .exit (1 )
229241
230242 reduce_prefix = "reduced_"
231243 failed_to_reduce_alleles = []
232- reduced_column_mappings = {}
233244 locus_column_mapping = ard_config ["locus_column_mapping" ]
234245 for subject in locus_column_mapping :
235- reduced_column_mappings [subject ] = {}
236246 for locus in locus_column_mapping [subject ]:
237- if locus not in reduced_column_mappings [subject ]:
238- reduced_column_mappings [subject ][locus ] = []
239247 # Reduce each of the specified columns
240248 locus_columns = locus_column_mapping [subject ][locus ]
241249 for column in locus_columns :
242250 if verbose :
243251 print (f"Column:{ column } =>" )
244- if ard_config [ "new_column_for_redux" ] :
252+ if ard_config . get ( "new_column_for_redux" ) :
245253 # insert a new column
246254 new_column_name = f"{ reduce_prefix } { column } "
247255 new_column_index = df .columns .get_loc (column ) + 1
@@ -251,17 +259,16 @@ if __name__ == "__main__":
251259 new_column_name ,
252260 df [column ].apply (clean_locus , locus = locus , column_name = column ),
253261 )
254- reduced_column_mappings [ subject ][ locus ]. append ( new_column_name )
262+ locus_columns [ locus_columns . index ( column )] = new_column_name
255263 else :
256264 # Apply clean_locus function to the column and replace the column
257265 df [column ] = df [column ].apply (
258266 clean_locus , locus = locus , column_name = column
259267 )
260- reduced_column_mappings [subject ][locus ].append (column )
261268
262269 # Map DRB3,DRB4,DRB5 to DRBX if specified
263270 # New columns DRBX_1 and DRBX_2 are created
264- if ard_config [ "map_drb345_to_drbx" ] :
271+ if ard_config . get ( "map_drb345_to_drbx" ) :
265272 drbx_loci = ["DRB3" , "DRB4" , "DRB5" ]
266273 drbx_columns = [
267274 col_name for col_name in df .columns if col_name .split ("_" )[1 ] in drbx_loci
@@ -273,18 +280,26 @@ if __name__ == "__main__":
273280 )
274281 df ["DRBX_1" ], df ["DRBX_2" ] = zip (* df_drbx )
275282
276- if ard_config ["generate_glstring" ]:
277- for subject in reduced_column_mappings :
278- for haplotype_num in range (2 ):
279- hap1_columns = list (
280- map (
281- lambda x : reduced_column_mappings [subject ][x ][haplotype_num ],
282- reduced_column_mappings [subject ].keys (),
283+ if ard_config .get ("generate_glstring" ):
284+ for subject in locus_column_mapping :
285+ slug_columns = []
286+ for locus in locus_column_mapping [subject ]:
287+ slug_column = locus + "_slug"
288+ slug_columns .append (slug_column )
289+ if len (locus_column_mapping [subject ][locus ]) > 1 :
290+ df [slug_column ] = (
291+ df [locus_column_mapping [subject ][locus ][0 ]]
292+ + "+"
293+ + df [locus_column_mapping [subject ][locus ][1 ]]
283294 )
284- )
285- df [subject + f"_haplotype_{ (haplotype_num + 1 )} " ] = df [
286- hap1_columns
287- ].agg ("~" .join , axis = 1 )
295+ else :
296+ df [slug_column ] = df [locus_column_mapping [subject ][locus ][0 ]]
297+
298+ df [subject + "_gl" ] = df [slug_columns ].agg ("^" .join , axis = 1 )
299+ df [subject + "_gl" ] = df [subject + "_gl" ].apply (
300+ lambda gl : gl .replace ("^+" , "" )
301+ )
302+ df .drop (columns = slug_columns , inplace = True )
288303
289304 # Save as XLSX if specified
290305 if ard_config ["output_file_format" ] == "xlsx" :
0 commit comments