@@ -110,7 +110,7 @@ def reduce(allele, locus, column_name):
         return allele
     if "*" in allele:
         locus_allele = allele
-    elif ard_config["locus_in_allele_name"]:
+    elif ard_config.get("locus_in_allele_name"):
         locus_allele = allele
     else:
         locus_allele = f"{locus}*{allele}"
@@ -129,7 +129,7 @@ def reduce(allele, locus, column_name):
             return allele
         # print(f"reduced to '{reduced_allele}'")
         if reduced_allele:
-            if ard_config["keep_locus_in_allele_name"]:
+            if ard_config.get("keep_locus_in_allele_name"):
                 allele = reduced_allele
             else:
                 allele = remove_locus_name(reduced_allele)
@@ -139,26 +139,25 @@ def reduce(allele, locus, column_name):
         if verbose:
             print(f"\t{locus_allele} => {allele}")
     else:
-        if ard_config["convert_v2_to_v3"]:
+        if ard_config.get("convert_v2_to_v3"):
             if ard.is_v2(locus_allele):
                 v3_allele = ard.v2_to_v3(locus_allele)
-                if not ard_config["keep_locus_in_allele_name"]:
+                if not ard_config.get("keep_locus_in_allele_name"):
                     allele = remove_locus_name(v3_allele)
                 else:
                     allele = v3_allele
                 if verbose:
                     print(f"\t{locus_allele} => {allele}")
-        elif ard_config["keep_locus_in_allele_name"]:
+        elif ard_config.get("keep_locus_in_allele_name"):
             allele = locus_allele
 
     return allele
 
 
-def clean_locus(allele: str, column_name: str = "Unknown") -> str:
+def clean_locus(allele: str, locus: str, column_name: str = "Unknown") -> str:
     if allele:
         # Remove all white spaces
         allele = white_space_regex.sub("", allele)
-        locus = column_name.split("_")[1].upper()
         # If the allele comes in as an allele list, apply reduce to all alleles
         if "/" in allele:
             return "/".join(map(reduce, allele.split("/"), locus, column_name))
@@ -187,17 +186,29 @@ if __name__ == "__main__":
         dest="imgt_version",
         help="IPD-IMGT/HLA db to use for redux",
     )
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        dest="quiet",
+        action="store_true",
+        default=False,
+        help="Don't print verbose log",
+    )
     args = parser.parse_args()
     config_filename = args.config
 
     print("Using config file:", config_filename)
     with open(config_filename) as conf_file:
         ard_config = json.load(conf_file)
 
-    verbose = ard_config["verbose_log"]
+    if not args.quiet:
+        verbose = ard_config.get("verbose_log")
+    else:
+        verbose = False
+
     white_space_regex = re.compile(r"\s+")
 
-    if ard_config["output_file_format"] == "xlsx":
+    if ard_config.get("output_file_format") == "xlsx":
         try:
             import openpyxl
         except ImportError:
@@ -216,36 +227,48 @@ if __name__ == "__main__":
     # Read only the columns to be saved.
     # Header is the first row
     # Don't convert to NAs
-    df = pd.read_csv(
-        ard_config["in_csv_filename"],
-        usecols=ard_config["columns_from_csv"],
-        header=0,
-        dtype=str,
-        keep_default_na=False,
-    )
+    try:
+        df = pd.read_csv(
+            ard_config["in_csv_filename"],
+            usecols=ard_config["columns_from_csv"],
+            header=0,
+            dtype=str,
+            keep_default_na=False,
+        )
+    except FileNotFoundError as e:
+        print(f"File not found {ard_config.get('in_csv_filename')}", file=sys.stderr)
+        sys.exit(1)
 
+    reduce_prefix = "reduced_"
     failed_to_reduce_alleles = []
-    # Reduce each of the specified columns
-    for column in ard_config["columns_to_reduce_in_csv"]:
-        if verbose:
-            print(f"Column:{column} =>")
-        if ard_config["new_column_for_redux"]:
-            # insert a new column
-            new_column_name = f"reduced_{column}"
-            new_column_index = df.columns.get_loc(column) + 1
-            # Apply clean_locus function to the column and insert as a new column
-            df.insert(
-                new_column_index,
-                new_column_name,
-                df[column].apply(clean_locus, column_name=column),
-            )
-        else:
-            # Apply clean_locus function to the column and replace the column
-            df[column] = df[column].apply(clean_locus, column_name=column)
+    locus_column_mapping = ard_config["locus_column_mapping"]
+    for subject in locus_column_mapping:
+        for locus in locus_column_mapping[subject]:
+            # Reduce each of the specified columns
+            locus_columns = locus_column_mapping[subject][locus]
+            for column in locus_columns:
+                if verbose:
+                    print(f"Column:{column} =>")
+                if ard_config.get("new_column_for_redux"):
+                    # insert a new column
+                    new_column_name = f"{reduce_prefix}{column}"
+                    new_column_index = df.columns.get_loc(column) + 1
+                    # Apply clean_locus function to the column and insert as a new column
+                    df.insert(
+                        new_column_index,
+                        new_column_name,
+                        df[column].apply(clean_locus, locus=locus, column_name=column),
+                    )
+                    locus_columns[locus_columns.index(column)] = new_column_name
+                else:
+                    # Apply clean_locus function to the column and replace the column
+                    df[column] = df[column].apply(
+                        clean_locus, locus=locus, column_name=column
+                    )
 
     # Map DRB3,DRB4,DRB5 to DRBX if specified
     # New columns DRBX_1 and DRBX_2 are created
-    if ard_config["map_drb345_to_drbx"]:
+    if ard_config.get("map_drb345_to_drbx"):
         drbx_loci = ["DRB3", "DRB4", "DRB5"]
         drbx_columns = [
             col_name for col_name in df.columns if col_name.split("_")[1] in drbx_loci
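
The new per-subject loop above assumes a nested "locus_column_mapping" entry in the JSON config, keyed by subject, then by locus, with a list of typing columns per locus. A minimal sketch of that assumed shape, written as the equivalent Python dict (the subject, locus, and column names below are illustrative only, not taken from a real config file):

# Hypothetical shape of ard_config["locus_column_mapping"], inferred from how
# the loop above indexes it; all keys and column names are made up.
locus_column_mapping = {
    "recipient": {
        "A": ["recipient_A_1", "recipient_A_2"],
        "DRB1": ["recipient_DRB1_1", "recipient_DRB1_2"],
    },
    "donor": {
        "A": ["donor_A_1", "donor_A_2"],
    },
}
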
@@ -257,6 +280,27 @@ if __name__ == "__main__":
         )
         df["DRBX_1"], df["DRBX_2"] = zip(*df_drbx)
 
+    if ard_config.get("generate_glstring"):
+        for subject in locus_column_mapping:
+            slug_columns = []
+            for locus in locus_column_mapping[subject]:
+                slug_column = locus + "_slug"
+                slug_columns.append(slug_column)
+                if len(locus_column_mapping[subject][locus]) > 1:
+                    df[slug_column] = (
+                        df[locus_column_mapping[subject][locus][0]]
+                        + "+"
+                        + df[locus_column_mapping[subject][locus][1]]
+                    )
+                else:
+                    df[slug_column] = df[locus_column_mapping[subject][locus][0]]
+
+            df[subject + "_gl"] = df[slug_columns].agg("^".join, axis=1)
+            df[subject + "_gl"] = df[subject + "_gl"].apply(
+                lambda gl: gl.replace("^+", "")
+            )
+            df.drop(columns=slug_columns, inplace=True)
+
     # Save as XLSX if specified
     if ard_config["output_file_format"] == "xlsx":
         out_file_name = f"{ard_config['out_csv_filename']}.xlsx"
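
The generate_glstring branch builds one GL String per subject: the two alleles at a locus are joined with "+", and the per-locus pairs are then joined with "^" (the trailing replace("^+", "") appears to clean up loci whose first allele column is empty). A small, self-contained pandas sketch of the same joining idea, using made-up column names and allele values:

import pandas as pd

# Illustrative typing columns; in the script these come from locus_column_mapping.
df = pd.DataFrame(
    {
        "recipient_A_1": ["A*01:01"],
        "recipient_A_2": ["A*02:01"],
        "recipient_B_1": ["B*07:02"],
        "recipient_B_2": ["B*08:01"],
    }
)

# "+" joins the two alleles at a locus, "^" joins the loci into a GL String.
df["A_slug"] = df["recipient_A_1"] + "+" + df["recipient_A_2"]
df["B_slug"] = df["recipient_B_1"] + "+" + df["recipient_B_2"]
df["recipient_gl"] = df[["A_slug", "B_slug"]].agg("^".join, axis=1)

print(df["recipient_gl"].iloc[0])  # A*01:01+A*02:01^B*07:02+B*08:01
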