11"""Main command line interface to DeepLC."""
22
33__author__ = ["Robbin Bouwmeester" , "Ralf Gabriels" ]
4- __credits__ = ["Robbin Bouwmeester" , "Ralf Gabriels" , "Prof. Lennart Martens" , "Sven Degroeve" ]
4+ __credits__ = [
5+ "Robbin Bouwmeester" ,
6+ "Ralf Gabriels" ,
7+ "Prof. Lennart Martens" ,
8+ "Sven Degroeve" ,
9+ ]
510__license__ = "Apache License, Version 2.0"
611__maintainer__ = ["Robbin Bouwmeester" , "Ralf Gabriels" ]
7121217import warnings
1318
1419import pandas as pd
20+ from psm_utils .io import read_file
1521from psm_utils .io .peptide_record import peprec_to_proforma
1622from psm_utils .psm import PSM
1723from psm_utils .psm_list import PSMList
18- from psm_utils .io import read_file
1924
20- from deeplc import __version__ , DeepLC , FeatExtractor
25+ from deeplc import DeepLC , __version__
2126from deeplc ._argument_parser import parse_arguments
2227from deeplc ._exceptions import DeepLCError
2328
2631
def setup_logging(passed_level):
    """Configure root logging to stdout at the requested verbosity.

    Parameters
    ----------
    passed_level : str
        Case-insensitive level name; one of ``critical``, ``error``,
        ``warning``, ``info`` or ``debug``.

    Raises
    ------
    SystemExit
        With exit code 1, after printing the accepted level names, when
        ``passed_level`` is not one of the names listed above.
    """
    log_mapping = {
        "critical": logging.CRITICAL,
        "error": logging.ERROR,
        "warning": logging.WARNING,
        "info": logging.INFO,
        "debug": logging.DEBUG,
    }

    # Normalise once so the membership test and the lookup agree.
    level_name = passed_level.lower()

    if level_name not in log_mapping:
        print(
            "Invalid log level. Should be one of the following: ",
            ", ".join(log_mapping.keys()),
        )
        # Use sys.exit rather than the bare exit(): the latter is injected by
        # the `site` module and is not guaranteed to exist (python -S, frozen
        # executables). Both raise SystemExit(1), so callers see no change.
        sys.exit(1)

    logging.basicConfig(
        stream=sys.stdout,
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=log_mapping[level_name],
    )
4954
55+
5056def main (gui = False ):
5157 """Main function for the CLI."""
5258 argu = parse_arguments (gui = gui )
@@ -55,13 +61,13 @@ def main(gui=False):
5561
5662 # Reset logging levels if DEBUG (see deeplc.py)
5763 if argu .log_level .lower () == "debug" :
58- os .environ [' TF_CPP_MIN_LOG_LEVEL' ] = '0'
59- logging .getLogger (' tensorflow' ).setLevel (logging .DEBUG )
60- warnings .filterwarnings (' default' , category = DeprecationWarning )
61- warnings .filterwarnings (' default' , category = FutureWarning )
62- warnings .filterwarnings (' default' , category = UserWarning )
64+ os .environ [" TF_CPP_MIN_LOG_LEVEL" ] = "0"
65+ logging .getLogger (" tensorflow" ).setLevel (logging .DEBUG )
66+ warnings .filterwarnings (" default" , category = DeprecationWarning )
67+ warnings .filterwarnings (" default" , category = FutureWarning )
68+ warnings .filterwarnings (" default" , category = UserWarning )
6369 else :
64- os .environ [' KMP_WARNINGS' ] = '0'
70+ os .environ [" KMP_WARNINGS" ] = "0"
6571
6672 try :
6773 run (** vars (argu ))
@@ -101,13 +107,13 @@ def run(
101107 for fm in file_model :
102108 if len (sel_group ) == 0 :
103109 sel_group = "_" .join (fm .split ("_" )[:- 1 ])
104- fm_dict [sel_group ]= fm
110+ fm_dict [sel_group ] = fm
105111 continue
106112 m_group = "_" .join (fm .split ("_" )[:- 1 ])
107113 if m_group == sel_group :
108114 fm_dict [m_group ] = fm
109115 file_model = fm_dict
110-
116+
111117 with open (file_pred ) as f :
112118 first_line_pred = f .readline ().strip ()
113119 if file_cal :
@@ -118,53 +124,68 @@ def run(
118124 # Read input files
119125 df_pred = pd .read_csv (file_pred )
120126 if len (df_pred .columns ) < 2 :
121- df_pred = pd .read_csv (file_pred ,sep = " " )
127+ df_pred = pd .read_csv (file_pred , sep = " " )
122128 df_pred = df_pred .fillna ("" )
123129 file_pred = ""
124130
125131 list_of_psms = []
126- for seq ,mod ,ident in zip (df_pred ["seq" ],df_pred ["modifications" ],df_pred .index ):
127- list_of_psms .append (PSM (peptidoform = peprec_to_proforma (seq ,mod ),spectrum_id = ident ))
132+ for seq , mod , ident in zip (df_pred ["seq" ], df_pred ["modifications" ], df_pred .index ):
133+ list_of_psms .append (PSM (peptidoform = peprec_to_proforma (seq , mod ), spectrum_id = ident ))
128134 psm_list_pred = PSMList (psm_list = list_of_psms )
129135 df_pred = None
130136 else :
131137 psm_list_pred = read_file (file_pred )
132138 if "msms" in file_pred and ".txt" in file_pred :
133- mapper = pd .read_csv (os .path .join (os .path .dirname (os .path .realpath (__file__ )), "unimod/map_mq_file.csv" ),index_col = 0 )["value" ].to_dict ()
139+ mapper = pd .read_csv (
140+ os .path .join (
141+ os .path .dirname (os .path .realpath (__file__ )),
142+ "unimod/map_mq_file.csv" ,
143+ ),
144+ index_col = 0 ,
145+ )["value" ].to_dict ()
134146 psm_list_pred .rename_modifications (mapper )
135147
136148 # Allow for calibration file to be empty (undefined), fill in if/elif if present
137149 psm_list_cal = []
138- if "modifications" in first_line_cal .split ("," ) and "seq" in first_line_cal .split ("," ) and file_cal :
150+ if (
151+ "modifications" in first_line_cal .split ("," )
152+ and "seq" in first_line_cal .split ("," )
153+ and file_cal
154+ ):
139155 df_cal = pd .read_csv (file_cal )
140156 if len (df_cal .columns ) < 2 :
141- df_cal = pd .read_csv (df_cal ,sep = " " )
157+ df_cal = pd .read_csv (df_cal , sep = " " )
142158 df_cal = df_cal .fillna ("" )
143159 file_cal = ""
144160
145161 list_of_psms = []
146- for seq ,mod ,ident ,tr in zip (df_cal ["seq" ],df_cal ["modifications" ],df_cal .index ,df_cal ["tr" ]):
147- list_of_psms .append (PSM (peptidoform = peprec_to_proforma (seq ,mod ),spectrum_id = ident ,retention_time = tr ))
162+ for seq , mod , ident , tr in zip (
163+ df_cal ["seq" ], df_cal ["modifications" ], df_cal .index , df_cal ["tr" ]
164+ ):
165+ list_of_psms .append (
166+ PSM (
167+ peptidoform = peprec_to_proforma (seq , mod ),
168+ spectrum_id = ident ,
169+ retention_time = tr ,
170+ )
171+ )
148172 psm_list_cal = PSMList (psm_list = list_of_psms )
149173 df_cal = None
150174 elif file_cal :
151175 psm_list_cal = read_file (file_cal )
152176 if "msms" in file_cal and ".txt" in file_cal :
153- mapper = pd .read_csv (os .path .join (os .path .dirname (os .path .realpath (__file__ )), "unimod/map_mq_file.csv" ),index_col = 0 )["value" ].to_dict ()
177+ mapper = pd .read_csv (
178+ os .path .join (
179+ os .path .dirname (os .path .realpath (__file__ )),
180+ "unimod/map_mq_file.csv" ,
181+ ),
182+ index_col = 0 ,
183+ )["value" ].to_dict ()
154184 psm_list_cal .rename_modifications (mapper )
155- # Make a feature extraction object; you can skip this if you do not want to
156- # use the default settings for DeepLC. Here we want to use a model that does
157- # not use RDKit features so we skip the chemical descriptor making
158- # procedure.
159- f_extractor = FeatExtractor (
160- cnn_feats = True ,
161- verbose = verbose
162- )
163-
185+
164186 # Make the DeepLC object that will handle making predictions and calibration
165187 dlc = DeepLC (
166188 path_model = file_model ,
167- f_extractor = f_extractor ,
168189 cnn_model = True ,
169190 split_cal = split_cal ,
170191 dict_cal_divider = dict_divider ,
@@ -173,9 +194,9 @@ def run(
173194 batch_num = batch_num ,
174195 n_jobs = n_threads ,
175196 verbose = verbose ,
176- deeplc_retrain = transfer_learning
197+ deeplc_retrain = transfer_learning ,
177198 )
178-
199+
179200 # Calibrate the original model based on the new retention times
180201 if len (psm_list_cal ) > 0 :
181202 logger .info ("Selecting best model and calibrating predictions..." )
@@ -185,16 +206,21 @@ def run(
185206 # Make predictions; calibrated or uncalibrated
186207 logger .info ("Making predictions using model: %s" , dlc .model )
187208 if len (psm_list_cal ) > 0 :
188- preds = dlc .make_preds (seq_df = df_pred , infile = file_pred , psm_list = psm_list_pred )
209+ preds = dlc ._make_preds (seq_df = df_pred , infile = file_pred , psm_list = psm_list_pred )
189210 else :
190- preds = dlc .make_preds (seq_df = df_pred , infile = file_pred , psm_list = psm_list_pred , calibrate = False )
191-
192- #df_pred["predicted_tr"] = preds
211+ preds = dlc ._make_preds (
212+ seq_df = df_pred ,
213+ infile = file_pred ,
214+ psm_list = psm_list_pred ,
215+ calibrate = False ,
216+ )
217+
218+ # df_pred["predicted_tr"] = preds
193219 logger .info ("Writing predictions to file: %s" , file_pred_out )
194-
195- file_pred_out = open (file_pred_out ,"w" )
220+
221+ file_pred_out = open (file_pred_out , "w" )
196222 file_pred_out .write ("Sequence proforma,predicted retention time\n " )
197- for psm ,tr in zip (psm_list_pred ,preds ):
223+ for psm , tr in zip (psm_list_pred , preds ):
198224 file_pred_out .write (f"{ psm .peptidoform .proforma } ,{ tr } \n " )
199225 file_pred_out .close ()
200226
0 commit comments