11"""Main command line interface to DeepLC."""
22
3- __author__ = ["Robbin Bouwmeester" , "Ralf Gabriels" ]
4- __credits__ = [
5- "Robbin Bouwmeester" ,
6- "Ralf Gabriels" ,
7- "Prof. Lennart Martens" ,
8- "Sven Degroeve" ,
9- ]
10- __license__ = "Apache License, Version 2.0"
11- __maintainer__ = ["Robbin Bouwmeester" , "Ralf Gabriels" ]
12- 13-
143import logging
15- import os
164import sys
17- import warnings
18-
19- import pandas as pd
20- from psm_utils .io import read_file
21- from psm_utils .io .peptide_record import peprec_to_proforma
22- from psm_utils .psm import PSM
23- from psm_utils .psm_list import PSMList
245
25- from deeplc import DeepLC , __version__
26- from deeplc ._argument_parser import parse_arguments
27- from deeplc ._exceptions import DeepLCError
6+ LOGGER = logging .getLogger (__name__ )
287
29- logger = logging .getLogger (__name__ )
308
31-
32- def setup_logging (passed_level ):
9+ def _setup_logging (passed_level ):
3310 log_mapping = {
3411 "critical" : logging .CRITICAL ,
3512 "error" : logging .ERROR ,
@@ -51,181 +28,3 @@ def setup_logging(passed_level):
5128 datefmt = "%Y-%m-%d %H:%M:%S" ,
5229 level = log_mapping [passed_level .lower ()],
5330 )
54-
55-
def main(gui=False):
    """Parse the command line, configure logging and start a DeepLC run.

    Parameters
    ----------
    gui : bool, optional
        Forwarded unchanged to ``parse_arguments``.

    Notes
    -----
    A ``DeepLCError`` raised by ``run`` is logged with its traceback, after
    which the process exits with status code 1.
    """
    args = parse_arguments(gui=gui)
    setup_logging(args.log_level)

    debug_requested = args.log_level.lower() == "debug"
    if not debug_requested:
        # NOTE(review): presumably silences OpenMP (KMP) runtime warnings -- confirm.
        os.environ["KMP_WARNINGS"] = "0"
    else:
        # Reset logging levels if DEBUG (see deeplc.py)
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"
        logging.getLogger("tensorflow").setLevel(logging.DEBUG)
        for category in (DeprecationWarning, FutureWarning, UserWarning):
            warnings.filterwarnings("default", category=category)

    run_kwargs = vars(args)
    try:
        run(**run_kwargs)
    except DeepLCError as error:
        logger.exception(error)
        sys.exit(1)
77-
78-
def _select_first_model_group(model_paths):
    """Keep only models that share the name prefix of the first model.

    The prefix is everything before the last underscore of the path. The
    result maps that prefix to a path, so when several paths share the prefix
    only the last one is retained.
    """
    selected = {}
    group = ""
    for path in model_paths:
        candidate = "_".join(path.split("_")[:-1])
        if not group:
            group = candidate
            selected[group] = path
            continue
        if candidate == group:
            selected[candidate] = path
    return selected


def _read_first_line(path):
    """Return the first line of the text file at ``path``, stripped of whitespace."""
    with open(path) as handle:
        return handle.readline().strip()


def _is_peprec_header(header):
    """Return True if a comma-separated header names ``seq`` and ``modifications``."""
    columns = header.split(",")
    return "modifications" in columns and "seq" in columns


def _read_peprec_table(path):
    """Read a ``seq``/``modifications`` table, retrying with a space separator."""
    table = pd.read_csv(path)
    if len(table.columns) < 2:
        table = pd.read_csv(path, sep=" ")
    return table.fillna("")


def _rename_maxquant_modifications(psm_list, path):
    """Apply the bundled modification mapping when ``path`` looks like an msms.txt file."""
    if "msms" in path and ".txt" in path:
        mapper = pd.read_csv(
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "unimod/map_mq_file.csv",
            ),
            index_col=0,
        )["value"].to_dict()
        psm_list.rename_modifications(mapper)


def run(
    file_pred,
    file_cal=None,
    file_pred_out=None,
    file_model=None,
    pygam_calibration=True,
    split_cal=50,
    dict_divider=50,
    use_library=None,
    write_library=False,
    batch_num=50000,
    n_threads=None,
    transfer_learning=False,
    log_level="info",
    verbose=True,
):
    """Run DeepLC: load PSMs, optionally calibrate, predict and write results.

    Parameters
    ----------
    file_pred
        Path of the file with peptides to predict. A CSV whose header contains
        both ``seq`` and ``modifications`` is converted row by row with
        ``peprec_to_proforma``; anything else is handed to ``read_file``.
    file_cal
        Optional path of a calibration file, read like ``file_pred``. The
        CSV form additionally needs a ``tr`` column (used as retention time).
    file_pred_out
        Path of the CSV file the predictions are written to.
    file_model
        Iterable of model paths, or None. Without a calibration file it is
        reduced to the models sharing the name prefix of the first entry.
    pygam_calibration
        Accepted for interface compatibility; not used in this function.
    split_cal, dict_divider, use_library, write_library, batch_num, verbose
        Passed to ``DeepLC`` as ``split_cal``, ``dict_cal_divider``,
        ``use_library``, ``write_library``, ``batch_num`` and ``verbose``.
    n_threads
        Passed to ``DeepLC`` as ``n_jobs``.
    transfer_learning
        Passed to ``DeepLC`` as ``deeplc_retrain``.
    log_level
        Accepted for interface compatibility; not used in this function.
    """
    logger.info("Using DeepLC version %s", __version__)
    # %s instead of %i: n_threads defaults to None, which %i cannot format.
    logger.debug("Using %s CPU threads", n_threads)

    # NOTE(review): the original passes False (not None) as ``seq_df`` when the
    # input is not a seq/modifications CSV; kept as is -- confirm with DeepLC.
    df_pred = False

    if not file_cal and file_model is not None:
        file_model = _select_first_model_group(file_model)

    first_line_pred = _read_first_line(file_pred)
    first_line_cal = _read_first_line(file_cal) if file_cal else ""

    if _is_peprec_header(first_line_pred):
        # Read input files
        table_pred = _read_peprec_table(file_pred)
        psm_list_pred = PSMList(
            psm_list=[
                PSM(peptidoform=peprec_to_proforma(seq, mod), spectrum_id=ident)
                for seq, mod, ident in zip(
                    table_pred["seq"], table_pred["modifications"], table_pred.index
                )
            ]
        )
        # The PSM list now carries the data; no file or dataframe is passed on.
        file_pred = ""
        df_pred = None
    else:
        psm_list_pred = read_file(file_pred)
        _rename_maxquant_modifications(psm_list_pred, file_pred)

    # Allow for calibration file to be empty (undefined), fill in if/elif if present
    psm_list_cal = []
    if file_cal and _is_peprec_header(first_line_cal):
        # Fixed: the fallback read used the dataframe instead of the file path.
        table_cal = _read_peprec_table(file_cal)
        psm_list_cal = PSMList(
            psm_list=[
                PSM(
                    peptidoform=peprec_to_proforma(seq, mod),
                    spectrum_id=ident,
                    retention_time=tr,
                )
                for seq, mod, ident, tr in zip(
                    table_cal["seq"],
                    table_cal["modifications"],
                    table_cal.index,
                    table_cal["tr"],
                )
            ]
        )
    elif file_cal:
        psm_list_cal = read_file(file_cal)
        _rename_maxquant_modifications(psm_list_cal, file_cal)

    # Make the DeepLC object that will handle making predictions and calibration
    dlc = DeepLC(
        path_model=file_model,
        cnn_model=True,
        split_cal=split_cal,
        dict_cal_divider=dict_divider,
        write_library=write_library,
        use_library=use_library,
        batch_num=batch_num,
        n_jobs=n_threads,
        verbose=verbose,
        deeplc_retrain=transfer_learning,
    )

    has_calibration = len(psm_list_cal) > 0

    # Calibrate the original model based on the new retention times
    if has_calibration:
        logger.info("Selecting best model and calibrating predictions...")
        logger.info("Initiating transfer learning?")
        dlc.calibrate_preds(psm_list=psm_list_cal)

    # Make predictions; calibrated or uncalibrated
    logger.info("Making predictions using model: %s", dlc.model)
    if has_calibration:
        preds = dlc._make_preds(seq_df=df_pred, infile=file_pred, psm_list=psm_list_pred)
    else:
        preds = dlc._make_preds(
            seq_df=df_pred,
            infile=file_pred,
            psm_list=psm_list_pred,
            calibrate=False,
        )

    logger.info("Writing predictions to file: %s", file_pred_out)

    # Context manager guarantees the output file is closed even if a write fails.
    with open(file_pred_out, "w") as out_handle:
        out_handle.write("Sequence proforma,predicted retention time\n")
        for psm, tr in zip(psm_list_pred, preds):
            out_handle.write(f"{psm.peptidoform.proforma},{tr}\n")

    logger.info("DeepLC finished!")
228-
229-
# Start the command line interface when this module is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments