Skip to content

Commit efb18bc

Browse files
authored
Merge pull request #89 from compomics/cleanup
Cleanup and preliminary refactoring of feature extractor
2 parents a373dc9 + cc965d6 commit efb18bc

18 files changed

+1120
-1732
lines changed

deeplc/__init__.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
1-
import sys
1+
__all__ = ["DeepLC"]
22

3+
from importlib.metadata import version
34

4-
if sys.version_info >= (3,8):
5-
from importlib.metadata import version
6-
__version__ = version('deeplc')
7-
else:
8-
import pkg_resources
9-
__version__ = pkg_resources.require("deeplc")[0].version
5+
__version__ = version("deeplc")
106

117

128
from deeplc.deeplc import DeepLC
13-
from deeplc.feat_extractor import FeatExtractor
14-

deeplc/__main__.py

Lines changed: 74 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
"""Main command line interface to DeepLC."""
22

33
__author__ = ["Robbin Bouwmeester", "Ralf Gabriels"]
4-
__credits__ = ["Robbin Bouwmeester", "Ralf Gabriels", "Prof. Lennart Martens", "Sven Degroeve"]
4+
__credits__ = [
5+
"Robbin Bouwmeester",
6+
"Ralf Gabriels",
7+
"Prof. Lennart Martens",
8+
"Sven Degroeve",
9+
]
510
__license__ = "Apache License, Version 2.0"
611
__maintainer__ = ["Robbin Bouwmeester", "Ralf Gabriels"]
712
@@ -12,12 +17,12 @@
1217
import warnings
1318

1419
import pandas as pd
20+
from psm_utils.io import read_file
1521
from psm_utils.io.peptide_record import peprec_to_proforma
1622
from psm_utils.psm import PSM
1723
from psm_utils.psm_list import PSMList
18-
from psm_utils.io import read_file
1924

20-
from deeplc import __version__, DeepLC, FeatExtractor
25+
from deeplc import DeepLC, __version__
2126
from deeplc._argument_parser import parse_arguments
2227
from deeplc._exceptions import DeepLCError
2328

@@ -26,27 +31,28 @@
2631

2732
def setup_logging(passed_level):
    """Configure root logging to write to stdout at the requested level.

    Parameters
    ----------
    passed_level : str
        Case-insensitive level name. Must be one of ``critical``, ``error``,
        ``warning``, ``info`` or ``debug``.

    Raises
    ------
    SystemExit
        With exit code 1, after printing the accepted level names, when
        ``passed_level`` is not one of the names listed above.
    """
    log_mapping = {
        "critical": logging.CRITICAL,
        "error": logging.ERROR,
        "warning": logging.WARNING,
        "info": logging.INFO,
        "debug": logging.DEBUG,
    }

    # Normalise once so the membership test and the lookup cannot disagree.
    level_name = passed_level.lower()

    if level_name not in log_mapping:
        print(
            "Invalid log level. Should be one of the following: ",
            ", ".join(log_mapping.keys()),
        )
        # Use sys.exit instead of the bare ``exit`` builtin: the latter is
        # injected by the ``site`` module for interactive sessions and is not
        # available when Python runs with -S or from some frozen builds.
        sys.exit(1)

    logging.basicConfig(
        stream=sys.stdout,
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=log_mapping[level_name],
    )
4954

55+
5056
def main(gui=False):
5157
"""Main function for the CLI."""
5258
argu = parse_arguments(gui=gui)
@@ -55,13 +61,13 @@ def main(gui=False):
5561

5662
# Reset logging levels if DEBUG (see deeplc.py)
5763
if argu.log_level.lower() == "debug":
58-
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
59-
logging.getLogger('tensorflow').setLevel(logging.DEBUG)
60-
warnings.filterwarnings('default', category=DeprecationWarning)
61-
warnings.filterwarnings('default', category=FutureWarning)
62-
warnings.filterwarnings('default', category=UserWarning)
64+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"
65+
logging.getLogger("tensorflow").setLevel(logging.DEBUG)
66+
warnings.filterwarnings("default", category=DeprecationWarning)
67+
warnings.filterwarnings("default", category=FutureWarning)
68+
warnings.filterwarnings("default", category=UserWarning)
6369
else:
64-
os.environ['KMP_WARNINGS'] = '0'
70+
os.environ["KMP_WARNINGS"] = "0"
6571

6672
try:
6773
run(**vars(argu))
@@ -101,13 +107,13 @@ def run(
101107
for fm in file_model:
102108
if len(sel_group) == 0:
103109
sel_group = "_".join(fm.split("_")[:-1])
104-
fm_dict[sel_group]= fm
110+
fm_dict[sel_group] = fm
105111
continue
106112
m_group = "_".join(fm.split("_")[:-1])
107113
if m_group == sel_group:
108114
fm_dict[m_group] = fm
109115
file_model = fm_dict
110-
116+
111117
with open(file_pred) as f:
112118
first_line_pred = f.readline().strip()
113119
if file_cal:
@@ -118,53 +124,68 @@ def run(
118124
# Read input files
119125
df_pred = pd.read_csv(file_pred)
120126
if len(df_pred.columns) < 2:
121-
df_pred = pd.read_csv(file_pred,sep=" ")
127+
df_pred = pd.read_csv(file_pred, sep=" ")
122128
df_pred = df_pred.fillna("")
123129
file_pred = ""
124130

125131
list_of_psms = []
126-
for seq,mod,ident in zip(df_pred["seq"],df_pred["modifications"],df_pred.index):
127-
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident))
132+
for seq, mod, ident in zip(df_pred["seq"], df_pred["modifications"], df_pred.index):
133+
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq, mod), spectrum_id=ident))
128134
psm_list_pred = PSMList(psm_list=list_of_psms)
129135
df_pred = None
130136
else:
131137
psm_list_pred = read_file(file_pred)
132138
if "msms" in file_pred and ".txt" in file_pred:
133-
mapper = pd.read_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), "unimod/map_mq_file.csv"),index_col=0)["value"].to_dict()
139+
mapper = pd.read_csv(
140+
os.path.join(
141+
os.path.dirname(os.path.realpath(__file__)),
142+
"unimod/map_mq_file.csv",
143+
),
144+
index_col=0,
145+
)["value"].to_dict()
134146
psm_list_pred.rename_modifications(mapper)
135147

136148
# Allow for calibration file to be empty (undefined), fill in if/elif if present
137149
psm_list_cal = []
138-
if "modifications" in first_line_cal.split(",") and "seq" in first_line_cal.split(",") and file_cal:
150+
if (
151+
"modifications" in first_line_cal.split(",")
152+
and "seq" in first_line_cal.split(",")
153+
and file_cal
154+
):
139155
df_cal = pd.read_csv(file_cal)
140156
if len(df_cal.columns) < 2:
141-
df_cal = pd.read_csv(df_cal,sep=" ")
157+
df_cal = pd.read_csv(df_cal, sep=" ")
142158
df_cal = df_cal.fillna("")
143159
file_cal = ""
144160

145161
list_of_psms = []
146-
for seq,mod,ident,tr in zip(df_cal["seq"],df_cal["modifications"],df_cal.index,df_cal["tr"]):
147-
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident,retention_time=tr))
162+
for seq, mod, ident, tr in zip(
163+
df_cal["seq"], df_cal["modifications"], df_cal.index, df_cal["tr"]
164+
):
165+
list_of_psms.append(
166+
PSM(
167+
peptidoform=peprec_to_proforma(seq, mod),
168+
spectrum_id=ident,
169+
retention_time=tr,
170+
)
171+
)
148172
psm_list_cal = PSMList(psm_list=list_of_psms)
149173
df_cal = None
150174
elif file_cal:
151175
psm_list_cal = read_file(file_cal)
152176
if "msms" in file_cal and ".txt" in file_cal:
153-
mapper = pd.read_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), "unimod/map_mq_file.csv"),index_col=0)["value"].to_dict()
177+
mapper = pd.read_csv(
178+
os.path.join(
179+
os.path.dirname(os.path.realpath(__file__)),
180+
"unimod/map_mq_file.csv",
181+
),
182+
index_col=0,
183+
)["value"].to_dict()
154184
psm_list_cal.rename_modifications(mapper)
155-
# Make a feature extraction object; you can skip this if you do not want to
156-
# use the default settings for DeepLC. Here we want to use a model that does
157-
# not use RDKit features so we skip the chemical descriptor making
158-
# procedure.
159-
f_extractor = FeatExtractor(
160-
cnn_feats=True,
161-
verbose=verbose
162-
)
163-
185+
164186
# Make the DeepLC object that will handle making predictions and calibration
165187
dlc = DeepLC(
166188
path_model=file_model,
167-
f_extractor=f_extractor,
168189
cnn_model=True,
169190
split_cal=split_cal,
170191
dict_cal_divider=dict_divider,
@@ -173,9 +194,9 @@ def run(
173194
batch_num=batch_num,
174195
n_jobs=n_threads,
175196
verbose=verbose,
176-
deeplc_retrain=transfer_learning
197+
deeplc_retrain=transfer_learning,
177198
)
178-
199+
179200
# Calibrate the original model based on the new retention times
180201
if len(psm_list_cal) > 0:
181202
logger.info("Selecting best model and calibrating predictions...")
@@ -185,16 +206,21 @@ def run(
185206
# Make predictions; calibrated or uncalibrated
186207
logger.info("Making predictions using model: %s", dlc.model)
187208
if len(psm_list_cal) > 0:
188-
preds = dlc.make_preds(seq_df=df_pred, infile=file_pred, psm_list=psm_list_pred)
209+
preds = dlc._make_preds(seq_df=df_pred, infile=file_pred, psm_list=psm_list_pred)
189210
else:
190-
preds = dlc.make_preds(seq_df=df_pred, infile=file_pred, psm_list=psm_list_pred, calibrate=False)
191-
192-
#df_pred["predicted_tr"] = preds
211+
preds = dlc._make_preds(
212+
seq_df=df_pred,
213+
infile=file_pred,
214+
psm_list=psm_list_pred,
215+
calibrate=False,
216+
)
217+
218+
# df_pred["predicted_tr"] = preds
193219
logger.info("Writing predictions to file: %s", file_pred_out)
194-
195-
file_pred_out = open(file_pred_out,"w")
220+
221+
file_pred_out = open(file_pred_out, "w")
196222
file_pred_out.write("Sequence proforma,predicted retention time\n")
197-
for psm,tr in zip(psm_list_pred,preds):
223+
for psm, tr in zip(psm_list_pred, preds):
198224
file_pred_out.write(f"{psm.peptidoform.proforma},{tr}\n")
199225
file_pred_out.close()
200226

deeplc/_argument_parser.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -74,17 +74,13 @@ def parse_arguments(gui=False):
7474

7575
parser = ArgumentParser(
7676
prog="DeepLC",
77-
description=(
78-
"Retention time prediction for (modified) peptides using deep " "learning."
79-
),
77+
description=("Retention time prediction for (modified) peptides using deep learning."),
8078
usage="deeplc [OPTIONS] --file_pred <peptide_file>",
8179
formatter_class=lambda prog: HelpFormatter(prog, max_help_position=42),
8280
add_help=False,
8381
)
8482

85-
io_args = parser.add_argument_group(
86-
"Input and output files", **gooey_args["io_args"]
87-
)
83+
io_args = parser.add_argument_group("Input and output files", **gooey_args["io_args"])
8884
io_args.add_argument(
8985
"--file_pred",
9086
required=True,
@@ -97,9 +93,7 @@ def parse_arguments(gui=False):
9793
type=str,
9894
default=None,
9995
metavar="Input peptides for calibration" if gui else "",
100-
help=(
101-
"path to peptide CSV file with retention times to use for " "calibration"
102-
),
96+
help=("path to peptide CSV file with retention times to use for calibration"),
10397
**gooey_args["file_cal"],
10498
)
10599
io_args.add_argument(
@@ -166,10 +160,7 @@ def parse_arguments(gui=False):
166160
dest="split_cal",
167161
default=50,
168162
metavar="split cal" if gui else "",
169-
help=(
170-
"number of splits in the chromatogram for piecewise linear "
171-
"calibration fit"
172-
),
163+
help=("number of splits in the chromatogram for piecewise linear calibration fit"),
173164
**gooey_args["split_cal"],
174165
)
175166
model_cal_args.add_argument(
@@ -265,8 +256,6 @@ def parse_arguments(gui=False):
265256
results = parser.parse_args()
266257

267258
if not results.file_pred_out:
268-
results.file_pred_out = (
269-
os.path.splitext(results.file_pred)[0] + "_deeplc_predictions.csv"
270-
)
259+
results.file_pred_out = os.path.splitext(results.file_pred)[0] + "_deeplc_predictions.csv"
271260

272261
return results

deeplc/_exceptions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""DeepLC exceptions."""
22

3+
34
class DeepLCError(Exception):
    """Exception type for DeepLC-specific errors."""
56

0 commit comments

Comments
 (0)