Skip to content

Commit 9b21acc

Browse files
committed
Fix running without calibration
1 parent 445538f commit 9b21acc

File tree

4 files changed

+71
-46
lines changed

4 files changed

+71
-46
lines changed

im2deep/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
"""IM2Deep: Deep learning framework for peptide collisional cross section prediction."""
22

3-
__version__ = "0.1.5"
3+
__version__ = "0.1.6"

im2deep/__main__.py

Lines changed: 30 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -104,18 +104,17 @@ def setup_logging(passed_level):
104104
@click.option(
105105
"--use_single_model",
106106
type=click.BOOL,
107-
default=False,
107+
default=True,
108108
help="Use a single model for prediction.",
109109
)
110-
111110
def main(
112111
psm_file: str,
113112
calibration_file: Optional[str] = None,
114113
output_file: Optional[str] = None,
115114
model_name: Optional[str] = "tims",
116115
log_level: Optional[str] = "info",
117116
n_jobs: Optional[int] = None,
118-
use_single_model: Optional[bool] = False,
117+
use_single_model: Optional[bool] = True,
119118
calibrate_per_charge: Optional[bool] = True,
120119
use_charge_state: Optional[int] = 2,
121120
):
@@ -156,31 +155,38 @@ def main(
156155
and "modifications" in first_line_cal.split(",")
157156
and "seq" in first_line_cal.split(",")
158157
):
159-
df_cal = pd.read_csv(calibration_file)
160-
df_cal.fillna("", inplace=True)
161-
del calibration_file
162-
163-
list_of_cal_psms = []
164-
for seq, mod, charge, ident, CCS in zip(
165-
df_cal["seq"],
166-
df_cal["modifications"],
167-
df_cal["charge"],
168-
df_cal.index,
169-
df_cal["CCS"],
170-
):
171-
list_of_cal_psms.append(
172-
PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident)
158+
try:
159+
df_cal = pd.read_csv(calibration_file)
160+
df_cal.fillna("", inplace=True)
161+
del calibration_file
162+
163+
list_of_cal_psms = []
164+
for seq, mod, charge, ident, CCS in zip(
165+
df_cal["seq"],
166+
df_cal["modifications"],
167+
df_cal["charge"],
168+
df_cal.index,
169+
df_cal["CCS"],
170+
):
171+
list_of_cal_psms.append(
172+
PSM(peptidoform=peprec_to_proforma(seq, mod, charge), spectrum_id=ident)
173+
)
174+
psm_list_cal = PSMList(psm_list=list_of_cal_psms)
175+
psm_list_cal_df = psm_list_cal.to_dataframe()
176+
psm_list_cal_df["ccs_observed"] = df_cal["CCS"]
177+
del df_cal
178+
179+
except IOError:
180+
LOGGER.error(
181+
"Invalid calibration file. Please check the format of the calibration file."
173182
)
174-
psm_list_cal = PSMList(psm_list=list_of_cal_psms)
175-
psm_list_cal_df = psm_list_cal.to_dataframe()
176-
psm_list_cal_df["ccs_observed"] = df_cal["CCS"]
177-
del df_cal
183+
sys.exit(1)
178184

179185
else:
180-
LOGGER.error(
181-
"Invalid calibration file. Please check the format of the calibration file."
186+
LOGGER.warning(
187+
"No calibration file found. Proceeding without calibration. Calibration is HIGHLY recommended for accurate CCS prediction."
182188
)
183-
sys.exit(1)
189+
psm_list_cal_df = None
184190

185191
if not output_file:
186192
output_file = Path(psm_file).parent / (Path(psm_file).stem + "_IM2Deep-predictions.csv")

im2deep/calibrate.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
LOGGER = logging.getLogger(__name__)
99

10+
1011
def im2ccs(reverse_im, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
1112
"""
1213
Convert ion mobility to collisional cross section.
@@ -82,7 +83,9 @@ def get_ccs_shift(
8283
)
8384
LOGGER.debug(
8485
"""Calculating CCS shift based on {} overlapping peptide-charge pairs
85-
between PSMs and reference dataset""".format(both.shape[0])
86+
between PSMs and reference dataset""".format(
87+
both.shape[0]
88+
)
8689
)
8790

8891
# How much CCS in calibration data is larger than reference CCS, so predictions
@@ -156,7 +159,7 @@ def calculate_ccs_shift(
156159
CCS shift factor.
157160
158161
"""
159-
cal_df['charge'] = cal_df['peptidoform'].apply(lambda x: x.precursor_charge)
162+
cal_df["charge"] = cal_df["peptidoform"].apply(lambda x: x.precursor_charge)
160163
cal_df = cal_df[cal_df["charge"] < 7] # predictions do not go higher for IM2Deep
161164

162165
if not per_charge:
@@ -204,27 +207,36 @@ def linear_calibration(
204207
205208
"""
206209
LOGGER.info("Calibrating CCS values using linear calibration...")
207-
preds_df["charge"] = preds_df["peptidoform"].apply(lambda x: x.precursor_charge)
208210
if per_charge:
209211
general_shift = calculate_ccs_shift(
210-
calibration_dataset, reference_dataset, per_charge=False, use_charge_state=use_charge_state
212+
calibration_dataset,
213+
reference_dataset,
214+
per_charge=False,
215+
use_charge_state=use_charge_state,
211216
)
212217
shift_factor_dict = calculate_ccs_shift(
213218
calibration_dataset, reference_dataset, per_charge=True
214219
)
215220
for charge in preds_df["charge"].unique():
216221
if charge not in shift_factor_dict:
217-
LOGGER.info("No overlapping precursors for charge state {}. Using overall shift factor for precursors with that charge.".format(charge))
222+
LOGGER.info(
223+
"No overlapping precursors for charge state {}. Using overall shift factor for precursors with that charge.".format(
224+
charge
225+
)
226+
)
218227
shift_factor_dict[charge] = general_shift
219228
LOGGER.info("Shift factors per charge: {}".format(shift_factor_dict))
220-
preds_df["predicted_ccs_calibrated"] = preds_df.apply(
229+
preds_df["predicted_ccs"] = preds_df.apply(
221230
lambda x: x["predicted_ccs"] + shift_factor_dict[x["charge"]], axis=1
222231
)
223232
else:
224233
shift_factor = calculate_ccs_shift(
225-
calibration_dataset, reference_dataset, per_charge=False, use_charge_state=use_charge_state
234+
calibration_dataset,
235+
reference_dataset,
236+
per_charge=False,
237+
use_charge_state=use_charge_state,
226238
)
227-
preds_df["predicted_ccs_calibrated"] = preds_df.apply(
239+
preds_df["predicted_ccs"] = preds_df.apply(
228240
lambda x: x["predicted_ccs"] + shift_factor, axis=1
229241
)
230242
LOGGER.info("CCS values calibrated.")

im2deep/im2deep.py

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ def predict_ccs(
2020
model_name="tims",
2121
calibrate_per_charge=True,
2222
use_charge_state=2,
23-
use_single_model=False,
23+
use_single_model=True,
2424
n_jobs=None,
2525
write_output=True,
2626
):
@@ -33,33 +33,40 @@ def predict_ccs(
3333

3434
path_model_list = list(path_model.glob("*.hdf5"))
3535
if use_single_model:
36-
path_model_list = [path_model_list[0]]
36+
path_model_list = [path_model_list[1]]
3737

3838
dlc = DeepLC(path_model=path_model_list, n_jobs=n_jobs, predict_ccs=True)
3939
LOGGER.info("Predicting CCS values...")
4040
preds = dlc.make_preds(psm_list=psm_list_pred, calibrate=False)
4141
LOGGER.info("CCS values predicted.")
4242
psm_list_pred_df = psm_list_pred.to_dataframe()
4343
psm_list_pred_df["predicted_ccs"] = preds
44-
45-
calibrated_psm_list_pred_df = linear_calibration(
46-
psm_list_pred_df,
47-
calibration_dataset=psm_list_cal_df,
48-
reference_dataset=reference_dataset,
49-
per_charge=calibrate_per_charge,
50-
use_charge_state=use_charge_state,
44+
psm_list_pred_df["charge"] = psm_list_pred_df["peptidoform"].apply(
45+
lambda x: x.precursor_charge
5146
)
47+
48+
if psm_list_cal_df is not None:
49+
psm_list_pred_df = linear_calibration(
50+
psm_list_pred_df,
51+
calibration_dataset=psm_list_cal_df,
52+
reference_dataset=reference_dataset,
53+
per_charge=calibrate_per_charge,
54+
use_charge_state=use_charge_state,
55+
)
56+
57+
LOGGER.debug(psm_list_pred_df)
5258
if write_output:
5359
LOGGER.info("Writing output file...")
5460
output_file = open(output_file, "w")
5561
output_file.write("seq,modifications,charge,predicted CCS\n")
5662
for peptidoform, charge, CCS in zip(
57-
calibrated_psm_list_pred_df["peptidoform"],
58-
calibrated_psm_list_pred_df["charge"],
59-
calibrated_psm_list_pred_df["predicted_ccs_calibrated"],
63+
psm_list_pred_df["peptidoform"],
64+
psm_list_pred_df["charge"],
65+
psm_list_pred_df["predicted_ccs"],
6066
):
6167
output_file.write(f"{peptidoform},{charge},{CCS}\n")
6268
output_file.close()
6369

6470
LOGGER.info("IM2Deep finished!")
65-
return calibrated_psm_list_pred_df["predicted_ccs_calibrated"]
71+
72+
return psm_list_pred_df["predicted_ccs"]

0 commit comments

Comments
 (0)