Skip to content

Commit d450e39

Browse files
committed
Ver 1.1.3
This version fixes minor bugs and formats every script with Black.
1 parent d062257 commit d450e39

File tree

11 files changed

+485
-199
lines changed

11 files changed

+485
-199
lines changed

ideeplc/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
"""iDeepLC: A deep Learning-based retention time predictor for unseen modified peptides with a novel encoding system"""
22

3-
__version__ = "1.1.2"
4-
3+
__version__ = "1.1.3"

ideeplc/__main__.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
from pathlib import Path
77

88
from ideeplc import __version__
9-
from ideeplc.ideeplc_core import main as run_ideeplc # Assumes main logic is exposed here
9+
from ideeplc.ideeplc_core import (
10+
main as run_ideeplc,
11+
) # Assumes main logic is exposed here
1012
from rich.console import Console
1113
from rich.logging import RichHandler
1214
from rich.text import Text
@@ -39,26 +41,49 @@ def _argument_parser() -> argparse.ArgumentParser:
3941
parser = argparse.ArgumentParser(
4042
description="iDeepLC: Deep learning-based retention time prediction",
4143
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=42),
42-
epilog=("Example usage: python -m ideeplc --input peptides.csv --save --finetune\n ")
44+
epilog=(
45+
"Example usage: python -m ideeplc --input peptides.csv --save --finetune\n "
46+
),
47+
)
48+
parser.add_argument(
49+
"-i",
50+
"--input",
51+
type=str,
52+
required=True,
53+
help="Path to the CSV file containing the peptide sequences.",
54+
)
55+
parser.add_argument(
56+
"-s", "--save", action="store_true", help="Flag to save results to disk."
57+
)
58+
parser.add_argument(
59+
"-f",
60+
"--finetune",
61+
action="store_true",
62+
help="Flag to enable fine-tuning of the model.",
63+
)
64+
parser.add_argument(
65+
"-l",
66+
"--log_level",
67+
type=str,
68+
default="info",
69+
choices=LOG_MAPPING.keys(),
70+
help="Logging level (default: info).",
71+
)
72+
parser.add_argument(
73+
"-c",
74+
"--calibrate",
75+
action="store_true",
76+
help="Flag to enable calibration of the model predictions.",
4377
)
44-
parser.add_argument("-i", "--input", type=str, required=True,
45-
help="Path to the CSV file containing the peptide sequences.")
46-
parser.add_argument("-s", "--save", action="store_true",
47-
help="Flag to save results to disk.")
48-
parser.add_argument("-f", "--finetune", action="store_true",
49-
help="Flag to enable fine-tuning of the model.")
50-
parser.add_argument("-l", "--log_level", type=str, default="info",
51-
choices=LOG_MAPPING.keys(),
52-
help="Logging level (default: info).")
53-
parser.add_argument("-c", "--calibrate", action="store_true",
54-
help="Flag to enable calibration of the model predictions.")
5578
return parser
5679

5780

5881
def _setup_logging(level: str, log_file: Path = None):
5982
"""Set up the logging configuration."""
6083
if level not in LOG_MAPPING:
61-
raise ValueError(f"Invalid log level '{level}'. Choose from {', '.join(LOG_MAPPING)}")
84+
raise ValueError(
85+
f"Invalid log level '{level}'. Choose from {', '.join(LOG_MAPPING)}"
86+
)
6287
handlers = [RichHandler(rich_tracebacks=True, console=CONSOLE, show_path=False)]
6388
if log_file:
6489
handlers.append(logging.FileHandler(log_file, mode="w", encoding="utf-8"))

ideeplc/calibrate.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
LOGGER = logging.getLogger(__name__)
99

10+
1011
class SplineTransformerCalibration:
1112
"""Spline Transformer Calibration for Retention Time Prediction."""
1213

@@ -19,7 +20,12 @@ def __init__(self):
1920
self._spline_model = None
2021
self._linear_model_right = None
2122

22-
def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: bool = False):
23+
def fit(
24+
self,
25+
measured_tr: np.ndarray,
26+
predicted_tr: np.ndarray,
27+
simplified: bool = False,
28+
):
2329
"""
2430
Fit a SplineTransformer model to the measured and predicted retention times.
2531
@@ -40,8 +46,12 @@ def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: boo
4046

4147
# Check if the lengths match
4248
if len(measured_tr) != len(predicted_tr):
43-
LOGGER.error("Measured and predicted retention times must have the same length.")
44-
raise ValueError("Measured and predicted retention times must have the same length.")
49+
LOGGER.error(
50+
"Measured and predicted retention times must have the same length."
51+
)
52+
raise ValueError(
53+
"Measured and predicted retention times must have the same length."
54+
)
4555

4656
# Fit a SplineTransformer model
4757
if simplified:
@@ -54,7 +64,9 @@ def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: boo
5464
linear_model_right = linear_model
5565
else:
5666
LOGGER.info("Using SplineTransformer with more knots for calibration.")
57-
spline = SplineTransformer(degree=4, n_knots=int(len(measured_tr) / 500) + 5)
67+
spline = SplineTransformer(
68+
degree=4, n_knots=int(len(measured_tr) / 500) + 5
69+
)
5870
spline_model = make_pipeline(spline, LinearRegression())
5971
spline_model.fit(predicted_tr.reshape(-1, 1), measured_tr)
6072

@@ -83,7 +95,6 @@ def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: boo
8395
self._fit = True
8496
LOGGER.info("Calibration fitting completed successfully.")
8597

86-
8798
def transform(self, tr: np.ndarray) -> np.ndarray:
8899
"""
89100
Transform the predicted retention times using the fitted SplineTransformer model.
@@ -99,13 +110,17 @@ def transform(self, tr: np.ndarray) -> np.ndarray:
99110
The calibrated retention times.
100111
"""
101112
if not self._fit:
102-
LOGGER.error("Calibration model has not been fitted yet. Call fit() before transform().")
103-
raise RuntimeError("Calibration model has not been fitted yet. Call fit() before transform().")
113+
LOGGER.error(
114+
"Calibration model has not been fitted yet. Call fit() before transform()."
115+
)
116+
raise RuntimeError(
117+
"Calibration model has not been fitted yet. Call fit() before transform()."
118+
)
104119

105120
# if tr.shape[0] == 0:
106121
# return np.array([])
107122
tr_array = np.array(tr)
108-
tr = tr_array.reshape(-1,1)
123+
tr = tr_array.reshape(-1, 1)
109124

110125
# Get spline predictions and linear extrapolation predictions
111126
y_pred_spline = self._spline_model.predict(tr)
@@ -120,10 +135,10 @@ def transform(self, tr: np.ndarray) -> np.ndarray:
120135
cal_preds = np.copy(y_pred_spline)
121136
cal_preds[~within_range & (tr.ravel() < self._calibrate_min)] = y_pred_left[
122137
~within_range & (tr.ravel() < self._calibrate_min)
123-
]
138+
]
124139
cal_preds[~within_range & (tr.ravel() > self._calibrate_max)] = y_pred_right[
125140
~within_range & (tr.ravel() > self._calibrate_max)
126-
]
141+
]
127142

128143
LOGGER.info("Calibration transformation completed successfully.")
129-
return np.array(cal_preds)
144+
return np.array(cal_preds)

ideeplc/config.py

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,27 @@
1-
def get_config(lr=1e-3, epoch=10, batch=256, kernel=5, kernel2=3, kernel3=9, kernel4=7, cnn_channels=245,
2-
cnn2_channels=41, cnn3_channels=35, cnn4_channels=50, cnn_layers=1, cnn2_layers=0, cnn3_layers=5,
3-
cnn4_layers=3, fc_layers=2, fc_output=78, fc2_layers=1, fc2_output=77, drop=0.23, clip=0.25, layers_to_freeze=None):
1+
def get_config(
2+
lr=1e-3,
3+
epoch=10,
4+
batch=256,
5+
kernel=5,
6+
kernel2=3,
7+
kernel3=9,
8+
kernel4=7,
9+
cnn_channels=245,
10+
cnn2_channels=41,
11+
cnn3_channels=35,
12+
cnn4_channels=50,
13+
cnn_layers=1,
14+
cnn2_layers=0,
15+
cnn3_layers=5,
16+
cnn4_layers=3,
17+
fc_layers=2,
18+
fc_output=78,
19+
fc2_layers=1,
20+
fc2_output=77,
21+
drop=0.23,
22+
clip=0.25,
23+
layers_to_freeze=None,
24+
):
425
"""
526
Initialize the configuration for the model hyperparameters
627
@@ -29,11 +50,29 @@ def get_config(lr=1e-3, epoch=10, batch=256, kernel=5, kernel2=3, kernel3=9, ker
2950
:return: configuration dictionary
3051
"""
3152

32-
config = {"learning_rate": lr, "epochs": epoch, "batch_size": batch, "kernel_size": kernel, "kernel2_size": kernel2,
33-
"kernel3_size": kernel3, "kernel4_size": kernel4, "fc_out": fc_output, "fc_layers": fc_layers,
34-
"fc2_out": fc2_output, "fc2_layers": fc2_layers, "cnn_layers": cnn_layers, "cnn_channels": cnn_channels,
35-
"cnn2_layers": cnn2_layers, "cnn2_channels": cnn2_channels, "cnn3_layers": cnn3_layers,
36-
"cnn3_channels": cnn3_channels, "cnn4_layers": cnn4_layers, "cnn4_channels": cnn4_channels,
37-
"clipping_size": clip, "dropout": drop, "layers_to_freeze": layers_to_freeze}
53+
config = {
54+
"learning_rate": lr,
55+
"epochs": epoch,
56+
"batch_size": batch,
57+
"kernel_size": kernel,
58+
"kernel2_size": kernel2,
59+
"kernel3_size": kernel3,
60+
"kernel4_size": kernel4,
61+
"fc_out": fc_output,
62+
"fc_layers": fc_layers,
63+
"fc2_out": fc2_output,
64+
"fc2_layers": fc2_layers,
65+
"cnn_layers": cnn_layers,
66+
"cnn_channels": cnn_channels,
67+
"cnn2_layers": cnn2_layers,
68+
"cnn2_channels": cnn2_channels,
69+
"cnn3_layers": cnn3_layers,
70+
"cnn3_channels": cnn3_channels,
71+
"cnn4_layers": cnn4_layers,
72+
"cnn4_channels": cnn4_channels,
73+
"clipping_size": clip,
74+
"dropout": drop,
75+
"layers_to_freeze": layers_to_freeze,
76+
}
3877

3978
return config

ideeplc/data_initialize.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from ideeplc.utilities import df_to_matrix, reform_seq
88

99
LOGGER = logging.getLogger(__name__)
10+
11+
1012
# Making the pytorch dataset
1113
class MyDataset(Dataset):
1214
def __init__(self, sequences: np.ndarray, retention: np.ndarray) -> None:
@@ -21,8 +23,7 @@ def __getitem__(self, idx: int) -> Tuple[np.ndarray, np.ndarray]:
2123

2224

2325
def data_initialize(
24-
csv_path: str,
25-
**kwargs
26+
csv_path: str, **kwargs
2627
) -> Union[Tuple[MyDataset, np.ndarray], Tuple[MyDataset, np.ndarray]]:
2728
"""
2829
Initialize peptides matrices based on a CSV file containing raw peptide sequences.
@@ -45,17 +46,21 @@ def data_initialize(
4546
LOGGER.error(f"Error reading {csv_path}: {e}")
4647
raise
4748

48-
if 'seq' not in df.columns:
49+
if "seq" not in df.columns:
4950
LOGGER.error(f"CSV file must contain a 'seq' column with peptide sequences.")
5051
raise ValueError("Missing 'seq' column in the CSV file.")
51-
if 'modifications' not in df.columns:
52-
LOGGER.error("CSV file must contain a 'modifications' column with peptide modifications.")
52+
if "modifications" not in df.columns:
53+
LOGGER.error(
54+
"CSV file must contain a 'modifications' column with peptide modifications."
55+
)
5356
raise ValueError("Missing 'modifications' column in the CSV file.")
5457

5558
reformed_peptides = [
56-
reform_seq(seq, mod) for seq, mod in zip(df['seq'], df['modifications'])
59+
reform_seq(seq, mod) for seq, mod in zip(df["seq"], df["modifications"])
5760
]
58-
LOGGER.info(f"Loaded and reformed {len(reformed_peptides)} peptides sequences from the file.")
61+
LOGGER.info(
62+
f"Loaded and reformed {len(reformed_peptides)} peptides sequences from the file."
63+
)
5964
try:
6065
# Convert sequences to matrix format
6166
sequences, tr, errors = df_to_matrix(reformed_peptides, df)

ideeplc/figure.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,15 @@
88

99
LOGGER = logging.getLogger(__name__)
1010

11-
def make_figures(predictions: list, ground_truth: list, input_file: str, calibrated: bool = False, finetuned: bool = False, save_results: bool = True,
1211

13-
):
12+
def make_figures(
13+
predictions: list,
14+
ground_truth: list,
15+
input_file: str,
16+
calibrated: bool = False,
17+
finetuned: bool = False,
18+
save_results: bool = True,
19+
):
1420
"""
1521
Create and save scatter plot of predicted vs observed retention times.
1622
@@ -24,19 +30,33 @@ def make_figures(predictions: list, ground_truth: list, input_file: str, calibra
2430
"""
2531
try:
2632
mae_predictions = mean_absolute_error(ground_truth, predictions)
27-
max_value = max(max(ground_truth), max(predictions)) * 1.05 # Extend the max value by 5% for better visualization
33+
max_value = (
34+
max(max(ground_truth), max(predictions)) * 1.05
35+
) # Extend the max value by 5% for better visualization
2836

2937
fig, ax = plt.subplots(figsize=(7, 7))
30-
ax.scatter(ground_truth, predictions, c="b",
31-
label=f"MAE: {mae_predictions:.3f}, R: {np.corrcoef(ground_truth, predictions)[0, 1]:.3f}", s=3)
38+
ax.scatter(
39+
ground_truth,
40+
predictions,
41+
c="b",
42+
label=f"MAE: {mae_predictions:.3f}, R: {np.corrcoef(ground_truth, predictions)[0, 1]:.3f}",
43+
s=3,
44+
)
3245
plt.legend(loc="upper left")
3346
plt.xlabel("Observed Retention Time")
3447
plt.ylabel("Predicted Retention Time")
3548

3649
timestamp = datetime.datetime.now().strftime("%Y%m%d")
3750
input_file_name = os.path.splitext(os.path.basename(input_file))[0]
38-
status = "finetuned" if finetuned else ("calibrated" if calibrated else "not_calibrated")
39-
output_path = Path("ideeplc_output") / f"{input_file_name}_predictions_{timestamp}{status}.png"
51+
status = (
52+
"finetuned"
53+
if finetuned
54+
else ("calibrated" if calibrated else "not_calibrated")
55+
)
56+
output_path = (
57+
Path("ideeplc_output")
58+
/ f"{input_file_name}_predictions_{timestamp}{status}.png"
59+
)
4060
plt.title(f"scatterplot({status})\n")
4161
plt.axis("scaled")
4262
ax.plot([0, max_value], [0, max_value], ls="--", c=".5")

ideeplc/fine_tuning.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,19 @@ class iDeepLCFineTuner:
1212
A class to fine-tune the iDeepLC model on a new dataset.
1313
"""
1414

15-
def __init__(self, model, train_data, loss_function, device="cpu", learning_rate=0.001, epochs=10, batch_size=256,
16-
validation_data=None, validation_split=0.1, patience=5):
15+
def __init__(
16+
self,
17+
model,
18+
train_data,
19+
loss_function,
20+
device="cpu",
21+
learning_rate=0.001,
22+
epochs=10,
23+
batch_size=256,
24+
validation_data=None,
25+
validation_split=0.1,
26+
patience=5,
27+
):
1728
"""
1829
Initialize the fine-tuner with the model and data loaders.
1930
@@ -82,13 +93,15 @@ def fine_tune(self, layers_to_freeze=None):
8293
# Split the training data into training and validation sets
8394
train_size = int((1 - self.validation_split) * len(self.train_data))
8495
val_size = len(self.train_data) - train_size
85-
train_dataset, val_dataset = torch.utils.data.random_split(self.train_data, [train_size, val_size])
96+
train_dataset, val_dataset = torch.utils.data.random_split(
97+
self.train_data, [train_size, val_size]
98+
)
8699
dataloader_train = self.prepare_data(train_dataset)
87100
dataloader_val = self.prepare_data(val_dataset, shuffle=False)
88101
LOGGER.info(f"Training on {len(dataloader_train.dataset)} samples.")
89102

90103
best_model = copy.deepcopy(self.model)
91-
best_loss = float('inf')
104+
best_loss = float("inf")
92105
patience_counter = 0
93106

94107
for epoch in range(self.epochs):
@@ -114,7 +127,9 @@ def fine_tune(self, layers_to_freeze=None):
114127

115128
# Validate the model after each epoch
116129
if dataloader_val:
117-
val_loss, _, _, _ = validate(self.model, dataloader_val, loss_fn, self.device)
130+
val_loss, _, _, _ = validate(
131+
self.model, dataloader_val, loss_fn, self.device
132+
)
118133
if val_loss < best_loss:
119134
best_loss = val_loss
120135
best_model = copy.deepcopy(self.model)
@@ -129,5 +144,3 @@ def fine_tune(self, layers_to_freeze=None):
129144

130145
LOGGER.info("Fine-tuning complete.")
131146
return best_model
132-
133-

0 commit comments

Comments
 (0)