Skip to content

Commit d450e39

Browse files
committed
Ver 1.1.3
This version fixes minor bugs and formats every script with Black.
1 parent d062257 commit d450e39

File tree

11 files changed

+485
-199
lines changed

11 files changed

+485
-199
lines changed

ideeplc/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
"""iDeepLC: A deep Learning-based retention time predictor for unseen modified peptides with a novel encoding system"""
22

3-
__version__ = "1.1.2"
4-
3+
__version__ = "1.1.3"

ideeplc/__main__.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
from pathlib import Path
77

88
from ideeplc import __version__
9-
from ideeplc.ideeplc_core import main as run_ideeplc # Assumes main logic is exposed here
9+
from ideeplc.ideeplc_core import (
10+
main as run_ideeplc,
11+
) # Assumes main logic is exposed here
1012
from rich.console import Console
1113
from rich.logging import RichHandler
1214
from rich.text import Text
@@ -39,26 +41,49 @@ def _argument_parser() -> argparse.ArgumentParser:
3941
parser = argparse.ArgumentParser(
4042
description="iDeepLC: Deep learning-based retention time prediction",
4143
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=42),
42-
epilog=("Example usage: python -m ideeplc --input peptides.csv --save --finetune\n ")
44+
epilog=(
45+
"Example usage: python -m ideeplc --input peptides.csv --save --finetune\n "
46+
),
47+
)
48+
parser.add_argument(
49+
"-i",
50+
"--input",
51+
type=str,
52+
required=True,
53+
help="Path to the CSV file containing the peptide sequences.",
54+
)
55+
parser.add_argument(
56+
"-s", "--save", action="store_true", help="Flag to save results to disk."
57+
)
58+
parser.add_argument(
59+
"-f",
60+
"--finetune",
61+
action="store_true",
62+
help="Flag to enable fine-tuning of the model.",
63+
)
64+
parser.add_argument(
65+
"-l",
66+
"--log_level",
67+
type=str,
68+
default="info",
69+
choices=LOG_MAPPING.keys(),
70+
help="Logging level (default: info).",
71+
)
72+
parser.add_argument(
73+
"-c",
74+
"--calibrate",
75+
action="store_true",
76+
help="Flag to enable calibration of the model predictions.",
4377
)
44-
parser.add_argument("-i", "--input", type=str, required=True,
45-
help="Path to the CSV file containing the peptide sequences.")
46-
parser.add_argument("-s", "--save", action="store_true",
47-
help="Flag to save results to disk.")
48-
parser.add_argument("-f", "--finetune", action="store_true",
49-
help="Flag to enable fine-tuning of the model.")
50-
parser.add_argument("-l", "--log_level", type=str, default="info",
51-
choices=LOG_MAPPING.keys(),
52-
help="Logging level (default: info).")
53-
parser.add_argument("-c", "--calibrate", action="store_true",
54-
help="Flag to enable calibration of the model predictions.")
5578
return parser
5679

5780

5881
def _setup_logging(level: str, log_file: Path = None):
5982
"""Set up the logging configuration."""
6083
if level not in LOG_MAPPING:
61-
raise ValueError(f"Invalid log level '{level}'. Choose from {', '.join(LOG_MAPPING)}")
84+
raise ValueError(
85+
f"Invalid log level '{level}'. Choose from {', '.join(LOG_MAPPING)}"
86+
)
6287
handlers = [RichHandler(rich_tracebacks=True, console=CONSOLE, show_path=False)]
6388
if log_file:
6489
handlers.append(logging.FileHandler(log_file, mode="w", encoding="utf-8"))

ideeplc/calibrate.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
LOGGER = logging.getLogger(__name__)
99

10+
1011
class SplineTransformerCalibration:
1112
"""Spline Transformer Calibration for Retention Time Prediction."""
1213

@@ -19,7 +20,12 @@ def __init__(self):
1920
self._spline_model = None
2021
self._linear_model_right = None
2122

22-
def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: bool = False):
23+
def fit(
24+
self,
25+
measured_tr: np.ndarray,
26+
predicted_tr: np.ndarray,
27+
simplified: bool = False,
28+
):
2329
"""
2430
Fit a SplineTransformer model to the measured and predicted retention times.
2531
@@ -40,8 +46,12 @@ def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: boo
4046

4147
# Check if the lengths match
4248
if len(measured_tr) != len(predicted_tr):
43-
LOGGER.error("Measured and predicted retention times must have the same length.")
44-
raise ValueError("Measured and predicted retention times must have the same length.")
49+
LOGGER.error(
50+
"Measured and predicted retention times must have the same length."
51+
)
52+
raise ValueError(
53+
"Measured and predicted retention times must have the same length."
54+
)
4555

4656
# Fit a SplineTransformer model
4757
if simplified:
@@ -54,7 +64,9 @@ def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: boo
5464
linear_model_right = linear_model
5565
else:
5666
LOGGER.info("Using SplineTransformer with more knots for calibration.")
57-
spline = SplineTransformer(degree=4, n_knots=int(len(measured_tr) / 500) + 5)
67+
spline = SplineTransformer(
68+
degree=4, n_knots=int(len(measured_tr) / 500) + 5
69+
)
5870
spline_model = make_pipeline(spline, LinearRegression())
5971
spline_model.fit(predicted_tr.reshape(-1, 1), measured_tr)
6072

@@ -83,7 +95,6 @@ def fit(self, measured_tr: np.ndarray, predicted_tr: np.ndarray, simplified: boo
8395
self._fit = True
8496
LOGGER.info("Calibration fitting completed successfully.")
8597

86-
8798
def transform(self, tr: np.ndarray) -> np.ndarray:
8899
"""
89100
Transform the predicted retention times using the fitted SplineTransformer model.
@@ -99,13 +110,17 @@ def transform(self, tr: np.ndarray) -> np.ndarray:
99110
The calibrated retention times.
100111
"""
101112
if not self._fit:
102-
LOGGER.error("Calibration model has not been fitted yet. Call fit() before transform().")
103-
raise RuntimeError("Calibration model has not been fitted yet. Call fit() before transform().")
113+
LOGGER.error(
114+
"Calibration model has not been fitted yet. Call fit() before transform()."
115+
)
116+
raise RuntimeError(
117+
"Calibration model has not been fitted yet. Call fit() before transform()."
118+
)
104119

105120
# if tr.shape[0] == 0:
106121
# return np.array([])
107122
tr_array = np.array(tr)
108-
tr = tr_array.reshape(-1,1)
123+
tr = tr_array.reshape(-1, 1)
109124

110125
# Get spline predictions and linear extrapolation predictions
111126
y_pred_spline = self._spline_model.predict(tr)
@@ -120,10 +135,10 @@ def transform(self, tr: np.ndarray) -> np.ndarray:
120135
cal_preds = np.copy(y_pred_spline)
121136
cal_preds[~within_range & (tr.ravel() < self._calibrate_min)] = y_pred_left[
122137
~within_range & (tr.ravel() < self._calibrate_min)
123-
]
138+
]
124139
cal_preds[~within_range & (tr.ravel() > self._calibrate_max)] = y_pred_right[
125140
~within_range & (tr.ravel() > self._calibrate_max)
126-
]
141+
]
127142

128143
LOGGER.info("Calibration transformation completed successfully.")
129-
return np.array(cal_preds)
144+
return np.array(cal_preds)

ideeplc/config.py

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,27 @@
1-
def get_config(lr=1e-3, epoch=10, batch=256, kernel=5, kernel2=3, kernel3=9, kernel4=7, cnn_channels=245,
2-
cnn2_channels=41, cnn3_channels=35, cnn4_channels=50, cnn_layers=1, cnn2_layers=0, cnn3_layers=5,
3-
cnn4_layers=3, fc_layers=2, fc_output=78, fc2_layers=1, fc2_output=77, drop=0.23, clip=0.25, layers_to_freeze=None):
1+
def get_config(
2+
lr=1e-3,
3+
epoch=10,
4+
batch=256,
5+
kernel=5,
6+
kernel2=3,
7+
kernel3=9,
8+
kernel4=7,
9+
cnn_channels=245,
10+
cnn2_channels=41,
11+
cnn3_channels=35,
12+
cnn4_channels=50,
13+
cnn_layers=1,
14+
cnn2_layers=0,
15+
cnn3_layers=5,
16+
cnn4_layers=3,
17+
fc_layers=2,
18+
fc_output=78,
19+
fc2_layers=1,
20+
fc2_output=77,
21+
drop=0.23,
22+
clip=0.25,
23+
layers_to_freeze=None,
24+
):
425
"""
526
Initialize the configuration for the model hyperparameters
627
@@ -29,11 +50,29 @@ def get_config(lr=1e-3, epoch=10, batch=256, kernel=5, kernel2=3, kernel3=9, ker
2950
:return: configuration dictionary
3051
"""
3152

32-
config = {"learning_rate": lr, "epochs": epoch, "batch_size": batch, "kernel_size": kernel, "kernel2_size": kernel2,
33-
"kernel3_size": kernel3, "kernel4_size": kernel4, "fc_out": fc_output, "fc_layers": fc_layers,
34-
"fc2_out": fc2_output, "fc2_layers": fc2_layers, "cnn_layers": cnn_layers, "cnn_channels": cnn_channels,
35-
"cnn2_layers": cnn2_layers, "cnn2_channels": cnn2_channels, "cnn3_layers": cnn3_layers,
36-
"cnn3_channels": cnn3_channels, "cnn4_layers": cnn4_layers, "cnn4_channels": cnn4_channels,
37-
"clipping_size": clip, "dropout": drop, "layers_to_freeze": layers_to_freeze}
53+
config = {
54+
"learning_rate": lr,
55+
"epochs": epoch,
56+
"batch_size": batch,
57+
"kernel_size": kernel,
58+
"kernel2_size": kernel2,
59+
"kernel3_size": kernel3,
60+
"kernel4_size": kernel4,
61+
"fc_out": fc_output,
62+
"fc_layers": fc_layers,
63+
"fc2_out": fc2_output,
64+
"fc2_layers": fc2_layers,
65+
"cnn_layers": cnn_layers,
66+
"cnn_channels": cnn_channels,
67+
"cnn2_layers": cnn2_layers,
68+
"cnn2_channels": cnn2_channels,
69+
"cnn3_layers": cnn3_layers,
70+
"cnn3_channels": cnn3_channels,
71+
"cnn4_layers": cnn4_layers,
72+
"cnn4_channels": cnn4_channels,
73+
"clipping_size": clip,
74+
"dropout": drop,
75+
"layers_to_freeze": layers_to_freeze,
76+
}
3877

3978
return config

ideeplc/data_initialize.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from ideeplc.utilities import df_to_matrix, reform_seq
88

99
LOGGER = logging.getLogger(__name__)
10+
11+
1012
# Making the pytorch dataset
1113
class MyDataset(Dataset):
1214
def __init__(self, sequences: np.ndarray, retention: np.ndarray) -> None:
@@ -21,8 +23,7 @@ def __getitem__(self, idx: int) -> Tuple[np.ndarray, np.ndarray]:
2123

2224

2325
def data_initialize(
24-
csv_path: str,
25-
**kwargs
26+
csv_path: str, **kwargs
2627
) -> Union[Tuple[MyDataset, np.ndarray], Tuple[MyDataset, np.ndarray]]:
2728
"""
2829
Initialize peptides matrices based on a CSV file containing raw peptide sequences.
@@ -45,17 +46,21 @@ def data_initialize(
4546
LOGGER.error(f"Error reading {csv_path}: {e}")
4647
raise
4748

48-
if 'seq' not in df.columns:
49+
if "seq" not in df.columns:
4950
LOGGER.error(f"CSV file must contain a 'seq' column with peptide sequences.")
5051
raise ValueError("Missing 'seq' column in the CSV file.")
51-
if 'modifications' not in df.columns:
52-
LOGGER.error("CSV file must contain a 'modifications' column with peptide modifications.")
52+
if "modifications" not in df.columns:
53+
LOGGER.error(
54+
"CSV file must contain a 'modifications' column with peptide modifications."
55+
)
5356
raise ValueError("Missing 'modifications' column in the CSV file.")
5457

5558
reformed_peptides = [
56-
reform_seq(seq, mod) for seq, mod in zip(df['seq'], df['modifications'])
59+
reform_seq(seq, mod) for seq, mod in zip(df["seq"], df["modifications"])
5760
]
58-
LOGGER.info(f"Loaded and reformed {len(reformed_peptides)} peptides sequences from the file.")
61+
LOGGER.info(
62+
f"Loaded and reformed {len(reformed_peptides)} peptides sequences from the file."
63+
)
5964
try:
6065
# Convert sequences to matrix format
6166
sequences, tr, errors = df_to_matrix(reformed_peptides, df)

ideeplc/figure.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,15 @@
88

99
LOGGER = logging.getLogger(__name__)
1010

11-
def make_figures(predictions: list, ground_truth: list, input_file: str, calibrated: bool = False, finetuned: bool = False, save_results: bool = True,
1211

13-
):
12+
def make_figures(
13+
predictions: list,
14+
ground_truth: list,
15+
input_file: str,
16+
calibrated: bool = False,
17+
finetuned: bool = False,
18+
save_results: bool = True,
19+
):
1420
"""
1521
Create and save scatter plot of predicted vs observed retention times.
1622
@@ -24,19 +30,33 @@ def make_figures(predictions: list, ground_truth: list, input_file: str, calibra
2430
"""
2531
try:
2632
mae_predictions = mean_absolute_error(ground_truth, predictions)
27-
max_value = max(max(ground_truth), max(predictions)) * 1.05 # Extend the max value by 5% for better visualization
33+
max_value = (
34+
max(max(ground_truth), max(predictions)) * 1.05
35+
) # Extend the max value by 5% for better visualization
2836

2937
fig, ax = plt.subplots(figsize=(7, 7))
30-
ax.scatter(ground_truth, predictions, c="b",
31-
label=f"MAE: {mae_predictions:.3f}, R: {np.corrcoef(ground_truth, predictions)[0, 1]:.3f}", s=3)
38+
ax.scatter(
39+
ground_truth,
40+
predictions,
41+
c="b",
42+
label=f"MAE: {mae_predictions:.3f}, R: {np.corrcoef(ground_truth, predictions)[0, 1]:.3f}",
43+
s=3,
44+
)
3245
plt.legend(loc="upper left")
3346
plt.xlabel("Observed Retention Time")
3447
plt.ylabel("Predicted Retention Time")
3548

3649
timestamp = datetime.datetime.now().strftime("%Y%m%d")
3750
input_file_name = os.path.splitext(os.path.basename(input_file))[0]
38-
status = "finetuned" if finetuned else ("calibrated" if calibrated else "not_calibrated")
39-
output_path = Path("ideeplc_output") / f"{input_file_name}_predictions_{timestamp}{status}.png"
51+
status = (
52+
"finetuned"
53+
if finetuned
54+
else ("calibrated" if calibrated else "not_calibrated")
55+
)
56+
output_path = (
57+
Path("ideeplc_output")
58+
/ f"{input_file_name}_predictions_{timestamp}{status}.png"
59+
)
4060
plt.title(f"scatterplot({status})\n")
4161
plt.axis("scaled")
4262
ax.plot([0, max_value], [0, max_value], ls="--", c=".5")

ideeplc/fine_tuning.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,19 @@ class iDeepLCFineTuner:
1212
A class to fine-tune the iDeepLC model on a new dataset.
1313
"""
1414

15-
def __init__(self, model, train_data, loss_function, device="cpu", learning_rate=0.001, epochs=10, batch_size=256,
16-
validation_data=None, validation_split=0.1, patience=5):
15+
def __init__(
16+
self,
17+
model,
18+
train_data,
19+
loss_function,
20+
device="cpu",
21+
learning_rate=0.001,
22+
epochs=10,
23+
batch_size=256,
24+
validation_data=None,
25+
validation_split=0.1,
26+
patience=5,
27+
):
1728
"""
1829
Initialize the fine-tuner with the model and data loaders.
1930
@@ -82,13 +93,15 @@ def fine_tune(self, layers_to_freeze=None):
8293
# Split the training data into training and validation sets
8394
train_size = int((1 - self.validation_split) * len(self.train_data))
8495
val_size = len(self.train_data) - train_size
85-
train_dataset, val_dataset = torch.utils.data.random_split(self.train_data, [train_size, val_size])
96+
train_dataset, val_dataset = torch.utils.data.random_split(
97+
self.train_data, [train_size, val_size]
98+
)
8699
dataloader_train = self.prepare_data(train_dataset)
87100
dataloader_val = self.prepare_data(val_dataset, shuffle=False)
88101
LOGGER.info(f"Training on {len(dataloader_train.dataset)} samples.")
89102

90103
best_model = copy.deepcopy(self.model)
91-
best_loss = float('inf')
104+
best_loss = float("inf")
92105
patience_counter = 0
93106

94107
for epoch in range(self.epochs):
@@ -114,7 +127,9 @@ def fine_tune(self, layers_to_freeze=None):
114127

115128
# Validate the model after each epoch
116129
if dataloader_val:
117-
val_loss, _, _, _ = validate(self.model, dataloader_val, loss_fn, self.device)
130+
val_loss, _, _, _ = validate(
131+
self.model, dataloader_val, loss_fn, self.device
132+
)
118133
if val_loss < best_loss:
119134
best_loss = val_loss
120135
best_model = copy.deepcopy(self.model)
@@ -129,5 +144,3 @@ def fine_tune(self, layers_to_freeze=None):
129144

130145
LOGGER.info("Fine-tuning complete.")
131146
return best_model
132-
133-

0 commit comments

Comments
 (0)