
Commit bdda7ee

Merge pull request #35 from ChEB-AI/code_documentation
Code documentation
2 parents 0176517 + cdf5989

37 files changed: +2925 additions, -631 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -161,6 +161,7 @@ cython_debug/
 #.idea/

 # configs/ # commented as new configs can be added as a part of a feature
+
 /.idea
 /data
 /logs

chebai/__init__.py

Lines changed: 21 additions & 1 deletion
@@ -1,10 +1,30 @@
 import os
+from typing import Any

 import torch

+# Get the absolute path of the current file's directory
 MODULE_PATH = os.path.abspath(os.path.dirname(__file__))


 class CustomTensor(torch.Tensor):
-    def __new__(cls, data):
+    """
+    A custom tensor class inheriting from `torch.Tensor`.
+
+    This class allows for the creation of tensors using the provided data.
+
+    Attributes:
+        data (Any): The data to be converted into a tensor.
+    """
+
+    def __new__(cls, data: Any) -> "CustomTensor":
+        """
+        Creates a new instance of CustomTensor.
+
+        Args:
+            data (Any): The data to be converted into a tensor.
+
+        Returns:
+            CustomTensor: A tensor containing the provided data.
+        """
        return torch.tensor(data)
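For orientation, a minimal sketch of how the documented class behaves (illustrative values, not part of this commit):

# Sketch: using CustomTensor (illustrative values, not part of this PR).
from chebai import CustomTensor

t = CustomTensor([1, 2, 3])
print(t)        # tensor([1, 2, 3])
# Note: __new__ returns the result of torch.tensor(data), so the object
# is a plain torch.Tensor rather than an instance of CustomTensor.
print(type(t))  # <class 'torch.Tensor'>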

chebai/__main__.py

Lines changed: 6 additions & 0 deletions
@@ -1,4 +1,10 @@
 from chebai.cli import cli

 if __name__ == "__main__":
+    """
+    Entry point for the CLI application.
+
+    This script calls the `cli` function from the `chebai.cli` module
+    when executed as the main program.
+    """
     cli()
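With this entry point, the package can be run as `python -m chebai`, which dispatches to `chebai.cli.cli`. Worth noting: a string literal placed inside the `if __name__ == "__main__":` block is evaluated as a bare expression and is not attached anywhere as a docstring, so it acts only as an inline comment.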

chebai/callbacks.py

Lines changed: 59 additions & 14 deletions
@@ -1,31 +1,76 @@
 import json
 import os
+from typing import Any, Dict, List, Literal, Union

 import torch
 from lightning.pytorch.callbacks import BasePredictionWriter


 class ChebaiPredictionWriter(BasePredictionWriter):
-    def __init__(self, output_dir, write_interval, target_file="predictions.json"):
+    """
+    A custom prediction writer for saving batch and epoch predictions during model training.
+
+    This class inherits from `BasePredictionWriter` and is designed to save predictions
+    in a specified output directory at specified intervals.
+
+    Args:
+        output_dir (str): The directory where predictions will be saved.
+        write_interval (str): The interval at which predictions will be written.
+        target_file (str): The name of the file where epoch predictions will be saved (default: "predictions.json").
+    """
+
+    def __init__(
+        self,
+        output_dir: str,
+        write_interval: Literal["batch", "epoch", "batch_and_epoch"],
+        target_file: str = "predictions.json",
+    ) -> None:
         super().__init__(write_interval)
         self.output_dir = output_dir
         self.target_file = target_file

     def write_on_batch_end(
         self,
-        trainer,
-        pl_module,
-        prediction,
-        batch_indices,
-        batch,
-        batch_idx,
-        dataloader_idx,
-    ):
-        outpath = os.path.join(self.output_dir, dataloader_idx, f"{batch_idx}.pt")
-        os.makedirs(outpath, exist_ok=True)
+        trainer: Any,
+        pl_module: Any,
+        prediction: Union[torch.Tensor, List[torch.Tensor]],
+        batch_indices: List[int],
+        batch: Any,
+        batch_idx: int,
+        dataloader_idx: int,
+    ) -> None:
+        """
+        Saves batch predictions at the end of each batch.
+
+        Args:
+            trainer (Any): The trainer instance.
+            pl_module (Any): The LightningModule instance.
+            prediction (Union[torch.Tensor, List[torch.Tensor]]): The prediction output from the model.
+            batch_indices (List[int]): The indices of the batch.
+            batch (Any): The current batch.
+            batch_idx (int): The index of the batch.
+            dataloader_idx (int): The index of the dataloader.
+        """
+        outpath = os.path.join(self.output_dir, str(dataloader_idx), f"{batch_idx}.pt")
+        os.makedirs(os.path.dirname(outpath), exist_ok=True)
         torch.save(prediction, outpath)

-    def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices):
+    def write_on_epoch_end(
+        self,
+        trainer: Any,
+        pl_module: Any,
+        predictions: List[Dict[str, Any]],
+        batch_indices: List[int],
+    ) -> None:
+        """
+        Saves all predictions at the end of each epoch in a JSON file.
+
+        Args:
+            trainer (Any): The trainer instance.
+            pl_module (Any): The LightningModule instance.
+            predictions (List[Dict[str, Any]]): The list of prediction outputs from the model.
+            batch_indices (List[int]): The indices of the batches.
+        """
         pred_list = []
         for p in predictions:
             idents = p["data"]["idents"]

@@ -35,7 +80,7 @@ def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices):
             else:
                 labels = [None for _ in idents]
             output = torch.sigmoid(p["output"]["logits"]).tolist()
-            for i, l, p in zip(idents, labels, output):
-                pred_list.append(dict(ident=i, labels=l, predictions=p))
+            for i, l, o in zip(idents, labels, output):
+                pred_list.append(dict(ident=i, labels=l, predictions=o))
         with open(os.path.join(self.output_dir, self.target_file), "wt") as fout:
             json.dump(pred_list, fout)
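Beyond the docstrings, this diff fixes two latent bugs: `write_on_batch_end` previously passed the integer `dataloader_idx` straight to `os.path.join` and created the output file path itself as a directory, and the epoch-end loop shadowed the outer loop variable `p`. A minimal sketch of how the writer would be attached to a Lightning `Trainer` (the import path follows the file name in this diff; the model and datamodule are hypothetical placeholders):

# Sketch: attaching ChebaiPredictionWriter to a Lightning Trainer.
# `my_model` and `my_datamodule` are hypothetical placeholders assumed
# to be a LightningModule and a LightningDataModule.
from lightning.pytorch import Trainer

from chebai.callbacks import ChebaiPredictionWriter

writer = ChebaiPredictionWriter(
    output_dir="predictions",  # epoch-end JSON goes to predictions/predictions.json
    write_interval="epoch",    # one of "batch", "epoch", "batch_and_epoch"
)
trainer = Trainer(callbacks=[writer])
# return_predictions=False avoids holding all outputs in memory,
# since the callback already persists them to disk.
trainer.predict(my_model, datamodule=my_datamodule, return_predictions=False)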

chebai/callbacks/epoch_metrics.py

Lines changed: 71 additions & 14 deletions
@@ -2,13 +2,38 @@
 import torchmetrics


-def custom_reduce_fx(input):
+def custom_reduce_fx(input: torch.Tensor) -> torch.Tensor:
+    """
+    Custom reduction function for distributed training.
+
+    Args:
+        input (torch.Tensor): The input tensor to be reduced.
+
+    Returns:
+        torch.Tensor: The reduced tensor.
+    """
     print(f"called reduce (device: {input.device})")
     return torch.sum(input, dim=0)


 class MacroF1(torchmetrics.Metric):
-    def __init__(self, num_labels, dist_sync_on_step=False, threshold=0.5):
+    """
+    Computes the Macro F1 score, which is the unweighted mean of F1 scores for each class.
+    This implementation differs from torchmetrics.classification.MultilabelF1Score in the behaviour for undefined
+    values (i.e., classes where TP+FN=0). The torchmetrics implementation sets these classes to a default value.
+    Here, the mean is only taken over classes which have at least one positive sample.
+
+    Args:
+        num_labels (int): Number of classes/labels.
+        dist_sync_on_step (bool, optional): Synchronize metric state across processes at each forward
+            before returning the value at the step. Default: False.
+        threshold (float, optional): Threshold for converting predicted probabilities to binary (0, 1) predictions.
+            Default: 0.5.
+    """
+
+    def __init__(
+        self, num_labels: int, dist_sync_on_step: bool = False, threshold: float = 0.5
+    ):
         super().__init__(dist_sync_on_step=dist_sync_on_step)

         self.add_state(

@@ -28,15 +53,29 @@ def __init__(self, num_labels, dist_sync_on_step=False, threshold=0.5):
         )
         self.threshold = threshold

-    def update(self, preds: torch.Tensor, labels: torch.Tensor):
+    def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None:
+        """
+        Update the state (TPs, Positive Predictions, Positive labels) with the current batch of predictions and labels.
+
+        Args:
+            preds (torch.Tensor): Predictions from the model.
+            labels (torch.Tensor): Ground truth labels.
+        """
         tps = torch.sum(
             torch.logical_and(preds > self.threshold, labels.to(torch.bool)), dim=0
         )
         self.true_positives += tps
         self.positive_predictions += torch.sum(preds > self.threshold, dim=0)
         self.positive_labels += torch.sum(labels, dim=0)

-    def compute(self):
+    def compute(self) -> torch.Tensor:
+        """
+        Compute the Macro F1 score.
+
+        Returns:
+            torch.Tensor: The computed Macro F1 score.
+        """
+
         # ignore classes without positive labels
         # classes with positive labels, but no positive predictions will get a precision of "nan" (0 divided by 0),
         # which is propagated to the classwise_f1 and then turned into 0

@@ -50,14 +89,22 @@ def compute(self):


 class BalancedAccuracy(torchmetrics.Metric):
-    """Balanced Accuracy = (TPR + TNR) / 2 = ( TP/(TP + FN) + (TN)/(TN + FP) ) / 2
-
-    This metric computes the balanced accuracy, which is the average of true positive rate (TPR)
-    and true negative rate (TNR). It is useful for imbalanced datasets where the classes are not
-    represented equally.
+    """
+    Computes the Balanced Accuracy, which is the average of true positive rate (TPR) and true negative rate (TNR).
+    Useful for imbalanced datasets.
+    Balanced Accuracy = (TPR + TNR)/2 = (TP/(TP + FN) + (TN)/(TN + FP))/2
+
+    Args:
+        num_labels (int): Number of classes/labels.
+        dist_sync_on_step (bool, optional): Synchronize metric state across processes at each forward
+            before returning the value at the step. Default: False.
+        threshold (float, optional): Threshold for converting predicted probabilities to binary (0, 1) predictions.
+            Default: 0.5.
     """

-    def __init__(self, num_labels, dist_sync_on_step=False, threshold=0.5):
+    def __init__(
+        self, num_labels: int, dist_sync_on_step: bool = False, threshold: float = 0.5
+    ):
         super().__init__(dist_sync_on_step=dist_sync_on_step)

         self.add_state(

@@ -86,8 +133,14 @@ def __init__(self, num_labels, dist_sync_on_step=False, threshold=0.5):

         self.threshold = threshold

-    def update(self, preds: torch.Tensor, labels: torch.Tensor):
-        """Update the TPs, TNs ,FPs and FNs"""
+    def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None:
+        """
+        Update the state (TPs, TNs, FPs, FNs) with the current batch of predictions and labels.
+
+        Args:
+            preds (torch.Tensor): Predictions from the model.
+            labels (torch.Tensor): Ground truth labels.
+        """

         # Size: Batch_size x Num_of_Classes;
         # summing over 1st dimension (dim=0), gives us the True positives per class

@@ -110,9 +163,13 @@ def update(self, preds: torch.Tensor, labels: torch.Tensor):
         self.true_negatives += tns
         self.false_negatives += fns

-    def compute(self):
-        """Compute the average value of Balanced accuracy from each batch"""
+    def compute(self) -> torch.Tensor:
+        """
+        Compute the Balanced Accuracy.

+        Returns:
+            torch.Tensor: The computed Balanced Accuracy.
+        """
         tpr = self.true_positives / (self.true_positives + self.false_negatives)
         tnr = self.true_negatives / (self.true_negatives + self.false_positives)
         # Convert the nan values to 0
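A small sketch of the documented semantics on an illustrative batch: class 2 has no positive labels, so MacroF1 excludes it from the mean rather than assigning it a default value (values below are made up):

# Sketch: exercising the custom metrics on one illustrative batch.
import torch

from chebai.callbacks.epoch_metrics import BalancedAccuracy, MacroF1

preds = torch.tensor([[0.9, 0.2, 0.4],
                      [0.8, 0.6, 0.1]])  # predicted probabilities
labels = torch.tensor([[1, 0, 0],
                       [1, 1, 0]])       # class 2 has no positive samples

f1 = MacroF1(num_labels=3)
f1.update(preds, labels)
print(f1.compute())    # mean F1 over classes 0 and 1 only; class 2 is ignored

bacc = BalancedAccuracy(num_labels=3)
bacc.update(preds, labels)
print(bacc.compute())  # nan rates (e.g. TPR of class 2) are converted to 0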

chebai/callbacks/model_checkpoint.py

Lines changed: 41 additions & 9 deletions
@@ -10,13 +10,25 @@


 class CustomModelCheckpoint(ModelCheckpoint):
-    """Checkpoint class that resolves checkpoint paths s.t. for the CustomLogger, checkpoints get saved to the
-    same directory as the other logs"""
+    """
+    Custom checkpoint class that resolves checkpoint paths to ensure checkpoints are saved in the same directory
+    as other logs when using CustomLogger.
+    Inherits from PyTorch Lightning's ModelCheckpoint class.
+    """

-    def setup(
-        self, trainer: "Trainer", pl_module: "LightningModule", stage: str
-    ) -> None:
-        """Same as in parent class, duplicated to be able to call self.__resolve_ckpt_dir"""
+    def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None:
+        """
+        Setup the directory path for saving checkpoints. If the directory path is not set, it resolves the checkpoint
+        directory using the custom logger's directory.
+
+        Note:
+            Same as in parent class, duplicated to be able to call self.__resolve_ckpt_dir
+
+        Args:
+            trainer (Trainer): The Trainer instance.
+            pl_module (LightningModule): The LightningModule instance.
+            stage (str): The stage of training (e.g., 'fit').
+        """
         if self.dirpath is not None:
             self.dirpath = None
         dirpath = self.__resolve_ckpt_dir(trainer)

@@ -26,16 +38,36 @@ def setup(
             self.__warn_if_dir_not_empty(self.dirpath)

     def __warn_if_dir_not_empty(self, dirpath: _PATH) -> None:
-        """Same as in parent class, duplicated because method in parent class is not accessible"""
+        """
+        Warn if the checkpoint directory is not empty.
+
+        Note:
+            Same as in parent class, duplicated because method in parent class is not accessible
+
+        Args:
+            dirpath (_PATH): The path to the checkpoint directory.
+        """
         if (
             self.save_top_k != 0
             and _is_dir(self._fs, dirpath, strict=True)
             and len(self._fs.ls(dirpath)) > 0
         ):
             rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

-    def __resolve_ckpt_dir(self, trainer: "Trainer") -> _PATH:
-        """Overwritten for compatibility with wandb -> saves checkpoints in same dir as wandb logs"""
+    def __resolve_ckpt_dir(self, trainer: Trainer) -> _PATH:
+        """
+        Resolve the checkpoint directory path, ensuring compatibility with WandbLogger by saving checkpoints
+        in the same directory as Wandb logs.
+
+        Note:
+            Overwritten for compatibility with wandb -> saves checkpoints in same dir as wandb logs
+
+        Args:
+            trainer (Trainer): The Trainer instance.
+
+        Returns:
+            _PATH: The resolved checkpoint directory path.
+        """
         rank_zero_info(f"Resolving checkpoint dir (custom)")
         if self.dirpath is not None:
             # short circuit if dirpath was passed to ModelCheckpoint
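A minimal configuration sketch (the monitored metric name is a hypothetical placeholder); `dirpath` is deliberately left unset so that `setup` resolves it from the logger's directory:

# Sketch: using CustomModelCheckpoint so checkpoints land next to the wandb logs.
from lightning.pytorch import Trainer

from chebai.callbacks.model_checkpoint import CustomModelCheckpoint

checkpoint = CustomModelCheckpoint(
    monitor="val_loss",  # hypothetical metric name logged by the model
    save_top_k=3,
    mode="min",
    # dirpath is left unset on purpose: setup() resolves it via the logger
    # so checkpoints are saved alongside the other logs.
)
trainer = Trainer(callbacks=[checkpoint])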
