
Commit cc46a5e

Merge branch 'main' of github.com:SFI-Visual-Intelligence/Collaborative-Coding-Exam into johan/devbranch

2 parents: ee9f548 + 97d1cec

File tree

12 files changed: 481 additions, 117 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -12,6 +12,9 @@ doc/autoapi
 
 *.DS_Store
 
+# Jan
+job.yaml
+sync.sh
 
 #Magnus specific
 job*

CollaborativeCoding/dataloaders/download.py

Lines changed: 6 additions & 2 deletions
@@ -16,12 +16,12 @@
 
 class Downloader:
     """
-    Class to download and load the USPS dataset.
+    Class used to verify availability and potentially download implemented datasets.
 
     Methods
     -------
     mnist(data_dir: Path) -> tuple[np.ndarray, np.ndarray]
-        Download the MNIST dataset and save it as an HDF5 file to `data_dir`.
+        Checks the availability of mnist dataset. If not present downloads it into MNIST folder in `data_dir`.
     svhn(data_dir: Path) -> tuple[np.ndarray, np.ndarray]
         Download the SVHN dataset and save it as an HDF5 file to `data_dir`.
     usps(data_dir: Path) -> tuple[np.ndarray, np.ndarray]
@@ -42,6 +42,10 @@ class Downloader:
     """
 
     def mnist(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Check the availability of mnist dataset. If not present downloads it into MNIST folder in `data_dir`.
+        """
+
         def _chech_is_downloaded(path: Path) -> bool:
             path = path / "MNIST"
             if path.exists():
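The updated docstrings describe a check-before-download flow for MNIST. Below is a minimal sketch of that pattern, using torchvision's MNIST downloader as a stand-in; the helper name and storage layout are illustrative assumptions, not the repository's implementation (which the older docstring suggests stores data as HDF5).

# Minimal sketch (not the committed Downloader) of the check-then-download
# pattern described above: fetch MNIST only if `data_dir / "MNIST"` is missing.
from pathlib import Path

from torchvision.datasets import MNIST  # stand-in downloader for illustration


def ensure_mnist(data_dir: Path) -> Path:
    """Return the MNIST folder, downloading the dataset first if it is absent."""
    mnist_dir = data_dir / "MNIST"
    if not mnist_dir.exists():
        # torchvision skips re-downloading files that already exist,
        # so repeating this call is harmless.
        MNIST(root=data_dir, train=True, download=True)
        MNIST(root=data_dir, train=False, download=True)
    return mnist_dir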

CollaborativeCoding/dataloaders/mnist_0_3.py

Lines changed: 6 additions & 6 deletions
@@ -10,20 +10,20 @@
 class MNISTDataset0_3(Dataset):
     """
     A custom Dataset class for loading a subset of the MNIST dataset containing digits 0 to 3.
-    Parameters
+
+    Args
     ----------
     data_path : Path
-        The root directory where the MNIST data is stored.
+        The root directory where the MNIST folder with data is stored.
     sample_ids : list
         A list of indices specifying which samples to load.
     train : bool, optional
         If True, load training data, otherwise load test data. Default is False.
     transform : callable, optional
         A function/transform to apply to the images. Default is None.
+
     Attributes
     ----------
-    data_path : Path
-        The root directory where the MNIST data is stored.
     mnist_path : Path
         The directory where the MNIST dataset is located within the root directory.
     idx : list
@@ -40,6 +40,7 @@ class MNISTDataset0_3(Dataset):
         The path to the label file (train or test) based on the `train` flag.
     length : int
         The number of samples in the dataset.
+
     Methods
     -------
     __len__()
@@ -58,8 +59,7 @@ def __init__(
     ):
         super().__init__()
 
-        self.data_path = data_path
-        self.mnist_path = self.data_path / "MNIST"
+        self.mnist_path = data_path / "MNIST"
         self.idx = sample_ids
         self.train = train
         self.transform = transform
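A hedged usage sketch for the dataset class documented in this hunk; the keyword arguments follow the docstring above, while the data path, sample indices, and transform are placeholders.

# Illustrative usage of MNISTDataset0_3 (paths and indices are placeholders).
from pathlib import Path

from torchvision import transforms

from CollaborativeCoding.dataloaders.mnist_0_3 import MNISTDataset0_3

dataset = MNISTDataset0_3(
    data_path=Path("data"),           # expects data/MNIST to exist
    sample_ids=list(range(100)),      # indices of the samples to load
    train=True,
    transform=transforms.ToTensor(),
)
print(len(dataset))                   # number of selected samples
image, label = dataset[0]             # label is a digit in {0, 1, 2, 3}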

CollaborativeCoding/dataloaders/uspsh5_7_9.py

Lines changed: 52 additions & 16 deletions
@@ -8,26 +8,45 @@
 
 class USPSH5_Digit_7_9_Dataset(Dataset):
     """
-    Custom USPS dataset class that loads images with digits 7-9 from an .h5 file.
+    This class loads a subset of the USPS dataset, specifically images of digits 7, 8, and 9, from an HDF5 file.
+    It allows for applying transformations to the images and provides methods to retrieve images and their corresponding labels.
 
     Parameters
     ----------
-    h5_path : str
-        Path to the USPS `.h5` file.
+    data_path : str or Path
+        Path to the directory containing the USPS `.h5` file. This file should contain the data in the "train" or "test" group.
+
+    sample_ids : list of int
+        A list of sample indices to be used from the dataset. This allows for filtering or selecting a subset of the full dataset.
+
+    train : bool, optional, default=False
+        If `True`, the dataset is loaded in training mode (using the "train" group). If `False`, the dataset is loaded in test mode (using the "test" group).
 
     transform : callable, optional, default=None
-        A transform function to apply on images. If None, no transformation is applied.
+        A transformation function to apply to each image. If `None`, no transformation is applied. Typically used for data augmentation or normalization.
+
+    nr_channels : int, optional, default=1
+        The number of channels in the image. USPS images are typically grayscale, so this should generally be set to 1. This parameter allows for potential future flexibility.
 
     Attributes
     ----------
     images : numpy.ndarray
-        The filtered images corresponding to digits 7-9.
+        Array of images corresponding to digits 7, 8, and 9 from the USPS dataset. The images are loaded from the HDF5 file and filtered based on the labels.
 
     labels : numpy.ndarray
-        The filtered labels corresponding to digits 7-9.
+        Array of labels corresponding to the images. Only labels of digits 7, 8, and 9 are retained, and they are mapped to 0, 1, and 2 for classification tasks.
 
     transform : callable, optional
-        A transform function to apply to the images.
+        A transformation function to apply to the images. This is passed as an argument during initialization.
+
+    label_shift : function
+        A function to shift the labels for classification purposes. It maps the original labels (7, 8, 9) to (0, 1, 2).
+
+    label_restore : function
+        A function to restore the original labels (7, 8, 9) from the shifted labels (0, 1, 2).
+
+    num_classes : int
+        The number of unique labels in the dataset, which is 3 (for digits 7, 8, and 9).
     """
 
     def __init__(
@@ -36,14 +55,25 @@ def __init__(
         super().__init__()
         """
         Initializes the USPS dataset by loading images and labels from the given `.h5` file.
-
+
+        The dataset is filtered to only include images of digits 7, 8, and 9, which are mapped to labels 0, 1, and 2 respectively for classification purposes.
+
         Parameters
         ----------
-        h5_path : str
-            Path to the USPS `.h5` file.
-
+        data_path : str or Path
+            Path to the directory containing the USPS `.h5` file.
+
+        sample_ids : list of int
+            List of sample indices to load from the dataset.
+
+        train : bool, optional, default=False
+            If `True`, loads the training data from the HDF5 file. If `False`, loads the test data.
+
         transform : callable, optional, default=None
-            A transform function to apply on images.
+            A function to apply transformations to the images. If None, no transformation is applied.
+
+        nr_channels : int, optional, default=1
+            The number of channels in the image. Defaults to 1 for grayscale images.
         """
         self.filename = "usps.h5"
         path = data_path if isinstance(data_path, Path) else Path(data_path)
@@ -72,27 +102,33 @@ def __len__(self):
         """
         Returns the total number of samples in the dataset.
 
+        This method is required for PyTorch's Dataset class, as it allows PyTorch to determine the size of the dataset.
+
         Returns
         -------
         int
-            The number of images in the dataset.
+            The number of images in the dataset (after filtering for digits 7, 8, and 9).
         """
+
         return len(self.images)
 
     def __getitem__(self, id):
         """
         Returns a sample from the dataset given an index.
 
+        This method is required for PyTorch's Dataset class, as it allows indexing into the dataset to retrieve specific samples.
+
        Parameters
        ----------
        idx : int
-            The index of the sample to retrieve.
+            The index of the sample to retrieve from the dataset.
 
        Returns
        -------
        tuple
-            - image (PIL Image): The image at the specified index.
-            - label (int): The label corresponding to the image.
+            A tuple containing:
+            - image (PIL Image): The image at the specified index.
+            - label (int): The label corresponding to the image, shifted to be in the range [0, 2] for classification.
        """
        # Convert to PIL Image (USPS images are typically grayscale 16x16)
        image = Image.fromarray(self.images[id].astype(np.uint8), mode="L")
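The new `label_shift` and `label_restore` attributes map the USPS digits 7, 8, 9 to contiguous class indices 0, 1, 2 and back. An illustrative sketch of that mapping (not the committed implementation):

# Digits 7, 8, 9 are shifted to class indices 0, 1, 2 for training,
# and can be restored to the original digit labels afterwards.
def label_shift(label: int) -> int:
    return label - 7      # 7, 8, 9 -> 0, 1, 2


def label_restore(label: int) -> int:
    return label + 7      # 0, 1, 2 -> 7, 8, 9


assert [label_shift(y) for y in (7, 8, 9)] == [0, 1, 2]
assert [label_restore(label_shift(y)) for y in (7, 8, 9)] == [7, 8, 9]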

CollaborativeCoding/load_data.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ def filter_labels(samples: list, wanted_labels: list) -> list:
 
 def load_data(dataset: str, *args, **kwargs) -> tuple:
     """
-    load the dataset based on the dataset name.
+    Load the dataset based on the dataset name.
 
     Args
     ----

CollaborativeCoding/load_metric.py

Lines changed: 8 additions & 2 deletions
@@ -9,26 +9,32 @@ class MetricWrapper(nn.Module):
     This class allows you to compute several metrics simultaneously on given
     true and predicted labels. It supports a variety of common metrics and
     provides methods to accumulate results and reset the state.
+
     Args
     ----
     num_classes : int
         The number of classes in the classification task.
     metrics : list[str]
         A list of metric names to be evaluated.
+    macro_averaging : bool
+        Whether to compute macro-averaged metrics for multi-class classification.
+
     Attributes
     ----------
     metrics : dict
         A dictionary mapping metric names to their corresponding functions.
     num_classes : int
         The number of classes for the classification task.
+
     Methods
     -------
     __call__(y_true, y_pred)
-        Computes the specified metrics on the provided true and predicted labels.
+        Passes the true and predicted labels to the metric functions.
     getmetrics(str_prefix: str = None)
-        Retrieves the computed metrics, optionally prefixed with a string.
+        Retrieves the dictionary of computed metrics, optionally all keys can be prefixed with a string.
     resetmetric()
         Resets the state of all metric computations.
+
     Examples
     --------
     >>> from CollaborativeCoding import MetricWrapperProposed
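A hedged usage sketch for `MetricWrapper`, following the Args and Methods sections above; the metric names, module path, and exact call signatures are assumptions made for illustration, not the repository's confirmed API.

# Illustrative use of MetricWrapper; metric names ("f1", "accuracy") are assumed.
import torch

from CollaborativeCoding.load_metric import MetricWrapper

metrics = MetricWrapper(num_classes=4, metrics=["f1", "accuracy"], macro_averaging=True)

y_true = torch.tensor([0, 1, 2, 3])
logits = torch.randn(4, 4)                    # one row of logits per sample

metrics(y_true, logits)                       # accumulate one batch
print(metrics.getmetrics(str_prefix="val_"))  # e.g. {"val_f1": ..., "val_accuracy": ...}
metrics.resetmetric()                         # clear state before the next epoch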

CollaborativeCoding/metrics/F1.py

Lines changed: 63 additions & 13 deletions
@@ -5,18 +5,67 @@
 
 class F1Score(nn.Module):
     """
-    F1 Score implementation with support for both macro and micro averaging.
-    This class computes the F1 score during training using either macro or micro averaging.
+    Computes the F1 score for classification tasks with support for both macro and micro averaging.
+
+    This class allows you to compute the F1 score during training or evaluation. You can select between two methods of averaging:
+    - **Micro Averaging**: Computes the F1 score globally, treating each individual prediction as equally important.
+    - **Macro Averaging**: Computes the F1 score for each class individually and then averages the scores.
+
     Parameters
     ----------
     num_classes : int
         The number of classes in the classification task.
 
-    macro_averaging : bool, default=False
-        If True, computes the macro-averaged F1 score. If False, computes the micro-averaged F1 score.
+    macro_averaging : bool, optional, default=False
+        If True, computes the macro-averaged F1 score. If False, computes the micro-averaged F1 score. Default is micro averaging.
+
+    Attributes
+    ----------
+    num_classes : int
+        The number of classes in the classification task.
+
+    macro_averaging : bool
+        A flag to determine whether to compute the macro-averaged or micro-averaged F1 score.
+
+    y_true : list
+        A list to store true labels for the current batch.
+
+    y_pred : list
+        A list to store predicted labels for the current batch.
+
+    Methods
+    -------
+    forward(target, preds)
+        Stores predictions and true labels for computing the F1 score during training or evaluation.
+
+    compute_f1()
+        Computes and returns the F1 score based on the stored predictions and true labels.
+
+    _micro_F1(target, preds)
+        Computes the micro-averaged F1 score based on the global true positive, false positive, and false negative counts.
+
+    _macro_F1(target, preds)
+        Computes the macro-averaged F1 score by calculating the F1 score per class and then averaging across all classes.
+
+    __returnmetric__()
+        Computes and returns the F1 score (Micro or Macro) as specified.
+
+    __reset__()
+        Resets the stored predictions and true labels, preparing for the next batch or epoch.
     """
 
     def __init__(self, num_classes, macro_averaging=False):
+        """
+        Initializes the F1Score object with the number of classes and averaging mode.
+
+        Parameters
+        ----------
+        num_classes : int
+            The number of classes in the classification task.
+
+        macro_averaging : bool, optional, default=False
+            If True, compute the macro-averaged F1 score. If False, compute the micro-averaged F1 score.
+        """
         super().__init__()
         self.num_classes = num_classes
         self.macro_averaging = macro_averaging
@@ -25,14 +74,15 @@ def __init__(self, num_classes, macro_averaging=False):
 
     def forward(self, target, preds):
         """
-        Stores predictions and targets for computing the F1 score.
+        Stores the true labels and predictions to compute the F1 score.
 
         Parameters
         ----------
-        preds : torch.Tensor
-            Predicted logits (shape: [batch_size, num_classes]).
         target : torch.Tensor
             True labels (shape: [batch_size]).
+
+        preds : torch.Tensor
+            Predicted logits (shape: [batch_size, num_classes]).
         """
         preds = torch.argmax(preds, dim=-1)  # Convert logits to class indices
         self.y_true.append(target.detach())
@@ -47,7 +97,7 @@ def compute_f1(self):
         Returns
         -------
         torch.Tensor
-            The computed F1 score.
+            The computed F1 score. Returns NaN if no predictions or targets are available.
         """
         if not self.y_true or not self.y_pred:  # Check if empty
             return torch.tensor(np.nan)
@@ -63,7 +113,7 @@
         )
 
     def _micro_F1(self, target, preds):
-        """Computes Micro F1 Score (global TP, FP, FN)."""
+        """Computes the Micro-averaged F1 score (global TP, FP, FN)."""
         tp = torch.sum(preds == target).float()
         fp = torch.sum(preds != target).float()
         fn = fp  # Since all errors are either FP or FN
@@ -75,7 +125,7 @@ def _micro_F1(self, target, preds):
         return f1
 
     def _macro_F1(self, target, preds):
-        """Computes Macro F1 Score in a vectorized way (no loops)."""
+        """Computes the Macro-averaged F1 score."""
         num_classes = self.num_classes
         target = target.long()  # Ensure target is a LongTensor
         preds = preds.long()
@@ -100,12 +150,12 @@
 
     def __returnmetric__(self):
         """
-        Computes and returns the F1 score (Micro or Macro).
+        Computes and returns the F1 score (Micro or Macro) based on the stored predictions and targets.
 
         Returns
        -------
        torch.Tensor
-            The computed F1 score.
+            The computed F1 score. Returns NaN if no predictions or targets are available.
        """
        if not self.y_true or not self.y_pred:  # Check if empty
            return torch.tensor(np.nan)
@@ -121,6 +171,6 @@ def __returnmetric__(self):
        )
 
    def __reset__(self):
-        """Resets stored predictions and targets."""
+        """Resets the stored predictions and targets for the next batch or epoch."""
        self.y_true = []
        self.y_pred = []
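The class docstring distinguishes micro and macro averaging. A self-contained toy illustration (not the committed implementation) of how the two differ on a small multiclass batch:

# Micro vs. macro F1 on a toy batch of 6 predictions over 3 classes.
import torch

target = torch.tensor([0, 0, 1, 2, 2, 2])
preds = torch.tensor([0, 1, 1, 2, 2, 0])
num_classes = 3

# Micro averaging pools TP/FP/FN over all samples. In single-label
# multiclass classification every error counts as both a FP and a FN,
# so micro-F1 reduces to plain accuracy.
micro_f1 = (preds == target).float().mean()

# Macro averaging computes F1 per class, then takes the unweighted mean.
per_class_f1 = []
for c in range(num_classes):
    tp = ((preds == c) & (target == c)).sum().float()
    fp = ((preds == c) & (target != c)).sum().float()
    fn = ((preds != c) & (target == c)).sum().float()
    per_class_f1.append(2 * tp / (2 * tp + fp + fn + 1e-8))
macro_f1 = torch.stack(per_class_f1).mean()

print(round(micro_f1.item(), 4), round(macro_f1.item(), 4))  # ~0.6667 vs ~0.6556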
