
Commit 7854122

Five more tests to go
2 parents: bfb895c + e9fa533

10 files changed: +99 −84 lines changed


CollaborativeCoding/dataloaders/usps_0_6.py

Lines changed: 11 additions & 2 deletions
@@ -83,6 +83,7 @@ def __init__(
         sample_ids: list,
         train: bool = False,
         transform=None,
+        nr_channels=1,
     ):
         super().__init__()

@@ -91,6 +92,7 @@ def __init__(
         self.transform = transform
         self.mode = "train" if train else "test"
         self.sample_ids = sample_ids
+        self.nr_channels = nr_channels

     def __len__(self):
         return len(self.sample_ids)

@@ -100,11 +102,18 @@ def __getitem__(self, id):

         with h5.File(self.filepath, "r") as f:
             data = f[self.mode]["data"][index].astype(np.uint8)
-            label = f[self.mode]["target"][index]
+            label = int(f[self.mode]["target"][index])

-        data = Image.fromarray(data, mode="L")
+        if self.nr_channels == 1:
+            data = Image.fromarray(data, mode="L")
+        elif self.nr_channels == 3:
+            data = Image.fromarray(data, mode="RGB")
+        else:
+            raise ValueError("Invalid number of channels")

         if self.transform:
             data = self.transform(data)

+        # label = torch.tensor(label).long()
+
         return data, label
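
With the added nr_channels argument, the dataset can now yield either grayscale ("L") or three-channel ("RGB") PIL images, and the label is cast to a plain int. A minimal usage sketch under assumed values (the data path, sample ids, and transform below are placeholders, not taken from the repository):

import numpy as np
from torchvision import transforms

from CollaborativeCoding.dataloaders import USPSDataset0_6

transform = transforms.Compose([transforms.Resize((16, 16)), transforms.ToTensor()])

dataset = USPSDataset0_6(
    data_path="path/to/usps_dir",  # placeholder; must point at the expected .h5 data
    sample_ids=np.arange(10),      # placeholder subset of sample indices
    train=True,
    transform=transform,
    nr_channels=1,                 # 1 -> mode "L", 3 -> mode "RGB", anything else raises ValueError
)
image, label = dataset[0]          # label is a plain int after the int(...) cast added above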

CollaborativeCoding/dataloaders/uspsh5_7_9.py

Lines changed: 5 additions & 2 deletions
@@ -32,7 +32,9 @@ class USPSH5_Digit_7_9_Dataset(Dataset):
         A transform function to apply to the images.
     """

-    def __init__(self, data_path, sample_ids, train=False, transform=None, nr_channels=1):
+    def __init__(
+        self, data_path, sample_ids, train=False, transform=None, nr_channels=1
+    ):
        super().__init__()
        """
        Initializes the USPS dataset by loading images and labels from the given `.h5` file.

@@ -112,7 +114,8 @@ def main():
    indices = np.array([7, 8, 9])
    # Load the dataset
    dataset = USPSH5_Digit_7_9_Dataset(
-        data_path="C:/Users/Solveig/OneDrive/Dokumente/UiT PhD/Courses/Git", sample_ids=indices,
+        data_path="C:/Users/Solveig/OneDrive/Dokumente/UiT PhD/Courses/Git",
+        sample_ids=indices,
        train=False,
        transform=transform,
    )

CollaborativeCoding/load_data.py

Lines changed: 3 additions & 3 deletions
@@ -93,23 +93,23 @@ def load_data(dataset: str, *args, **kwargs) -> tuple:
        sample_ids=train_samples,
        train=True,
        transform=transform,
-        nr_channels=kwargs.get("nr_channels"),
+        nr_channels=kwargs.get("nr_channels", 1),
    )

    val = dataset(
        data_path=data_dir,
        sample_ids=val_samples,
        train=True,
        transform=transform,
-        nr_channels=kwargs.get("nr_channels"),
+        nr_channels=kwargs.get("nr_channels", 1),
    )

    test = dataset(
        data_path=data_dir,
        sample_ids=test_samples,
        train=False,
        transform=transform,
-        nr_channels=kwargs.get("nr_channels"),
+        nr_channels=kwargs.get("nr_channels", 1),
    )

    return train, val, test
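
The change from kwargs.get("nr_channels") to kwargs.get("nr_channels", 1) matters when the caller omits the argument: without a default, dict.get returns None, which would later trip the channel check in the datasets, whereas the new default falls back to single-channel grayscale. A quick illustration of the standard dict.get behaviour:

kwargs = {}                        # caller did not pass nr_channels
kwargs.get("nr_channels")          # -> None (old behaviour), rejected by the channel check
kwargs.get("nr_channels", 1)       # -> 1 (new behaviour), single-channel grayscale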

CollaborativeCoding/metrics/F1.py

Lines changed: 5 additions & 3 deletions
@@ -159,11 +159,13 @@ def __returnmetric__(self):
        else:
            self.y_true = torch.cat(self.y_true)
            self.y_pred = torch.cat(self.y_pred)
-            return self._micro_F1(self.y_true, self.y_pred) if not self.macro_averaging else self._macro_F1(self.y_true, self.y_pred)
+            return (
+                self._micro_F1(self.y_true, self.y_pred)
+                if not self.macro_averaging
+                else self._macro_F1(self.y_true, self.y_pred)
+            )

    def __reset__(self):
        self.y_true = []
        self.y_pred = []
        return None
-
-
CollaborativeCoding/metrics/recall.py

Lines changed: 33 additions & 9 deletions
@@ -1,3 +1,4 @@
+import numpy as np
 import torch
 import torch.nn as nn

@@ -57,26 +58,49 @@ def __init__(self, num_classes, macro_averaging=False):
        self.num_classes = num_classes
        self.macro_averaging = macro_averaging

+        self.__y_true = []
+        self.__y_pred = []
+
    def forward(self, true, logits):
        pred = logits.argmax(dim=-1)
        y_true = one_hot_encode(true, self.num_classes)
        y_pred = one_hot_encode(pred, self.num_classes)

+        self.__y_true.append(y_true)
+        self.__y_pred.append(y_pred)
+
+    def compute(self, y_true, y_pred):
        if self.macro_averaging:
-            recall = 0
-            for i in range(self.num_classes):
-                tp = (y_true[:, i] * y_pred[:, i]).sum()
-                fn = torch.sum(~y_pred[y_true[:, i].bool()].bool())
-                recall += tp / (tp + fn)
-            recall /= self.num_classes
-        else:
-            recall = self.__compute(y_true, y_pred)
+            return self.__compute_macro_averaging(y_true, y_pred)
+
+        return self.__compute_micro_averaging(y_true, y_pred)
+
+    def __compute_macro_averaging(self, y_true, y_pred):
+        recall = 0
+        for i in range(self.num_classes):
+            tp = (y_true[:, i] * y_pred[:, i]).sum()
+            fn = torch.sum(~y_pred[y_true[:, i].bool()].bool())
+            recall += tp / (tp + fn)
+        recall /= self.num_classes

        return recall

-    def __compute(self, y_true, y_pred):
+    def __compute_micro_averaging(self, y_true, y_pred):
        true_positives = (y_true * y_pred).sum()
        false_negatives = torch.sum(~y_pred[y_true.bool()].bool())

        recall = true_positives / (true_positives + false_negatives)
        return recall
+
+    def __returnmetric__(self):
+        if len(self.__y_true) == 0 and len(self.__y_pred) == 0:
+            return np.nan
+
+        y_true = torch.cat(self.__y_true, dim=0)
+        y_pred = torch.cat(self.__y_pred, dim=0)
+
+        return self.compute(y_true, y_pred)
+
+    def __reset__(self):
+        self.__y_true = []
+        self.__y_pred = []
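
Recall now follows the same accumulate-then-report pattern as the F1 metric: forward() only stores one-hot targets and predictions, and the score is computed once __returnmetric__() is asked for it. A minimal sketch of the intended cycle, with made-up batch tensors and an assumed import path:

import torch

from CollaborativeCoding.metrics import Recall  # assumed import path

recall = Recall(num_classes=7, macro_averaging=True)

for _ in range(3):                       # e.g. three validation batches
    y_true = torch.randint(0, 7, (32,))  # placeholder labels
    logits = torch.randn(32, 7)          # placeholder model outputs
    recall(y_true, logits)               # forward() only appends one-hot encodings

score = recall.__returnmetric__()        # concatenates the buffers and computes recall
recall.__reset__()                       # clear the buffers before the next epoch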

CollaborativeCoding/models/solveig_model.py

Lines changed: 18 additions & 18 deletions
@@ -4,24 +4,24 @@

 def find_fc_input_shape(image_shape, model):
     """
-Find the shape of the input to the fully connected layer after passing through the convolutional layers.
-
-Code inspired by @Seilmast (https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam/issues/67#issuecomment-2651212254)
-
-Args
-----
-image_shape : tuple(int, int, int)
-    Shape of the input image (C, H, W), where C is the number of channels,
-    H is the height, and W is the width of the image.
-model : nn.Module
-    The CNN model containing the convolutional layers, whose output size is used to
-    determine the number of input features for the fully connected layer.
-
-Returns
--------
-int
-    The number of elements in the input to the fully connected layer.
-"""
+    Find the shape of the input to the fully connected layer after passing through the convolutional layers.
+
+    Code inspired by @Seilmast (https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam/issues/67#issuecomment-2651212254)
+
+    Args
+    ----
+    image_shape : tuple(int, int, int)
+        Shape of the input image (C, H, W), where C is the number of channels,
+        H is the height, and W is the width of the image.
+    model : nn.Module
+        The CNN model containing the convolutional layers, whose output size is used to
+        determine the number of input features for the fully connected layer.
+
+    Returns
+    -------
+    int
+        The number of elements in the input to the fully connected layer.
+    """

    dummy_img = torch.randn(1, *image_shape)
    with torch.no_grad():
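
The docstring re-indentation does not change behaviour: find_fc_input_shape still pushes a dummy batch through the model's convolutional layers under torch.no_grad() and reports how many input features the first fully connected layer needs. A rough standalone sketch of that idea, with a hypothetical conv stack standing in for the real model (counting via numel() is an assumption, not quoted from the file):

import torch
import torch.nn as nn

# Hypothetical convolutional front-end; the actual model in solveig_model.py may differ.
conv_layers = nn.Sequential(nn.Conv2d(1, 8, kernel_size=3), nn.ReLU(), nn.MaxPool2d(2))

dummy_img = torch.randn(1, 1, 16, 16)  # one image with (C, H, W) = (1, 16, 16)
with torch.no_grad():
    out = conv_layers(dummy_img)

fc_input_features = out.numel()        # 8 * 7 * 7 = 392 elements feed the fully connected layer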

main.py

Lines changed: 1 addition & 2 deletions
@@ -1,11 +1,11 @@
 import numpy as np
 import torch as th
 import torch.nn as nn
+import wandb
 from torch.utils.data import DataLoader
 from torchvision import transforms
 from tqdm import tqdm

-import wandb
 from CollaborativeCoding import (
     MetricWrapper,
     createfolders,

@@ -17,7 +17,6 @@
 # from wandb_api import WANDB_API


-
 def main():
     """


tests/test_dataloaders.py

Lines changed: 0 additions & 40 deletions
@@ -3,7 +3,6 @@
 import numpy as np
 import pytest
 import torch
-from PIL import Image
 from torchvision import transforms

 from CollaborativeCoding.dataloaders import (

@@ -38,42 +37,3 @@ def test_load_data(data_name, expected):
    assert isinstance(
        dataset[0][1], (int, torch.Tensor, np.ndarray)
    )  # Should probably restrict this to only int or one-hot encoded tensor or array for consistency.
-
-
-def test_uspsdataset0_6():
-    from tempfile import TemporaryDirectory
-
-    import h5py
-    import numpy as np
-    from torchvision import transforms
-
-    # Create a temporary directory (deleted after the test)
-    with TemporaryDirectory() as tempdir:
-        tempdir = Path(tempdir)
-
-        tf = tempdir / "usps.h5"
-
-        # Create a h5 file
-        with h5py.File(tf, "w") as f:
-            targets = np.array([6, 5, 4, 3, 2, 1, 0, 0, 0, 0])
-            indices = np.arange(len(targets))
-            # Populate the file with data
-            f["train/data"] = np.random.rand(10, 16 * 16)
-            f["train/target"] = targets
-
-        trans = transforms.Compose(
-            [
-                transforms.Resize((16, 16)),
-                transforms.ToTensor(),
-            ]
-        )
-        dataset = USPSDataset0_6(
-            data_path=tempdir,
-            sample_ids=indices,
-            train=True,
-            transform=trans,
-        )
-        assert len(dataset) == 10
-        data, target = dataset[0]
-        assert data.shape == (1, 16, 16)
-        assert target == 6

tests/test_metrics.py

Lines changed: 11 additions & 4 deletions
@@ -59,8 +59,11 @@ def test_recall():
    recall_micro = Recall(7)
    recall_macro = Recall(7, macro_averaging=True)

-    recall_micro_score = recall_micro(y_true, logits)
-    recall_macro_score = recall_macro(y_true, logits)
+    recall_micro(y_true, logits)
+    recall_macro(y_true, logits)
+
+    recall_micro_score = recall_micro.__returnmetric__()
+    recall_macro_score = recall_macro.__returnmetric__()

    assert isinstance(recall_micro_score, torch.Tensor), "Expected a tensor output."
    assert isinstance(recall_macro_score, torch.Tensor), "Expected a tensor output."

@@ -88,8 +91,12 @@ def test_f1score():
    macro_f1_score = f1_macro.__returnmetric__()

    # Check if outputs are tensors
-    assert isinstance(micro_f1_score, torch.Tensor), "Micro F1 score should be a tensor."
-    assert isinstance(macro_f1_score, torch.Tensor), "Macro F1 score should be a tensor."
+    assert isinstance(micro_f1_score, torch.Tensor), (
+        "Micro F1 score should be a tensor."
+    )
+    assert isinstance(macro_f1_score, torch.Tensor), (
+        "Macro F1 score should be a tensor."
+    )

    # Check that F1 scores are between 0 and 1
    assert 0 <= micro_f1_score.item() <= 1, "Micro F1 score should be between 0 and 1."

tests/test_wrappers.py

Lines changed: 12 additions & 1 deletion
@@ -1,4 +1,9 @@
 from pathlib import Path
+from tempfile import TemporaryDirectory
+
+import pytest
+import torch
+from torchvision import transforms

 from CollaborativeCoding import MetricWrapper, load_data, load_model

@@ -36,7 +41,13 @@ def test_load_data():
    import torch as th
    from torchvision import transforms

-    dataset_names = ["usps_0-6", "mnist_0-3", "usps_7-9", "svhn", "mnist_4-9"]
+    dataset_names = [
+        "usps_0-6",
+        "mnist_0-3",
+        "usps_7-9",
+        "svhn",
+        # 'mnist_4-9' #Uncomment when implemented
+    ]

    trans = transforms.Compose(
        [
