Merge branch 'main' into Jan-dev

hzavadil98 · web-flow · commit 1a402bab5314 · 2025-02-14T19:47:49.000+01:00
diff --git a/CollaborativeCoding/dataloaders/download.py b/CollaborativeCoding/dataloaders/download.py
@@ -8,6 +8,8 @@
 
 import h5py as h5
 import numpy as np
+from scipy.io import loadmat
+from torchvision.datasets import SVHN
 
 from .datasources import MNIST_SOURCE, USPS_SOURCE
 
@@ -84,7 +86,39 @@ def _get_labels(path: Path) -> np.ndarray:
         return train_labels, test_labels
 
     def svhn(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
-        raise NotImplementedError("SVHN download not implemented yet")
+        """
+        Download the SVHN ``.mat`` files when missing and return their labels.
+
+        Args:
+            data_dir: Directory in which the ``SVHN`` subfolder is created.
+
+        Returns:
+            The ``"y"`` entries of ``train_32x32.mat`` and ``test_32x32.mat``,
+            in that order.
+        """
+
+        def download_svhn(path, train: bool = True):
+            # Constructing the torchvision dataset with download=True fetches the
+            # split's .mat file into ``path``; the dataset object itself is unused.
+            SVHN(root=str(path), split="train" if train else "test", download=True)
+
+        parent_path = data_dir / "SVHN"
+
+        if not parent_path.exists():
+            parent_path.mkdir(parents=True)
+
+        train_data = parent_path / "train_32x32.mat"
+        test_data = parent_path / "test_32x32.mat"
+
+        if not train_data.exists():
+            download_svhn(parent_path, train=True)
+        if not test_data.exists():
+            download_svhn(parent_path, train=False)
+
+        train_labels = loadmat(train_data)["y"]
+        test_labels = loadmat(test_data)["y"]
+
+        return train_labels, test_labels
 
     def usps(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
         """
diff --git a/CollaborativeCoding/dataloaders/svhn.py b/CollaborativeCoding/dataloaders/svhn.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 import h5py
 import numpy as np
@@ -11,10 +12,10 @@
 class SVHNDataset(Dataset):
     def __init__(
         self,
-        data_path: str,
+        data_path: Path,
+        sample_ids: list,
         train: bool,
         transform=None,
-        download: bool = True,
         nr_channels=3,
     ):
         """
@@ -31,11 +32,9 @@ def __init__(
         super().__init__()
 
         self.data_path = data_path
+        self.indexes = sample_ids
         self.split = "train" if train else "test"
 
-        if download:
-            self._download_data(data_path)
-
         self.nr_channels = nr_channels
         self.transforms = transform
 
diff --git a/CollaborativeCoding/load_data.py b/CollaborativeCoding/load_data.py
@@ -86,20 +86,23 @@ def load_data(dataset: str, *args, **kwargs) -> tuple:
         sample_ids=train_samples,
         train=True,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )
 
     val = dataset(
         data_path=data_dir,
         sample_ids=val_samples,
         train=True,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )
 
     test = dataset(
         data_path=data_dir,
         sample_ids=test_samples,
         train=False,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )
 
     return train, val, test
diff --git a/CollaborativeCoding/load_metric.py b/CollaborativeCoding/load_metric.py
@@ -82,7 +82,7 @@ def __call__(self, y_true, y_pred):
         for key in self.metrics:
             self.metrics[key](y_true, y_pred)
 
-    def __getmetrics__(self, str_prefix: str = None):
+    def getmetrics(self, str_prefix: str = None):
         return_metrics = {}
         for key in self.metrics:
             if str_prefix is not None:
@@ -91,6 +91,6 @@ def __getmetrics__(self, str_prefix: str = None):
                 return_metrics[key] = self.metrics[key].__returnmetric__()
         return return_metrics
 
-    def __resetmetrics__(self):
+    def resetmetric(self):
         for key in self.metrics:
             self.metrics[key].__reset__()
diff --git a/CollaborativeCoding/metrics/EntropyPred.py b/CollaborativeCoding/metrics/EntropyPred.py
@@ -5,7 +5,7 @@
 
 
 class EntropyPrediction(nn.Module):
-    def __init__(self, averages: str = "mean"):
+    def __init__(self, num_classes, macro_averaging=None):
         """
         Initializes the EntropyPrediction module, which calculates the Shannon Entropy
         of predicted logits and aggregates the results based on the specified method.
@@ -17,11 +17,8 @@ def __init__(self, averages: str = "mean"):
         """
         super().__init__()
 
-        assert averages in ["mean", "sum", "none"], (
-            "averages must be 'mean', 'sum', or 'none'"
-        )
-        self.averages = averages
         self.stored_entropy_values = []
+        self.num_classes = num_classes
 
     def __call__(self, y_true: th.Tensor, y_logits: th.Tensor):
         """
@@ -36,6 +33,10 @@ def __call__(self, y_true: th.Tensor, y_logits: th.Tensor):
         """
 
         assert len(y_logits.size()) == 2, f"y_logits shape: {y_logits.size()}"
+        assert y_logits.size(-1) == self.num_classes, (
+            f"y_logit class length: {y_logits.size(-1)}, expected: {self.num_classes}"
+        )
+
         y_pred = nn.Softmax(dim=1)(y_logits)
         print(f"y_pred: {y_pred}")
         entropy_values = entropy(y_pred, axis=1)
@@ -50,13 +51,8 @@ def __call__(self, y_true: th.Tensor, y_logits: th.Tensor):
 
     def __returnmetric__(self):
         stored_entropy_values = th.from_numpy(np.asarray(self.stored_entropy_values))
+        stored_entropy_values = th.mean(stored_entropy_values)
 
-        if self.averages == "mean":
-            stored_entropy_values = th.mean(stored_entropy_values)
-        elif self.averages == "sum":
-            stored_entropy_values = th.sum(stored_entropy_values)
-        elif self.averages == "none":
-            pass
         return stored_entropy_values
 
     def __reset__(self):
diff --git a/doc/Magnus_page.md b/doc/Magnus_page.md
@@ -1,8 +1,6 @@
 Magnus Individual Task
 ======================
 
-# Magnus Størdal Individual Task
-
 ## Task overview
 In addition to the overall task, I was tasked to implement a three layer linear network, a dataset loader for the SVHN dataset, and a entropy metric.
 
diff --git a/main.py b/main.py
@@ -53,6 +53,7 @@ def main():
         data_dir=args.datafolder,
         transform=transform,
         val_size=args.val_size,
+        nr_channels=args.nr_channels,
     )
 
     train_metrics = MetricWrapper(
@@ -124,7 +125,7 @@ def main():
             train_metrics(y, logits)
 
             break
-        print(train_metrics.__getmetrics__())
+        print(train_metrics.getmetrics())
         print("Dry run completed successfully.")
         exit()
 
@@ -172,11 +173,11 @@ def main():
                 "Train loss": np.mean(trainingloss),
                 "Validation loss": np.mean(valloss),
             }
-            | train_metrics.__getmetrics__(str_prefix="Train ")
-            | val_metrics.__getmetrics__(str_prefix="Validation ")
+            | train_metrics.getmetrics(str_prefix="Train ")
+            | val_metrics.getmetrics(str_prefix="Validation ")
         )
-        train_metrics.__resetmetrics__()
-        val_metrics.__resetmetrics__()
+        train_metrics.resetmetric()
+        val_metrics.resetmetric()
 
     testloss = []
     model.eval()
@@ -192,9 +193,9 @@ def main():
 
     wandb.log(
         {"Epoch": 1, "Test loss": np.mean(testloss)}
-        | test_metrics.__getmetrics__(str_prefix="Test ")
+        | test_metrics.getmetrics(str_prefix="Test ")
     )
-    test_metrics.__resetmetrics__()
+    test_metrics.resetmetric()
 
 
 if __name__ == "__main__":
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -23,7 +23,7 @@
         ("accuracy", randint(2, 10), True),
         ("precision", randint(2, 10), False),
         ("precision", randint(2, 10), True),
-        # TODO: Add test for EntropyPrediction
+        ("entropy", randint(2, 10), False),
     ],
 )
 def test_metric_wrapper(metric, num_classes, macro_averaging):
@@ -40,9 +40,9 @@ def test_metric_wrapper(metric, num_classes, macro_averaging):
     )
 
     metrics(y_true, logits)
-    score = metrics.__getmetrics__()
-    metrics.__resetmetrics__()
-    empty_score = metrics.__getmetrics__()
+    score = metrics.getmetrics()
+    metrics.resetmetric()
+    empty_score = metrics.getmetrics()
 
     assert isinstance(score, dict), "Expected a dictionary output."
     assert metric in score, f"Expected {metric} metric in the output."
@@ -169,16 +169,22 @@ def test_accuracy():
 def test_entropypred():
     import torch as th
 
-    pred_logits = th.rand(6, 5)
     true_lab = th.rand(6, 5)
 
-    metric = EntropyPrediction(averages="mean")
-    metric2 = EntropyPrediction(averages="sum")
+    metric = EntropyPrediction(num_classes=5)
 
-    # Test for averaging metric consistency
+    # Test if the metric stores multiple values
+    pred_logits = th.rand(6, 5)
     metric(true_lab, pred_logits)
-    metric2(true_lab, pred_logits)
-    assert (
-        th.abs(th.sum(6 * metric.__returnmetric__() - metric2.__returnmetric__()))
-        < 1e-5
-    )
+
+    pred_logits = th.rand(6, 5)
+    metric(true_lab, pred_logits)
+
+    pred_logits = th.rand(6, 5)
+    metric(true_lab, pred_logits)
+
+    assert isinstance(metric.__returnmetric__(), th.Tensor)
+
+    # Test that an error is raised with num_class != class dimension length
+    with pytest.raises(AssertionError):
+        metric(true_lab, th.rand(6, 6))
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -4,6 +4,7 @@
 from CollaborativeCoding.models import (
     ChristianModel,
     JanModel,
+    JohanModel,
     MagnusModel,
     SolveigModel,
 )
@@ -54,15 +55,17 @@ def test_solveig_model(image_shape, num_classes):
     assert y.shape == (n, num_classes), f"Shape: {y.shape}"
 
 
-@pytest.mark.parametrize("image_shape", [(3, 28, 28)])
-def test_magnus_model(image_shape):
+@pytest.mark.parametrize(
+    "image_shape, num_classes", [((3, 28, 28), 10), ((1, 16, 16), 10)]
+)
+def test_magnus_model(image_shape, num_classes):
     import torch as th
 
     n, c, h, w = 5, *image_shape
-    model = MagnusModel([h, w], 10, c)
+    model = MagnusModel([h, w], num_classes, c)
 
     x = th.rand((n, c, h, w))
     with th.no_grad():
         y = model(x)
 
-    assert y.shape == (n, 10), f"Shape: {y.shape}"
+    assert y.shape == (n, num_classes), f"Shape: {y.shape}"
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
@@ -1,30 +1,33 @@
+from pathlib import Path
+
 from CollaborativeCoding import load_data, load_metric, load_model
 
-# def test_load_model():
-#     import torch as th
 
-#     image_shape = (1, 16, 16)
-#     num_classes = 4
+def test_load_model():
+    import torch as th
 
-#     dummy_img = th.rand((1, *image_shape))
+    image_shape = (1, 16, 16)
+    num_classes = 4
 
-#     modelnames = [
-#         "magnusmodel",
-#         "christianmodel",
-#         "janmodel",
-#         "solveigmodel",
-#         "johanmodel",
-#     ]
+    dummy_img = th.rand((1, *image_shape))
 
-#     for name in modelnames:
-#         print(name)
-#         model = load_model(name, image_shape=image_shape, num_classes=num_classes)
+    modelnames = [
+        "magnusmodel",
+        "christianmodel",
+        "janmodel",
+        "solveigmodel",
+        "johanmodel",
+    ]
 
-#         with th.no_grad():
-#             output = model(dummy_img)
-#             assert output.size() == (1, 4), (
-#                 f"Model {name} returned image of size {output}. Expected (1,4)"
-#             )
+    for name in modelnames:
+        print(name)
+        model = load_model(name, image_shape=image_shape, num_classes=num_classes)
+
+        with th.no_grad():
+            output = model(dummy_img)
+            assert output.size() == (1, 4), (
+                f"Model {name} returned image of size {output.size()}. Expected (1,4)"
+            )
 
 
 def test_load_data():
@@ -51,7 +54,7 @@ def test_load_data():
     with TemporaryDirectory() as tmppath:
         for name in dataset_names:
             dataset = load_data(
-                name, train=False, data_path=tmppath, download=True, transform=trans
+                name, train=False, data_dir=Path(tmppath), transform=trans
             )
 
             im, _ = dataset.__getitem__(0)