Pulled main and fixed pathing

Seilmast · Seilmast · commit 57e48925e295 · 2025-02-13T13:22:56.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@ _build/
 bin/*
 wandb/*
 wandb_api.py
+doc/autoapi
 
 #Magnus specific
 job*
diff --git a/CollaborativeCoding/dataloaders/svhn.py b/CollaborativeCoding/dataloaders/svhn.py
@@ -29,6 +29,7 @@ def __init__(
             AssertionError: If the split is not 'train' or 'test'.
         """
         super().__init__()
+
         self.data_path = data_path
         self.split = "train" if train else "test"
 
@@ -55,6 +56,7 @@ def _download_data(self, path: str):
             path (str): The directory where the dataset will be downloaded.
         """
         print(f"Downloading SVHN data into {path}")
+
         SVHN(path, split=self.split, download=True)
         data = loadmat(os.path.join(path, f"{self.split}_32x32.mat"))
 
@@ -93,7 +95,6 @@ def __getitem__(self, index):
 
         if self.nr_channels == 1:
             img = img.convert("L")
-
         if self.transforms is not None:
             img = self.transforms(img)
 
diff --git a/CollaborativeCoding/dataloaders/uspsh5_7_9.py b/CollaborativeCoding/dataloaders/uspsh5_7_9.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import h5py
 import numpy as np
 import torch
@@ -30,7 +32,7 @@ class USPSH5_Digit_7_9_Dataset(Dataset):
         A transform function to apply to the images.
     """
 
-    def __init__(self, h5_path, mode, transform=None):
+    def __init__(self, data_path, train=False, transform=None):
         super().__init__()
         """
         Initializes the USPS dataset by loading images and labels from the given `.h5` file.
@@ -43,12 +45,15 @@ def __init__(self, h5_path, mode, transform=None):
         transform : callable, optional, default=None
             A transform function to apply on images.
         """
-
+        self.filename = "usps.h5"
+        path = data_path if isinstance(data_path, Path) else Path(data_path)
+        self.filepath = path / self.filename
         self.transform = transform
-        self.mode = mode
-        self.h5_path = h5_path
+        self.mode = "train" if train else "test"
+        self.h5_path = self.filepath  # reuse the Path-normalised location; `data_path / ...` raises TypeError when data_path is a str
+
         # Load the dataset from the HDF5 file
-        with h5py.File(self.h5_path, "r") as hf:
+        with h5py.File(self.filepath, "r") as hf:
             images = hf[self.mode]["data"][:]
             labels = hf[self.mode]["target"][:]
 
@@ -105,8 +110,8 @@ def main():
 
     # Load the dataset
     dataset = USPSH5_Digit_7_9_Dataset(
-        h5_path="C:/Users/Solveig/OneDrive/Dokumente/UiT PhD/Courses/Git/usps.h5",
-        mode="train",
+        data_path="C:/Users/Solveig/OneDrive/Dokumente/UiT PhD/Courses/Git",
+        train=False,
         transform=transform,
     )
     data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)
diff --git a/CollaborativeCoding/metrics/F1.py b/CollaborativeCoding/metrics/F1.py
@@ -112,6 +112,7 @@ def _macro_F1(self):
 
     def forward(self, preds, target):
         """
+
         Update the True Positives, False Positives, and False Negatives, and compute the F1 score.
 
         This method computes the F1 score based on the predictions and true labels. It can compute either the
diff --git a/CollaborativeCoding/models/christian_model.py b/CollaborativeCoding/models/christian_model.py
@@ -3,6 +3,18 @@
 
 
 class CNNBlock(nn.Module):
+    """
+    CNN block with Conv2d, MaxPool2d, and ReLU.
+
+    Args
+    ----
+
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    """
+
     def __init__(self, in_channels, out_channels):
         super().__init__()
 
@@ -22,6 +34,37 @@ def forward(self, x):
         return x
 
 
+def find_fc_input_shape(image_shape, *cnn_layers):
+    """
+    Find the number of input features for the fully connected layer.
+
+    Code inspired by @Seilmast (https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam/issues/67#issuecomment-2651212254)
+
+    Args
+    ----
+    image_shape : tuple(int, int, int)
+        Shape of the input image (C, H, W).
+    *cnn_layers : nn.Module
+        One or more CNN layers (positional), applied in the order given.
+
+    Returns
+    -------
+    int
+        Number of elements per sample after the last layer, once flattened.
+    """
+
+    dummy_img = torch.randn(1, *image_shape)  # random batch of one image, used only to probe shapes
+    with torch.no_grad():  # no gradients are needed for a shape probe
+        x = cnn_layers[0](dummy_img)  # indexing [0] means at least one layer must be passed
+
+        for layer in cnn_layers[1:]:
+            x = layer(x)
+
+        x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
+
+    return x.size(1)
+
+
 class ChristianModel(nn.Module):
     """Simple CNN model for image classification.
 
@@ -57,7 +100,9 @@ def __init__(self, image_shape, num_classes):
         self.cnn1 = CNNBlock(C, 50)
         self.cnn2 = CNNBlock(50, 100)
 
-        self.fc1 = nn.Linear(100 * 4 * 4, num_classes)
+        fc_input_shape = find_fc_input_shape(image_shape, self.cnn1, self.cnn2)
+
+        self.fc1 = nn.Linear(fc_input_shape, num_classes)
 
     def forward(self, x):
         x = self.cnn1(x)
@@ -70,9 +115,10 @@ def forward(self, x):
 
 
 if __name__ == "__main__":
-    model = ChristianModel(3, 7)
+    x = torch.randn(3, 3, 28, 28)
+
+    model = ChristianModel(x.shape[1:], 7)
 
-    x = torch.randn(3, 3, 16, 16)
     y = model(x)
 
     print(y)
diff --git a/main.py b/main.py
@@ -35,10 +35,10 @@ def main():
 
     device = args.device
 
-    if args.dataset.lower() in ["usps_0-6", "usps_7-9"]:
+    if "usps" in args.dataset.lower():
         transform = transforms.Compose(
             [
-                transforms.Resize((16, 16)),
+                transforms.Resize((28, 28)),
                 transforms.ToTensor(),
             ]
         )
diff --git a/tests/test_dataloaders.py b/tests/test_dataloaders.py
@@ -1,8 +1,45 @@
-from CollaborativeCoding.dataloaders.usps_0_6 import USPSDataset0_6
+from pathlib import Path
+
+import numpy as np
+import pytest
+import torch
+from PIL import Image
+from torchvision import transforms
+
+from CollaborativeCoding.dataloaders import (
+    MNISTDataset0_3,
+    USPSDataset0_6,
+    USPSH5_Digit_7_9_Dataset,
+)
+from CollaborativeCoding.load_data import load_data
+
+
+@pytest.mark.parametrize(
+    "data_name, expected",
+    [
+        ("usps_0-6", USPSDataset0_6),
+        ("usps_7-9", USPSH5_Digit_7_9_Dataset),
+        ("mnist_0-3", MNISTDataset0_3),
+        # TODO: Add more datasets here
+    ],
+)
+def test_load_data(data_name, expected):
+    dataset = load_data(
+        data_name,
+        data_path=Path("data"),
+        download=True,  # NOTE(review): presumably fetches the data when missing, so this test may need network access - confirm
+        transform=transforms.ToTensor(),
+    )
+    assert isinstance(dataset, expected)  # the dataset name must map to the expected dataset class
+    assert len(dataset) > 0
+    assert isinstance(dataset[0], tuple)  # a sample is a tuple; elements 0 and 1 are type-checked below
+    assert isinstance(dataset[0][0], torch.Tensor)
+    assert isinstance(
+        dataset[0][1], (int, torch.Tensor, np.ndarray)
+    )  # TODO: restrict this to only int or a one-hot encoded tensor/array for consistency.
 
 
 def test_uspsdataset0_6():
-    from pathlib import Path
     from tempfile import TemporaryDirectory
 
     import h5py
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -34,6 +34,21 @@ def test_jan_model(image_shape, num_classes):
     assert y.shape == (n, num_classes), f"Shape: {y.shape}"
 
 
+@pytest.mark.parametrize(
+    "image_shape, num_classes",
+    [((3, 16, 16), 3), ((3, 16, 16), 7)],
+)
+def test_solveig_model(image_shape, num_classes):
+    n, c, h, w = 5, *image_shape  # batch size 5; image_shape supplies the remaining three dims
+
+    model = SolveigModel(image_shape, num_classes)
+
+    x = torch.randn(n, c, h, w)
+    y = model(x)
+
+    assert y.shape == (n, num_classes), f"Shape: {y.shape}"  # one row of num_classes outputs per sample
+
+
 @pytest.mark.parametrize("image_shape", [(3, 28, 28)])
 def test_magnus_model(image_shape):
     import torch as th

Original file line number	Diff line number	Diff line change
`@@ -35,10 +35,10 @@ def main():`
`35`	`35`
`36`	`36`	`device = args.device`
`37`	`37`
`38`		`- if args.dataset.lower() in ["usps_0-6", "usps_7-9"]:`
	`38`	`+ if "usps" in args.dataset.lower():`
`39`	`39`	`transform = transforms.Compose(`
`40`	`40`	`[`
`41`		`- transforms.Resize((16, 16)),`
	`41`	`+ transforms.Resize((28, 28)),`
`42`	`42`	`transforms.ToTensor(),`
`43`	`43`	`]`
`44`	`44`	`)`