Merge branch 'main' into christian/update-dataloader-recall

c-salomonsen · c-salomonsen · commit 9b5f6cb26174 · 2025-02-20T13:21:23.000+01:00
diff --git a/CollaborativeCoding/dataloaders/mnist_0_3.py b/CollaborativeCoding/dataloaders/mnist_0_3.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 import numpy as np
+from PIL import Image
 from torch.utils.data import Dataset
 
 from .datasources import MNIST_SOURCE
@@ -87,7 +88,8 @@ def __getitem__(self, index):
                 28, 28
             )  # Read image data
 
-        image = np.expand_dims(image, axis=0)  # Add channel dimension
+        # image = np.expand_dims(image, axis=0)  # Add channel dimension
+        image = Image.fromarray(image.astype(np.uint8))
 
         if self.transform:
             image = self.transform(image)
diff --git a/CollaborativeCoding/dataloaders/mnist_4_9.py b/CollaborativeCoding/dataloaders/mnist_4_9.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 import numpy as np
+from PIL import Image
 from torch.utils.data import Dataset
 
 from .datasources import MNIST_SOURCE
@@ -28,11 +29,13 @@ def __init__(
         transform=None,
         nr_channels: int = 1,
     ):
-        super.__init__()
+        super().__init__()
         self.data_path = data_path
         self.mnist_path = self.data_path / "MNIST"
         self.samples = sample_ids
         self.train = train
+        self.transform = transform
+        self.num_classes = 6
 
         self.images_path = self.mnist_path / (
             MNIST_SOURCE["train_images"][1] if train else MNIST_SOURCE["test_images"][1]
@@ -46,7 +49,7 @@ def __len__(self):
 
     def __getitem__(self, idx):
         with open(self.labels_path, "rb") as labelfile:
-            label_pos = 8 + self.sample[idx]
+            label_pos = 8 + self.samples[idx]
             labelfile.seek(label_pos)
             label = int.from_bytes(labelfile.read(1), byteorder="big")
 
@@ -57,7 +60,8 @@ def __getitem__(self, idx):
                 28, 28
             )
 
-        image = np.expand_dims(image, axis=0)  # Channel
+        # image = np.expand_dims(image, axis=0)  # Channel
+        image = Image.fromarray(image.astype(np.uint8))
 
         if self.transform:
             image = self.transform(image)
diff --git a/CollaborativeCoding/load_data.py b/CollaborativeCoding/load_data.py
@@ -78,10 +78,6 @@ def load_data(dataset: str, *args, **kwargs) -> tuple:
     train_indices = np.arange(len(train_labels))
     test_indices = np.arange(len(test_labels))
 
-    print(train_indices.shape)
-    print(np.asarray(train_labels).shape)
-    print(labels.shape)
-
     # Filter the labels to only get indices of the wanted labels
     train_samples = train_indices[np.isin(train_labels, labels)]
     test_samples = test_indices[np.isin(test_labels, labels)]
diff --git a/tests/test_dataloaders.py b/tests/test_dataloaders.py
@@ -7,6 +7,7 @@
 
 from CollaborativeCoding.dataloaders import (
     MNISTDataset0_3,
+    MNISTDataset4_9,
     SVHNDataset,
     USPSDataset0_6,
     USPSH5_Digit_7_9_Dataset,
@@ -21,11 +22,12 @@
         ("usps_7-9", USPSH5_Digit_7_9_Dataset),
         ("mnist_0-3", MNISTDataset0_3),
         ("svhn", SVHNDataset),
-        # TODO: Add more datasets here
+        ("mnist_4-9", MNISTDataset4_9),
     ],
 )
 def test_load_data(data_name, expected):
-    dataset = load_data(
+    print(data_name)
+    dataset, _, _ = load_data(
         data_name,
         data_dir=Path("data"),
         transform=transforms.ToTensor(),
@@ -34,6 +36,4 @@ def test_load_data(data_name, expected):
     assert len(dataset) > 0
     assert isinstance(dataset[0], tuple)
     assert isinstance(dataset[0][0], torch.Tensor)
-    assert isinstance(
-        dataset[0][1], (int, torch.Tensor, np.ndarray)
-    )  # Should probably restrict this to only int or one-hot encoded tensor or array for consistency.
+    assert isinstance(dataset[0][1], int)