Fixed several tests after changing the code

Seilmast · Seilmast · commit d7e83d4e040b · 2025-02-11T14:32:25.000+01:00
diff --git a/tests/test_dataloaders.py b/tests/test_dataloaders.py
@@ -44,12 +44,12 @@ def test_svhn_dataset():
         trans = transforms.Compose([transforms.Resize((28, 28)), transforms.ToTensor()])
 
         dataset = SVHNDataset(
-            tempdir, train=True, transform=trans, download=True, nr_channels=1
+            tempdir, train=False, transform=trans, download=True, nr_channels=1
         )
 
         assert dataset.__len__() != 0
-        assert os.path.exists(os.path.join(tempdir, "train_32x32.mat"))
+        assert os.path.exists(os.path.join(tempdir, "test_32x32.mat")), f'No such file as test_32x32.mat. Try running download=True'
+        assert os.path.exists(os.path.join(tempdir, "svhn_testdata.h5")), f'No such file as svhn_testdata.h5. Try running download=True'
 
         img, label = dataset.__getitem__(0)
         assert len(img.size()) == 3 and img.size() == (1, 28, 28) and img.size(0) == 1
-        assert len(label.size()) == 1
diff --git a/utils/dataloaders/svhn.py b/utils/dataloaders/svhn.py
@@ -1,6 +1,8 @@
 import os
 
+import h5py 
 import numpy as np
+from PIL import Image
 from scipy.io import loadmat
 from torch.utils.data import Dataset
 from torchvision.datasets import SVHN
@@ -27,22 +29,23 @@ def __init__(
             AssertionError: If the split is not 'train' or 'test'.
         """
         super().__init__()
+        self.data_path = data_path
         self.split = "train" if train else "test"
 
         if download:
             self._download_data(data_path)
 
-        data = loadmat(os.path.join(data_path, f"{self.split}_32x32.mat"))
-
-        # Reform images to the form N x H x W x C
-        self.images = data["X"].transpose(3, 1, 0, 2)
-        self.labels = data["y"].flatten()
-
-        self.labels[self.labels == 10] = 0
-
         self.nr_channels = nr_channels
         self.transforms = transform
+        
+        
+        assert os.path.exists(os.path.join(self.data_path, f'svhn_{self.split}data.h5')), f'File svhn_{self.split}data.h5 does not exists. Run download=True'
+        with h5py.File(os.path.join(self.data_path, f'svhn_{self.split}data.h5'), 'r') as h5f:
+            self.labels = h5f['labels'][:]
+        
         self.num_classes = len(np.unique(self.labels))
+        
+        
 
     def _download_data(self, path: str):
         """
@@ -52,7 +55,17 @@ def _download_data(self, path: str):
         """
         print(f"Downloading SVHN data into {path}")
         SVHN(path, split=self.split, download=True)
+        data = loadmat(os.path.join(path, f'{self.split}_32x32.mat'))
 
+        images, labels = data['X'], data['y']
+        images = images.transpose(3,1,0,2)
+        labels[labels == 10] = 0
+        labels = labels.flatten()
+        
+        with h5py.File(os.path.join(self.data_path, f'svhn_{self.split}data.h5'), 'w') as h5f:
+            h5f.create_dataset('images', data=images)
+            h5f.create_dataset('labels', data=labels)
+ 
     def __len__(self):
         """
         Returns the number of samples in the dataset.
@@ -69,11 +82,15 @@ def __getitem__(self, index):
         Returns:
             tuple: A tuple containing the image and its corresponding label.
         """
-        img, lab = self.images[index], self.labels[index]
-
+        lab = self.labels[index]
+        with h5py.File(os.path.join(self.data_path, f'svhn_{self.split}data.h5'), 'r') as h5f:
+            img = Image.fromarray(h5f['images'][index])
+            
         if self.nr_channels == 1:
-            img = np.mean(img, axis=2, keepdims=True)
+            img = img.convert('L')
+            
         if self.transforms is not None:
             img = self.transforms(img)
 
         return img, lab
+
diff --git a/utils/metrics/EntropyPred.py b/utils/metrics/EntropyPred.py
@@ -63,15 +63,3 @@ def __returnmetric__(self):
     def __reset__(self):
         self.stored_entropy_values = []
 
-if __name__ == '__main__':
-     
-    pred_logits = th.rand(6, 5)
-    true_lab = th.rand(6, 5)
-    
-    metric = EntropyPrediction(averages="mean")
-    metric2 = EntropyPrediction(averages="sum")
-    
-    # Test for averaging metric consistency
-    metric(true_lab, pred_logits)
-    metric2(true_lab, pred_logits)
-    assert (th.abs(th.sum(6 * metric.__returnmetric__() - metric2.__returnmetric__())) < 1e-5)