Format code with ruff and sort imports with isort

hzavadil98 · hzavadil98 · commit 601caca8e6fb · 2025-02-10T13:46:24.000+01:00
diff --git a/utils/dataloaders/datasources.py b/utils/dataloaders/datasources.py
@@ -19,20 +19,24 @@
 }
 
 MNIST_SOURCE = {
-    "train_images": ["https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz", 
-                     "train-images-idx3-ubyte", 
-                     None
+    "train_images": [
+        "https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz",
+        "train-images-idx3-ubyte",
+        None,
     ],
-    "train_labels": ["https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz",
-                    "train-labels-idx1-ubyte",
-                    None
+    "train_labels": [
+        "https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz",
+        "train-labels-idx1-ubyte",
+        None,
     ],
-    "test_images": ["https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz",
-                    "t10k-images-idx3-ubyte",
-                    None
+    "test_images": [
+        "https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz",
+        "t10k-images-idx3-ubyte",
+        None,
     ],
-    "test_labels": ["https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz",
-                    "t10k-labels-idx1-ubyte",
-                    None
+    "test_labels": [
+        "https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz",
+        "t10k-labels-idx1-ubyte",
+        None,
     ],
 }
diff --git a/utils/dataloaders/download.py b/utils/dataloaders/download.py
@@ -1,15 +1,15 @@
 import bz2
+import gzip
 import hashlib
 import os
-import gzip
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from urllib.request import urlretrieve
 
 import h5py as h5
 import numpy as np
 
-from .datasources import USPS_SOURCE, MNIST_SOURCE
+from .datasources import MNIST_SOURCE, USPS_SOURCE
 
 
 class Downloader:
@@ -52,35 +52,36 @@ def _chech_is_downloaded(path: Path) -> bool:
             else:
                 path.mkdir(parents=True, exist_ok=True)
                 return False
-            
+
         def _download_data(path: Path) -> None:
             urls = {key: MNIST_SOURCE[key][0] for key in MNIST_SOURCE.keys()}
 
             for name, url in urls.items():
                 file_path = os.path.join(path, url.split("/")[-1])
-                if not os.path.exists(file_path.replace(".gz", "")):  # Avoid re-downloading
+                if not os.path.exists(
+                    file_path.replace(".gz", "")
+                ):  # Avoid re-downloading
                     urlretrieve(url, file_path)
                     with gzip.open(file_path, "rb") as f_in:
                         with open(file_path.replace(".gz", ""), "wb") as f_out:
                             f_out.write(f_in.read())
                     os.remove(file_path)  # Remove compressed file
-                    
+
         def _get_labels(path: Path) -> np.ndarray:
             with open(path, "rb") as f:
                 data = np.frombuffer(f.read(), dtype=np.uint8, offset=8)
             return data
-                    
+
         if not _chech_is_downloaded(data_dir):
             _download_data(data_dir)
-            
+
         train_labels_path = data_dir / "MNIST" / MNIST_SOURCE["train_labels"][1]
         test_labels_path = data_dir / "MNIST" / MNIST_SOURCE["test_labels"][1]
-        
+
         train_labels = _get_labels(train_labels_path)
         test_labels = _get_labels(test_labels_path)
-        
+
         return train_labels, test_labels
-        
 
     def svhn(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
         raise NotImplementedError("SVHN download not implemented yet")
diff --git a/utils/dataloaders/mnist_0_3.py b/utils/dataloaders/mnist_0_3.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 from torch.utils.data import Dataset
+
 from .datasources import MNIST_SOURCE
 
 
@@ -62,11 +63,15 @@ def __init__(
         self.transform = transform
         self.num_classes = 4
 
-        self.images_path = self.mnist_path / (MNIST_SOURCE["train_images"][1] if train else MNIST_SOURCE["test_images"][1])
-        self.labels_path = self.mnist_path / (MNIST_SOURCE["train_labels"][1] if train else MNIST_SOURCE["test_labels"][1])
+        self.images_path = self.mnist_path / (
+            MNIST_SOURCE["train_images"][1] if train else MNIST_SOURCE["test_images"][1]
+        )
+        self.labels_path = self.mnist_path / (
+            MNIST_SOURCE["train_labels"][1] if train else MNIST_SOURCE["test_labels"][1]
+        )
 
         self.length = len(self.idx)
-        
+
     def __len__(self):
         return self.length