Merge pull request #69 from SFI-Visual-Intelligence/usps-inputshape

Seilmast · web-flow · commit 59e1d5a9cf3e · 2025-02-13T12:57:22.000+01:00
USPS input shape, resolves #67. LGTM
diff --git a/main.py b/main.py
@@ -30,10 +30,12 @@ def main():
 
     device = args.device
 
-    if args.dataset.lower() in ["usps_0-6", "usps_7-9"]:
+
+    if "usps" in args.dataset.lower():
+
         transform = transforms.Compose(
             [
-                transforms.Resize((16, 16)),
+                transforms.Resize((28, 28)),
                 transforms.ToTensor(),
             ]
         )
@@ -45,6 +47,7 @@ def main():
         data_dir=args.datafolder,
         transform=transform,
         val_size=args.val_size,
+
     )
 
     train_metrics = MetricWrapper(
@@ -126,6 +129,7 @@ def main():
         project=args.run_name,
         tags=[args.modelname, args.dataset],
         config=args,
+
     )
     wandb.watch(model)
 
diff --git a/utils/arg_parser.py b/utils/arg_parser.py
@@ -33,6 +33,7 @@ def get_args():
         help="Whether model should be saved or not.",
     )
 
+
     # Data/Model specific values
     parser.add_argument(
         "--modelname",
@@ -82,6 +83,7 @@ def get_args():
         "--macro_averaging",
         action="store_true",
         help="If the flag is included, the metrics will be calculated using macro averaging.",
+
     )
 
     # Training specific values
diff --git a/utils/dataloaders/svhn.py b/utils/dataloaders/svhn.py
@@ -1,5 +1,6 @@
 import os
 
+
 import h5py
 import numpy as np
 from PIL import Image
@@ -29,6 +30,7 @@ def __init__(
             AssertionError: If the split is not 'train' or 'test'.
         """
         super().__init__()
+
         self.data_path = data_path
         self.split = "train" if train else "test"
 
@@ -55,6 +57,7 @@ def _download_data(self, path: str):
             path (str): The directory where the dataset will be downloaded.
         """
         print(f"Downloading SVHN data into {path}")
+
         SVHN(path, split=self.split, download=True)
         data = loadmat(os.path.join(path, f"{self.split}_32x32.mat"))
 
@@ -92,8 +95,8 @@ def __getitem__(self, index):
             img = Image.fromarray(h5f["images"][index])
 
         if self.nr_channels == 1:
-            img = img.convert("L")
 
+            img = img.convert("L")
         if self.transforms is not None:
             img = self.transforms(img)
 
diff --git a/utils/models/christian_model.py b/utils/models/christian_model.py
@@ -3,6 +3,18 @@
 
 
 class CNNBlock(nn.Module):
+    """
+    CNN block with Conv2d, MaxPool2d, and ReLU.
+
+    Args
+    ----
+
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    """
+
     def __init__(self, in_channels, out_channels):
         super().__init__()
 
@@ -22,6 +34,37 @@ def forward(self, x):
         return x
 
 
+def find_fc_input_shape(image_shape, *cnn_layers):
+    """
+    Find the shape of the input to the fully connected layer.
+
+    Code inspired by @Seilmast (https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam/issues/67#issuecomment-2651212254)
+
+    Args
+    ----
+    image_shape : tuple(int, int, int)
+        Shape of the input image (C, H, W).
+    cnn_layers : nn.Module
+        List of CNN layers.
+
+    Returns
+    -------
+    int
+        Number of elements in the input to the fully connected layer.
+    """
+
+    dummy_img = torch.randn(1, *image_shape)
+    with torch.no_grad():
+        x = cnn_layers[0](dummy_img)
+
+        for layer in cnn_layers[1:]:
+            x = layer(x)
+
+        x = x.view(x.size(0), -1)
+
+    return x.size(1)
+
+
 class ChristianModel(nn.Module):
     """Simple CNN model for image classification.
 
@@ -57,7 +100,9 @@ def __init__(self, image_shape, num_classes):
         self.cnn1 = CNNBlock(C, 50)
         self.cnn2 = CNNBlock(50, 100)
 
-        self.fc1 = nn.Linear(100 * 4 * 4, num_classes)
+        fc_input_shape = find_fc_input_shape(image_shape, self.cnn1, self.cnn2)
+
+        self.fc1 = nn.Linear(fc_input_shape, num_classes)
 
     def forward(self, x):
         x = self.cnn1(x)
@@ -70,9 +115,10 @@ def forward(self, x):
 
 
 if __name__ == "__main__":
-    model = ChristianModel(3, 7)
+    x = torch.randn(3, 3, 28, 28)
+
+    model = ChristianModel(x.shape[1:], 7)
 
-    x = torch.randn(3, 3, 16, 16)
     y = model(x)
 
     print(y)

Original file line number	Diff line number	Diff line change
`@@ -30,10 +30,12 @@ def main():`
`30`	`30`
`31`	`31`	`device = args.device`
`32`	`32`
`33`		`- if args.dataset.lower() in ["usps_0-6", "usps_7-9"]:`
	`33`	`+`
	`34`	`+ if "usps" in args.dataset.lower():`
	`35`	`+`
`34`	`36`	`transform = transforms.Compose(`
`35`	`37`	`[`
`36`		`- transforms.Resize((16, 16)),`
	`38`	`+ transforms.Resize((28, 28)),`
`37`	`39`	`transforms.ToTensor(),`
`38`	`40`	`]`
`39`	`41`	`)`
`@@ -45,6 +47,7 @@ def main():`
`45`	`47`	`data_dir=args.datafolder,`
`46`	`48`	`transform=transform,`
`47`	`49`	`val_size=args.val_size,`
	`50`	`+`
`48`	`51`	`)`
`49`	`52`
`50`	`53`	`train_metrics = MetricWrapper(`
`@@ -126,6 +129,7 @@ def main():`
`126`	`129`	`project=args.run_name,`
`127`	`130`	`tags=[args.modelname, args.dataset],`
`128`	`131`	`config=args,`
	`132`	`+`
`129`	`133`	`)`
`130`	`134`	`wandb.watch(model)`
`131`	`135`
Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@ def get_args():`
`33`	`33`	`help="Whether model should be saved or not.",`
`34`	`34`	`)`
`35`	`35`
	`36`	`+`
`36`	`37`	`# Data/Model specific values`
`37`	`38`	`parser.add_argument(`
`38`	`39`	`"--modelname",`
`@@ -82,6 +83,7 @@ def get_args():`
`82`	`83`	`"--macro_averaging",`
`83`	`84`	`action="store_true",`
`84`	`85`	`help="If the flag is included, the metrics will be calculated using macro averaging.",`
	`86`	`+`
`85`	`87`	`)`
`86`	`88`
`87`	`89`	`# Training specific values`