Commit 93c1013

Merge branch 'main' into Jan-doc
2 parents ff7ad56 + e6c2630

File tree

9 files changed (+75 −27 lines)

New GitHub Actions workflow (release on tag push)

Lines changed: 29 additions & 0 deletions

```yaml
on:
  push:
    # Sequence of patterns matched against refs/tags
    tags:
      - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10

name: Create Release

jobs:
  build:
    name: Create Release
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@master
      - name: Create Release
        id: create_release
        uses: actions/create-release@latest
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          body: |
            Changes in this Release
            - First Change
            - Second Change
          draft: false
          prerelease: false
```
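Once this workflow is on the default branch, pushing any tag that matches the `v*` pattern fires it. A minimal trigger, with an illustrative tag name:

```sh
# Tag the current commit and push the tag; the push event matches 'v*' and starts the release job
git tag v1.0.0
git push origin v1.0.0
```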

CollaborativeCoding/dataloaders/download.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -102,11 +102,11 @@ def download_svhn(path, train: bool = True):
     train_data = parent_path / "train_32x32.mat"
     test_data = parent_path / "test_32x32.mat"
 
-    if not train_data.is_file():
+    if not train_data.exists():
         download_svhn(parent_path, train=True)
-    if not test_data.is_file():
+    if not test_data.exists():
         download_svhn(parent_path, train=False)
-    print(test_data)
+
     train_labels = loadmat(train_data)["y"]
     test_labels = loadmat(test_data)["y"]
```
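Besides dropping the stray debug `print`, the switch from `is_file()` to `exists()` makes the check slightly broader: `Path.exists()` is also true for directories and other non-regular files, while `is_file()` requires a regular file. A small sketch of the difference, using a hypothetical path name:

```python
from pathlib import Path

p = Path("train_32x32.mat")  # hypothetical name; here it is created as a directory
p.mkdir(exist_ok=True)

print(p.exists())   # True:  a directory still "exists"
print(p.is_file())  # False: it is not a regular file
```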

CollaborativeCoding/dataloaders/svhn.py

Lines changed: 3 additions & 5 deletions

```diff
@@ -6,7 +6,6 @@
 from PIL import Image
 from scipy.io import loadmat
 from torch.utils.data import Dataset
-from torchvision.datasets import SVHN
 
 
 class SVHNDataset(Dataset):
@@ -31,7 +30,7 @@ def __init__(
         """
         super().__init__()
 
-        self.data_path = data_path
+        self.data_path = data_path / "SVHN"
         self.indexes = sample_ids
         self.split = "train" if train else "test"
 
@@ -41,7 +40,7 @@ def __init__(
         if not os.path.exists(
             os.path.join(self.data_path, f"svhn_{self.split}data.h5")
         ):
-            self._download_data(self.data_path)
+            self._create_h5py(self.data_path)
 
         assert os.path.exists(
             os.path.join(self.data_path, f"svhn_{self.split}data.h5")
@@ -53,15 +52,14 @@ def __init__(
 
         self.num_classes = len(np.unique(self.labels))
 
-    def _download_data(self, path: str):
+    def _create_h5py(self, path: str):
         """
         Downloads the SVHN dataset to the specified directory.
         Args:
             path (str): The directory where the dataset will be downloaded.
         """
         print(f"Downloading SVHN data into {path}")
 
-        SVHN(path, split=self.split, download=True)
         data = loadmat(os.path.join(path, f"{self.split}_32x32.mat"))
 
         images, labels = data["X"], data["y"]
```

CollaborativeCoding/dataloaders/uspsh5_7_9.py

Lines changed: 0 additions & 1 deletion

```diff
@@ -102,4 +102,3 @@ def __getitem__(self, id):
         image = self.transform(image)
 
         return image, label
-
```

CollaborativeCoding/metrics/F1.py

Lines changed: 11 additions & 4 deletions

```diff
@@ -23,7 +23,6 @@ def __init__(self, num_classes, macro_averaging=False):
         self.y_true = []
         self.y_pred = []
 
-
     def forward(self, target, preds):
         """
         Stores predictions and targets for computing the F1 score.
@@ -57,7 +56,11 @@ def compute_f1(self):
         y_true = torch.cat(self.y_true)
         y_pred = torch.cat(self.y_pred)
 
-        return self._macro_F1(y_true, y_pred) if self.macro_averaging else self._micro_F1(y_true, y_pred)
+        return (
+            self._macro_F1(y_true, y_pred)
+            if self.macro_averaging
+            else self._micro_F1(y_true, y_pred)
+        )
 
     def _micro_F1(self, target, preds):
         """Computes Micro F1 Score (global TP, FP, FN)."""
@@ -111,9 +114,13 @@ def __returnmetric__(self):
         y_true = torch.cat([t.unsqueeze(0) if t.dim() == 0 else t for t in self.y_true])
         y_pred = torch.cat([t.unsqueeze(0) if t.dim() == 0 else t for t in self.y_pred])
 
-        return self._macro_F1(y_true, y_pred) if self.macro_averaging else self._micro_F1(y_true, y_pred)
+        return (
+            self._macro_F1(y_true, y_pred)
+            if self.macro_averaging
+            else self._micro_F1(y_true, y_pred)
+        )
 
     def __reset__(self):
         """Resets stored predictions and targets."""
         self.y_true = []
-        self.y_pred = []
+        self.y_pred = []
```

(The final `-`/`+` pair is identical text; it likely records a newline-at-end-of-file fix.)
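For reference, the two branches of that conditional expression compute different averages: micro-F1 pools true/false positives and negatives globally (for single-label multiclass data this equals accuracy), while macro-F1 averages per-class F1 scores so every class counts equally. A standalone sketch, not the repository's implementation:

```python
import torch

def micro_f1(target: torch.Tensor, preds: torch.Tensor) -> float:
    # Global TP/FP/FN: with one predicted label per sample this reduces to accuracy
    return (target == preds).float().mean().item()

def macro_f1(target: torch.Tensor, preds: torch.Tensor, num_classes: int) -> float:
    # Per-class F1 = 2*TP / (2*TP + FP + FN), then an unweighted mean over classes
    f1s = []
    for c in range(num_classes):
        tp = ((preds == c) & (target == c)).sum().item()
        fp = ((preds == c) & (target != c)).sum().item()
        fn = ((preds != c) & (target == c)).sum().item()
        denom = 2 * tp + fp + fn
        f1s.append(2 * tp / denom if denom else 0.0)
    return sum(f1s) / num_classes
```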

README.md

Lines changed: 20 additions & 0 deletions

````diff
@@ -3,6 +3,26 @@
 # Collaborative-Coding-Exam
 Repository for final evaluation in the FYS-8805 Reproducible Research and Collaborative coding course
 
+## Installation
+
+Install from:
+
+```sh
+pip install git+https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam.git
+```
+
+or using [uv](https://docs.astral.sh/uv/):
+
+```sh
+uv add git+https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam.git
+```
+
+To verify:
+
+```sh
+python -c "import CollaborativeCoding"
+```
+
 ## Usage
 
 TODO: Fill in
````

doc/Magnus_page.md

Lines changed: 7 additions & 12 deletions

```diff
@@ -26,24 +26,19 @@ Each input is flattened over the channel, height and width channels. Then they a
 
 
 ## Entropy Metric In-Depth
-
-The EntropyPrediction class' main job is to take some inputs and return the Shannon Entropy metric of those inputs. The class has four methods with the following jobs:
+The EntropyPrediction class' main job is to take inputs from the MetricWrapper class and store the batchwise Shannon Entropy metric of those inputs. The class has four methods with the following jobs:
 * __init__ : Initialize the class.
 * __call__ : Main method, used to calculate and store the batchwise Shannon Entropy.
 * __returnmetric__ : Returns the collected metric.
 * __reset__ : Removes all the stored values up until that point. Readies the instance for storing values from a new epoch.
 
-The class is initialized with a single parameter called "averages". This is inspired by other PyTorch and NumPy implementations and controls how values from different batches or within batches will be combined. The __init__ method checks the value of this argument with an assertion; it must be one of three strings, as we only allow "mean", "sum" and "none" as methods of combining the different entropy values. We'll come back to the specifics here.
-Furthermore, this method will also store the different Shannon Entropy values as we pass values into the __call__ method.
+The __init__ method has two arguments, both kept for compatibility reasons. Of the two, num_classes is also used as a check in the __call__ method to assert that the input has the expected size.
+
+In __call__ we get both true labels and model logit scores for each sample in the batch as input. We're calculating Shannon Entropy, not KL-divergence, so the true labels aren't actually needed.
+With permission I've used the scipy implementation to calculate entropy here. We apply a softmax over the logit values, then calculate the Shannon Entropy, and make sure to remove any Inf values which might arise from a perfect guess/distribution.
+
 
-In __call__ we get both true labels and model logit scores for each sample in the batch as input. We're calculating Shannon Entropy, not KL-divergence, so the true labels aren't needed.
-With permission I've used the scipy implementation to calculate entropy here. We apply a softmax over the logit values, then calculate the Shannon Entropy, and make sure to remove any NaN or Inf values which might arise from a perfect guess/distribution.
 
-Next we have the __returnmetric__ method, which is used to retrieve the stored metric. Here the averages argument comes into play.
-Depending on what has been chosen as the averaging method when initializing the class, one of the following operations will be applied to the stored values:
-* Mean: Calculate the mean of the stored entropy values.
-* Sum: Sum the stored entropy values.
-* None: Do nothing with the stored entropy values.
-Then the value(s) are returned.
+Next we have the __returnmetric__ method, which retrieves the stored metric. It returns the mean over all stored values; effectively, the average Shannon Entropy over the dataset.
 
 Lastly we have the __reset__ method, which simply empties the variable that stores the entropy values, preparing it for the next epoch.
```
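A minimal sketch of the computation this documentation describes, using scipy as the text mentions (variable names illustrative, not the class's actual code):

```python
import numpy as np
from scipy.special import softmax
from scipy.stats import entropy

logits = np.array([[2.0, 0.5, -1.0],      # ordinary prediction
                   [50.0, -50.0, -50.0]])  # near-perfect guess -> entropy near 0

probs = softmax(logits, axis=1)         # logits -> per-sample probability distributions
batch_entropy = entropy(probs, axis=1)  # per-sample Shannon entropy

batch_entropy = batch_entropy[np.isfinite(batch_entropy)]  # guard against Inf values
mean_entropy = batch_entropy.mean()     # the epoch average __returnmetric__ would report
```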

main.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,11 +1,11 @@
 import numpy as np
 import torch as th
 import torch.nn as nn
+import wandb
 from torch.utils.data import DataLoader
 from torchvision import transforms
 from tqdm import tqdm
 
-import wandb
 from CollaborativeCoding import (
     MetricWrapper,
     createfolders,
```

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -3,7 +3,7 @@ name = "collaborative-coding-exam"
 version = "0.1.0"
 description = "Exam project in the collaborative coding course."
 readme = "README.md"
-requires-python = ">=3.12"
+requires-python = ">=3.11.5"
 dependencies = [
     "black>=25.1.0",
     "h5py>=3.12.1",
```
