From 367b44c181e2012a3a90b1e421602ebfa5ebcb92 Mon Sep 17 00:00:00 2001 From: Jan Francu Date: Wed, 16 Jul 2025 10:25:03 +0200 Subject: [PATCH 1/4] Fixed label/dataset balancing logic. --- .../deepfake_detection_datamodule.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/training_and_evaluation/lightning_data_modules/deepfake_detection_datamodule.py b/training_and_evaluation/lightning_data_modules/deepfake_detection_datamodule.py index f72a946..02f349e 100644 --- a/training_and_evaluation/lightning_data_modules/deepfake_detection_datamodule.py +++ b/training_and_evaluation/lightning_data_modules/deepfake_detection_datamodule.py @@ -365,6 +365,9 @@ def setup_val(self): real_indices = [i for i, gen in enumerate(generator_labels) if gen == ""] fake_indices = [i for i, gen in enumerate(generator_labels) if gen != ""] + selected_real_images: List[int] = [] + selected_fake_images: List[int] = [] + # Balance images (stratified) if n_real > n_fake: n_to_select = n_fake @@ -382,6 +385,9 @@ def setup_val(self): stratify=[generator_labels[i] for i in fake_indices], random_state=self.data_management_seed + 1, ) + else: + selected_real_images = real_indices + selected_fake_images = fake_indices subset_indices = selected_real_images + selected_fake_images validation_dataset = validation_dataset.select(subset_indices) @@ -495,6 +501,9 @@ def setup_test(self): real_indices = [i for i, gen in enumerate(generator_labels) if gen == ""] fake_indices = [i for i, gen in enumerate(generator_labels) if gen != ""] + selected_real_images: List[int] = [] + selected_fake_images: List[int] = [] + # Balance images (stratified) if n_real > n_fake: n_to_select = n_fake @@ -512,6 +521,9 @@ def setup_test(self): stratify=[generator_labels[i] for i in fake_indices], random_state=self.data_management_seed + 1, ) + else: + selected_real_images = real_indices + selected_fake_images = fake_indices subset_indices = selected_real_images + selected_fake_images test_dataset = test_dataset.select(subset_indices) From a36e0e247d7ba920c1e9b1ae250585ec7ab90704 Mon Sep 17 00:00:00 2001 From: Jan Francu Date: Wed, 16 Jul 2025 10:25:46 +0200 Subject: [PATCH 2/4] Fixed requirements.txt --- requirements_train_and_evaluation.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_train_and_evaluation.txt b/requirements_train_and_evaluation.txt index be3737f..c8b38cc 100644 --- a/requirements_train_and_evaluation.txt +++ b/requirements_train_and_evaluation.txt @@ -9,7 +9,7 @@ ruamel.yaml img2dataset click torch==2.7.0 -torchvision=0.22.0 +torchvision==0.22.0 xformers tensorboard lightning[pytorch-extra] From ea6d2a0a0899fe4777b824a29265e30dc1ca5a3d Mon Sep 17 00:00:00 2001 From: Jan Francu Date: Wed, 16 Jul 2025 10:30:55 +0200 Subject: [PATCH 3/4] Check for existence of root directory of RAISE dataset. --- dataset_creation/real_builders/raise_image_builder.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dataset_creation/real_builders/raise_image_builder.py b/dataset_creation/real_builders/raise_image_builder.py index 91b1003..8f120bd 100644 --- a/dataset_creation/real_builders/raise_image_builder.py +++ b/dataset_creation/real_builders/raise_image_builder.py @@ -1,6 +1,7 @@ from collections import OrderedDict from functools import lru_cache import json +import os from pathlib import Path from typing import Iterable, List, Optional import warnings @@ -26,6 +27,8 @@ def __init__( self.root_path = Path(root_path) self.convert_to_jpeg = convert_to_jpeg self.tmp_cache_dir = Path(tmp_cache_dir) if tmp_cache_dir is not None else None + if not self.root_path.exists(): + raise FileNotFoundError(f"Root path does not exist: {self.root_path}") def get_prefix(self) -> str: return "RAISE" From d210f425e49fc5a118650acc3df00304a67ea496 Mon Sep 17 00:00:00 2001 From: Jan Francu Date: Wed, 16 Jul 2025 10:32:20 +0200 Subject: [PATCH 4/4] Fixed openai_clip model factory registration. --- .../algorithms/models/openai_clip_image.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/training_and_evaluation/algorithms/models/openai_clip_image.py b/training_and_evaluation/algorithms/models/openai_clip_image.py index 2a34243..9c6b400 100644 --- a/training_and_evaluation/algorithms/models/openai_clip_image.py +++ b/training_and_evaluation/algorithms/models/openai_clip_image.py @@ -105,11 +105,14 @@ def forward_features(self, x: Tensor) -> Tensor: def forward_head(self, x: Tensor) -> Tensor: return self.fc(x) - -ModelFactoryRegistry().register_model_factory( - "openai_clip_image", make_openai_clip_image_model -) - +import clip +for arch in clip.available_models(): + ModelFactoryRegistry().register_model_factory( + f"{arch}_tune", make_openai_clip_image_model + ) + ModelFactoryRegistry().register_model_factory( + f"{arch}_probe", make_openai_clip_image_model + ) __all__ = [ "make_openai_clip_image_model", @@ -121,5 +124,5 @@ def forward_head(self, x: Tensor) -> Tensor: if __name__ == "__main__": model = make_openai_clip_image_model("RN50_tune", num_classes=1) print(model) - model = make_openai_clip_image_model("RN50_probe", num_classes=1) + model = make_openai_clip_image_model("ViT-L/14_probe", num_classes=1) print(model)