
Commit 9af7b65

updating for poison paper
1 parent c4ab63f commit 9af7b65

File tree

11 files changed

+442 -190 lines changed

.devcontainer/devcontainer.json

Lines changed: 30 additions & 2 deletions
@@ -7,6 +7,34 @@
         "dockerfile": "Dockerfile"
     },
     "features": {
-        "ghcr.io/rocker-org/devcontainer-features/miniforge:2": {}
-    }
+        "ghcr.io/devcontainers/features/anaconda:1": {
+            "version": "latest"
+        },
+        "ghcr.io/devcontainers/features/nvidia-cuda:2": {
+            "installCudnn": true,
+            "installCudnnDev": true,
+            "installNvtx": true,
+            "installToolkit": true,
+            "cudaVersion": "11.8",
+            "cudnnVersion": "automatic"
+        },
+        "ghcr.io/raucha/devcontainer-features/pytorch:1": {}
+    },
+    "runArgs": [
+        "--gpus=all"
+    ]
+
+    // Features to add to the dev container. More info: https://containers.dev/features.
+    // "features": {},
+
+    // Use 'forwardPorts' to make a list of ports inside the container available locally.
+    // "forwardPorts": [],
+
+    // Use 'postCreateCommand' to run commands after the container is created.
+    // "postCreateCommand": "python --version",
+
+    // Configure tool-specific properties.
+    // "customizations": {},
+
+    // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
+    // "remoteUser": "root"
 }
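The nvidia-cuda feature and the new "--gpus=all" run argument only help if the framework inside the container can actually see the device. A minimal sanity check, not part of the commit and assuming the TensorFlow stack the scripts below use:

import tensorflow as tf

# Expect at least one GPU entry once the container is started with --gpus=all;
# an empty list usually means the NVIDIA container runtime is not exposed.
print(tf.config.list_physical_devices('GPU'))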
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+import argparse
+import os
+from train import load_data, train_model, evaluate_model, train_model_and_save
+
+
+def main():
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+
+    # Data and model type arguments
+    parser.add_argument('--data', type=str, choices=['MNIST', 'MNIST_Audio'], required=True, help='Dataset to use')
+    parser.add_argument('--model_type', type=str, choices=['normal', 'complex', 'complex_augmented'], required=True, help='Model type to use')
+
+    # Training information arguments
+    parser.add_argument('--batch_size', type=int, default=32, help='Batch size for training')
+    parser.add_argument('--epochs', type=int, default=10, help='Number of epochs for training')
+
+    # Folder saving argument
+    parser.add_argument('--save_dir', type=str, default='results', help='Directory to save model and results')
+
+    # Parse arguments
+    args = parser.parse_args()
+
+    # Train the model and evaluate it
+    train_model_and_save(args)
+
+if __name__ == '__main__':
+    main()
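For a quick smoke test of this entry point without a shell, argparse can be driven by injecting argv before calling main() (for example from a REPL after importing the script). A sketch only; all flag values here are illustrative:

import sys

# parser.parse_args() reads sys.argv, so inject the flags and run the entry point.
# One epoch keeps the smoke test fast.
sys.argv = [sys.argv[0], '--data', 'MNIST', '--model_type', 'normal',
            '--batch_size', '64', '--epochs', '1']
main()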
Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+import argparse
+import os
+import pickle
+from taint import adversarial_attack_blackbox
+from analysis import *
+from train import train_model_and_save
+
+def attack_model(args, model, test_ds, save_dir):
+    # Path to the pickle file that stores the attacker object
+    pickle_path = os.path.join(save_dir, 'attacker.pkl')
+
+    # Check if the adversarial attack has already been performed (if the pickle exists)
+    if os.path.exists(pickle_path):
+        # If the pickle exists, load the attacker from the file
+        with open(pickle_path, 'rb') as f:
+            attacker = pickle.load(f)
+        print(f"Loaded attacker from {pickle_path}")
+    else:
+        # If the pickle does not exist, run the attack and save the attacker
+        # (assumes adversarial_attack_blackbox returns the attacker object)
+        print("Running adversarial attack...")
+        attacker = adversarial_attack_blackbox(
+            model, test_ds, image_index=0, output_dir=save_dir,
+            num_iterations=args.iterations, num_particles=args.particles
+        )
+        with open(pickle_path, 'wb') as f:
+            pickle.dump(attacker, f)
+        print(f"Saved attacker to {pickle_path}")
+
+
+def main():
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+
+    # Data and model type arguments (aligned with those used in the training script)
+    parser.add_argument('--data', type=str, choices=['MNIST', 'MNIST_Audio'], required=True, help='Dataset to use')
+    parser.add_argument('--model_type', type=str, choices=['normal', 'complex', 'complex_augmented'], required=True, help='Model type to use')
+
+    # Attack parameters
+    parser.add_argument('--iterations', type=int, default=10, help='Number of iterations for attack')
+    parser.add_argument('--particles', type=int, default=100, help='Number of particles for attack')
+
+    # Folder saving argument
+    parser.add_argument('--save_dir', type=str, default='results', help='Directory to save model and results')
+
+    # Parse arguments
+    args = parser.parse_args()
+
+    # First, train the model and get the details needed for the attack
+    model, test_ds, save_dir, model_path = train_model_and_save(args)
+
+    # Perform the adversarial attack
+    attack_model(args, model, test_ds, save_dir)
+
+if __name__ == '__main__':
+    main()
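Because the attacker is pickled after a fresh run, it can be reloaded later for offline inspection. A sketch, assuming the attacker object exposes global_best_position the way the code in analysis.py uses it:

import os
import pickle

# Reload a pickled attacker and pull out its best adversarial candidate.
# 'results' mirrors the default --save_dir; adjust to your run.
pickle_path = os.path.join('results', 'attacker.pkl')
with open(pickle_path, 'rb') as f:
    attacker = pickle.load(f)

adv = attacker.global_best_position.numpy()  # attribute assumed, per analysis.py
print('Best adversarial candidate shape:', adv.shape)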

manuscripts/Posion25/3_stats.py

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
+import argparse
+import os
+import numpy as np
+import pickle
+import tensorflow as tf
+import json
+from tqdm import tqdm
+from taint import adversarial_attack_blackbox
+from train import train_model_and_save
+from analysis import get_softmax_stats, save_softmax_stats
+
+
+def collect_statistics(model, dataset, model_type, attack_iterations=10, attack_particles=100, image_index=0, output_dir='results'):
+    """
+    Run an adversarial attack for the given model and dataset combination and collect statistics.
+
+    Args:
+    - model: The model to attack.
+    - dataset: The test dataset.
+    - model_type: The model type (used to label output files).
+    - attack_iterations: Number of iterations for the attack.
+    - attack_particles: Number of particles for the attack.
+    - image_index: Index of the image to perform the attack on.
+    - output_dir: Directory to save results.
+
+    Returns:
+    - stats: A dictionary with the softmax output, attack success, and other relevant data.
+    """
+    pickle_path = os.path.join(output_dir, f"{model_type}_attacker.pkl")
+
+    dataset_list = list(dataset.as_numpy_iterator())
+    all_images, all_labels = zip(*dataset_list)
+    all_images = np.concatenate(all_images, axis=0)
+    all_labels = np.concatenate(all_labels, axis=0)
+
+    if image_index < 0 or image_index >= len(all_images):
+        raise ValueError(f"Image index {image_index} out of range")
+
+    single_input = all_images[image_index]
+    single_target = np.argmax(all_labels[image_index])
+    target_class = (single_target + 1) % 10  # Attack toward a different class
+
+    # Perform the attack, reusing the pickled attacker if one exists
+    if os.path.exists(pickle_path):
+        with open(pickle_path, 'rb') as f:
+            attacker = pickle.load(f)
+        print(f"Loaded attacker from {pickle_path}")
+    else:
+        # Assumes adversarial_attack_blackbox returns the attacker object
+        attacker = adversarial_attack_blackbox(
+            model, dataset, image_index=image_index, output_dir=output_dir,
+            num_iterations=attack_iterations, num_particles=attack_particles
+        )
+        with open(pickle_path, 'wb') as f:
+            pickle.dump(attacker, f)
+        print(f"Saved attacker to {pickle_path}")
+
+    # Analyze the attack results on the best adversarial candidate, not the clean input
+    adv_input = attacker.global_best_position.numpy()
+    softmax_output, max_val, max_class = get_softmax_stats(model, adv_input)
+    attack_success = max_class == target_class  # targeted attack succeeds when the prediction matches the target
+
+    stats = {
+        "model_type": model_type,
+        "target_class": int(target_class),  # cast NumPy scalars so json.dump can serialize them
+        "attack_success": bool(attack_success),
+        "softmax_output": softmax_output.tolist(),
+        "max_confidence": float(max_val),
+        "max_class": int(max_class),
+    }
+
+    # Save softmax statistics for this model and image
+    save_softmax_stats(os.path.join(output_dir, f"{model_type}_softmax_stats.tsv"), softmax_output, max_class, max_val, target_class)
+
+    return stats
+
+
+def get_model_types_for_dataset(dataset):
+    """
+    Return the model types available for a given dataset.
+
+    Args:
+    - dataset: The dataset name, e.g., 'MNIST' or 'MNIST_Audio'.
+
+    Returns:
+    - model_types: List of available model types for the dataset.
+    """
+    model_types = ['normal', 'complex', 'complex_augmented']  # Currently a static list; extend if needed
+
+    return model_types
+
+
+def run_statistics(args):
+    # Define the output directory to save results
+    results_dir = os.path.join(args.save_dir, f"{args.data}_stats")
+    os.makedirs(results_dir, exist_ok=True)
+
+    # Store statistics for all combinations of model types
+    all_stats = []
+
+    # Get all possible model types for the dataset
+    model_types = get_model_types_for_dataset(args.data)
+
+    # Iterate through all pairs of model types
+    for model_type in model_types:
+        print(f"Training model: {model_type}...")
+        args.model_type = model_type  # train_model_and_save reads the model type from args
+        model, test_ds, _, model_path = train_model_and_save(args)  # Train the model for the current type
+
+        # Run the adversarial attack and collect stats for each pair of distinct model types
+        for other_model_type in model_types:
+            if model_type != other_model_type:  # Skip same-type pairs
+                print(f"Attacking {model_type} model (stats labelled {other_model_type})...")
+                stats = collect_statistics(model, test_ds, other_model_type, attack_iterations=args.iterations, attack_particles=args.particles, output_dir=results_dir)
+                all_stats.append(stats)
+
+    # Save the collected statistics as a JSON file for later analysis
+    stats_file = os.path.join(results_dir, f"{args.data}_attack_stats.json")
+    with open(stats_file, 'w') as f:
+        json.dump(all_stats, f, indent=4)
+
+    print(f"Statistics saved to {stats_file}")
+
+
+def main():
+    # Command-line arguments
+    parser = argparse.ArgumentParser()
+
+    # Data argument
+    parser.add_argument('--data', type=str, choices=['MNIST', 'MNIST_Audio'], required=True, help='Dataset to use')
+
+    # Attack parameters
+    parser.add_argument('--iterations', type=int, default=10, help='Number of iterations for attack')
+    parser.add_argument('--particles', type=int, default=100, help='Number of particles for attack')
+
+    # Folder saving argument
+    parser.add_argument('--save_dir', type=str, default='results', help='Directory to save model and results')
+
+    # Parse arguments
+    args = parser.parse_args()
+
+    # Run the statistics collection
+    run_statistics(args)
+
+
+if __name__ == '__main__':
+    main()
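The JSON written by run_statistics is a flat list of the stats dictionaries built above, so a success-rate summary takes only a few lines. A sketch; the path assumes the default --save_dir with --data MNIST:

import json
from collections import Counter

# Tally attack success per model-type label from the stats file.
with open('results/MNIST_stats/MNIST_attack_stats.json') as f:
    all_stats = json.load(f)

totals, wins = Counter(), Counter()
for s in all_stats:
    totals[s['model_type']] += 1
    wins[s['model_type']] += bool(s['attack_success'])

for mt, n in totals.items():
    print(f"{mt}: {wins[mt]}/{n} attacks succeeded")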

manuscripts/Posion25/analysis.py

Lines changed: 0 additions & 126 deletions
@@ -124,129 +124,3 @@ def save_softmax_stats(path, softmax_output, max_class, max_val, target):
             f.write(f"{i}\t{val}\n")
         f.write(f"\nBest Class\t{max_class}\nMax Confidence\t{max_val}\nTarget Class\t{target}\n")
 
-
-def best_analysis(attacker, original_data, target):
-    adv = attacker.global_best_position.numpy()
-    save_dir = attacker.save_dir
-    ensure_dir(save_dir)
-
-    # save the original data
-    save_array_csv(os.path.join(save_dir, "original_data.csv"), original_data)
-    save_ndarray_visualization(os.path.join(save_dir, "original_data.png"), original_data)
-
-    # Save best particle
-    save_array_csv(os.path.join(save_dir, "best_particle.csv"), adv)
-    save_ndarray_visualization(os.path.join(save_dir, "best_particle.png"), adv)
-
-    # Save difference
-    diff = original_data - adv
-    save_array_csv(os.path.join(save_dir, "attack_vector_best_particle.csv"), diff)
-    save_ndarray_visualization(
-        os.path.join(save_dir, "attack_vector_best_particle.png"),
-        diff, mode="auto", cmap="seismic", vmin=-1, vmax=1
-    )
-
-    # Save stats
-    softmax_output, max_val, max_class = get_softmax_stats(attacker.model, adv)
-    save_softmax_stats(os.path.join(save_dir, "best_particle_stats.tsv"), softmax_output, max_class, max_val, target)
-
-
-def denoise_analysis(attacker, original_data, denoised_data, target):
-    save_dir = attacker.save_dir
-    ensure_dir(save_dir)
-
-    save_array_csv(os.path.join(save_dir, "best_particle-clean.csv"), denoised_data)
-    save_ndarray_visualization(os.path.join(save_dir, "best_particle-clean.png"), denoised_data)
-
-    diff = original_data - denoised_data
-    save_array_csv(os.path.join(save_dir, "attack_vector_best_particle-clean.csv"), diff)
-    save_ndarray_visualization(
-        os.path.join(save_dir, "attack_vector_best_particle-clean.png"),
-        diff, mode="auto", cmap="seismic", vmin=-1, vmax=1
-    )
-
-    softmax_output, max_val, max_class = get_softmax_stats(attacker.model, denoised_data)
-    save_softmax_stats(os.path.join(save_dir, "best_particle-clean_stats.tsv"), softmax_output, max_class, max_val, target)
-
-
-def reduce_excess_perturbations(attacker, original_data, adv_data, target_label):
-    """
-    Reduce unnecessary perturbations in adversarial data while maintaining misclassification.
-    Works for data of any shape.
-    """
-    original_data = np.squeeze(original_data)
-    adv_data = np.squeeze(adv_data)
-
-    if original_data.shape != adv_data.shape:
-        raise ValueError("Original and adversarial data must have the same shape after squeezing.")
-
-    adv_data = adv_data.copy()
-    changed = True
-
-    # Wrap the iteration with tqdm to monitor progress
-    while changed:
-        changed = False
-        indices = list(np.ndindex(original_data.shape))
-        for idx in tqdm(indices, desc="Reducing perturbations"):
-            if np.isclose(original_data[idx], adv_data[idx]):
-                continue
-
-            original_val = original_data[idx]
-            current_val = adv_data[idx]
-
-            # Try reverting completely
-            adv_data[idx] = original_val
-            pred = predict_class(attacker.model, adv_data)
-
-            if pred != target_label:
-                # Try partial revert
-                adv_data[idx] = current_val + 0.5 * (original_val - current_val)
-                pred = predict_class(attacker.model, adv_data)
-                if pred != target_label:
-                    adv_data[idx] = current_val
-                else:
-                    changed = True
-            else:
-                changed = True
-
-    return adv_data
-
-
-def full_analysis(attacker, input_data, target):
-    """
-    Save full analysis of all particles' histories and confidences.
-    """
-    analysis = {
-        "original_misclassification_input": input_data.tolist(),
-        "original_misclassification_target": int(target),
-        "particles": []
-    }
-
-    for i, particle in tqdm(enumerate(attacker.particles), total=len(attacker.particles), desc="Full Analysis"):
-        pdata = {
-            "particle_index": i,
-            "positions": [],
-            "confidence_values": [],
-            "max_output_values": [],
-            "max_output_classes": [],
-            "differences_from_original": []
-        }
-
-        for pos in tqdm(particle.history, desc=f"Particle {i} history", leave=False):
-            pos_np = pos.numpy() if isinstance(pos, tf.Tensor) else np.array(pos)
-            softmax, max_val, max_class = get_softmax_stats(attacker.model, pos_np)
-            diff = float(np.linalg.norm(pos_np - input_data))
-
-            pdata["positions"].append(pos_np.tolist())
-            pdata["confidence_values"].append(softmax.tolist())
-            pdata["max_output_values"].append(max_val)
-            pdata["max_output_classes"].append(max_class)
-            pdata["differences_from_original"].append(diff)
-
-        analysis["particles"].append(pdata)
-
-    path = os.path.join(attacker.save_dir, "attack_analysis.json")
-    with open(path, "w") as f:
-        json.dump(analysis, f, indent=4)
-
-    print(f"Full analysis saved to {path}")