 import argparse
 import os
 import pickle
-from taint import adversarial_attack_blackbox
-from analysis import *
-from train import train_model_and_save
-import torch
 import tensorflow as tf
+import torch
+from taint import adversarial_attack_blackbox
+
+
+def load_model(model_path):
+    # Assumes it's a Keras model (update if using PyTorch)
+    return tf.keras.models.load_model(model_path)
+
+
+def get_test_dataset(data_name):
+    # Imported lazily so train.py is only a dependency when a dataset is requested
+    from train import get_data  # Ensure get_data returns (train_ds, test_ds)
+
+    train_ds, test_ds = get_data(data_name)
+    return test_ds

-def attack_model(args, model, test_ds, save_dir, num_data=10):
-    # Get the labels by iterating through a batch from the test_ds
-    first_batch = next(iter(test_ds))  # Get the first batch
-    images, labels = first_batch  # Unpack the images and labels from the first batch
-
-    # Check if labels are a TensorFlow tensor or PyTorch tensor
-    if isinstance(labels, tf.Tensor):
-        # If using TensorFlow, convert labels to class indices (from one-hot encoded)
-        labels = tf.argmax(labels, axis=1).numpy()  # Get class indices from one-hot encoded labels
-    elif isinstance(labels, torch.Tensor):
-        # If using PyTorch, convert labels to class indices (from one-hot encoded)
-        labels = torch.argmax(labels, dim=1).cpu().numpy()  # Get class indices from one-hot encoded labels
-
-    # Convert labels to a set of unique outputs
-    unique_outputs = set(labels)  # Convert to a Python set for unique labels
-
-    # Continue with the rest of the attack logic
-    for output in unique_outputs:
-        instances = [i for i, label in enumerate(labels) if label == output][:num_data]  # Select `num_data` instances with the current output label
-
-        for image_index in instances:
-            # Create a subdirectory for each image_index and its original output label
-            sub_dir = os.path.join(save_dir, f'image_{image_index}_label_{output}')
-
-            # Ensure the directory exists
-            os.makedirs(sub_dir, exist_ok=True)
-
-            # Correct dynamic pickle filename to include the original and target class
-            pickle_filename = f'attacker_{image_index}_{output}.pkl'
-            pickle_path = os.path.join(sub_dir, pickle_filename)
-
-            # Check if the attacker pickle already exists for this image_index and output
-            if os.path.exists(pickle_path):
-                with open(pickle_path, 'rb') as f:
-                    attacker = pickle.load(f)
-                print(f"Loaded attacker for image {image_index} with label {output} from {pickle_path}")
-            else:
-                print(f"Running adversarial attack for image {image_index} with label {output}...")
-
-                # For the current `output`, target all other classes
-                for target_output in unique_outputs:
-                    if target_output != output:  # We want to target all other outputs
-                        for _ in range(num_data):  # Attack the target output `num_data` times
-                            target_sub_dir = os.path.join(sub_dir, f'target_{target_output}')
-                            os.makedirs(target_sub_dir, exist_ok=True)  # Create a subdir for each target class
-
-                            # Correct dynamic pickle filename to include the original and target class
-                            target_pickle_filename = f'attacker_{image_index}_{output}_to_{target_output}.pkl'
-                            target_pickle_path = os.path.join(target_sub_dir, target_pickle_filename)
-
-                            # Perform the adversarial attack targeting `target_output`
-                            attacker = adversarial_attack_blackbox(
-                                model=model,
-                                dataset=test_ds,
-                                image_index=image_index,
-                                output_dir=target_sub_dir,
-                                num_iterations=args.iterations,
-                                num_particles=args.particles,
-                                target_class=target_output  # Specify the target class for the attack
-                            )
-                            print(f"Adversarial attack completed for image {image_index} targeting class {target_output}")
-
-                            # After performing the attack, save the attacker object to a pickle file
-                            with open(target_pickle_path, 'wb') as f:
-                                pickle.dump(attacker, f)
-                            print(f"Saved attacker for image {image_index} with label {output} targeting {target_output} to {target_pickle_path}")

 def main():
-    # Command-line arguments
     parser = argparse.ArgumentParser()

-    # Data and model type arguments (to align with the ones used in the training script)
-    parser.add_argument('--data', type=str, choices=['MNIST', 'MNIST_Audio'], required=True, help='Dataset to use')
-    parser.add_argument('--model_type', type=str, choices=['normal', 'complex', 'complex_augmented'], required=True, help='Model type to use')
+    # Required args
+    parser.add_argument('--model_path', type=str, required=True, help='Path to saved model (.keras)')
+    parser.add_argument('--save_dir', type=str, required=True, help='Directory to save attack results')
+    parser.add_argument('--source_index', type=int, required=True, help='Index of image to attack')
+    parser.add_argument('--target', type=int, required=True, help='Target class for adversarial attack')

-    # Attack parameters
-    parser.add_argument('--iterations', type=int, default=10, help='Number of iterations for attack')
-    parser.add_argument('--particles', type=int, default=100, help='Number of particles for attack')
+    # Dataset config
+    parser.add_argument('--data', type=str, choices=['MNIST', 'MNIST_Audio'], required=True, help='Dataset name')

-    # Folder saving argument
-    parser.add_argument('--save_dir', type=str, default='results', help='Directory to save model and results')
+    # Attack config
+    parser.add_argument('--iterations', type=int, default=30, help='Number of attack iterations')
+    parser.add_argument('--particles', type=int, default=100, help='Number of swarm particles')

-    # Parse arguments
     args = parser.parse_args()

-    # First, train the model and get the necessary details for attack
-    model, test_ds, save_dir, model_path = train_model_and_save(args)
+    # Load model and dataset
+    model = load_model(args.model_path)
+    test_ds = get_test_dataset(args.data)
+
+    # Create output directory
+    os.makedirs(args.save_dir, exist_ok=True)
+
+    # Run the blackbox adversarial attack
+    try:
+        attacker = adversarial_attack_blackbox(
+            model=model,
+            dataset=test_ds,
+            image_index=args.source_index,
+            output_dir=args.save_dir,
+            num_iterations=args.iterations,
+            num_particles=args.particles,
+            target_class=args.target
+        )
+
+        # Save attacker object
+        output_path = os.path.join(args.save_dir, f'attacker_{args.source_index}_to_{args.target}.pkl')
+        with open(output_path, 'wb') as f:
+            pickle.dump(attacker, f)
+
+        print(f"Attack complete. Saved attacker to: {output_path}")
+
+    except Exception as e:
+        print(f"Error during attack: {e}")
+        # Re-raise so failures surface with a traceback and a non-zero exit,
+        # instead of being silently swallowed
+        raise

-    # Perform the adversarial attack
-    attack_model(args, model, test_ds, save_dir)

 if __name__ == '__main__':
     main()
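`get_test_dataset` relies on a `get_data` helper in `train.py` that this commit does not show. As a reference for the contract the new code appears to assume — `get_data(name)` returning a `(train_ds, test_ds)` pair of batched `tf.data.Dataset`s with one-hot labels — here is a minimal sketch. The MNIST-only branch, [0, 1] normalization, and batch size are illustrative assumptions, not the real implementation.

```python
import tensorflow as tf


def get_data(data_name, batch_size=32):
    # Hypothetical stand-in for train.get_data; only MNIST is sketched.
    if data_name != 'MNIST':
        raise ValueError(f'Unsupported dataset: {data_name}')
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    # Scale pixels to [0, 1]; one-hot labels match the tf.argmax usage
    # in the removed attack_model and a softmax classification head.
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0
    y_train = tf.one_hot(y_train, depth=10)
    y_test = tf.one_hot(y_test, depth=10)
    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
    return train_ds, test_ds
```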
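The `--particles` flag ("Number of swarm particles") suggests `adversarial_attack_blackbox` runs a particle-swarm-style search, which only needs model queries, never gradients. The sketch below illustrates that general idea for a targeted attack on a Keras classifier with softmax outputs; it is not the `taint` implementation, and the perturbation bound `epsilon`, inertia `w`, and pull coefficients `c1`/`c2` are illustrative assumptions.

```python
import numpy as np


def pso_attack_sketch(model, image, target_class, num_particles=20,
                      num_iterations=30, epsilon=0.2, w=0.5, c1=1.5, c2=1.5):
    """Toy targeted black-box attack via particle-swarm optimization.

    `image` is a float array in [0, 1]. Each particle is a bounded
    perturbation delta; its fitness is the model's predicted probability
    for target_class on image + delta. Only forward passes are used.
    """
    rng = np.random.default_rng(0)
    shape = (num_particles,) + image.shape
    positions = rng.uniform(-epsilon, epsilon, size=shape).astype('float32')
    velocities = np.zeros_like(positions)

    def fitness(deltas):
        # Query the model on perturbed copies clipped to the valid pixel range
        batch = np.clip(image[None] + deltas, 0.0, 1.0)
        return model.predict(batch, verbose=0)[:, target_class]

    best_positions = positions.copy()  # per-particle best perturbations
    best_scores = fitness(positions)
    g_best = best_positions[int(np.argmax(best_scores))].copy()  # swarm best

    for _ in range(num_iterations):
        r1 = rng.random(size=shape).astype('float32')
        r2 = rng.random(size=shape).astype('float32')
        # Standard PSO update: inertia plus pulls toward personal/global bests
        velocities = (w * velocities
                      + c1 * r1 * (best_positions - positions)
                      + c2 * r2 * (g_best[None] - positions))
        positions = np.clip(positions + velocities, -epsilon, epsilon)
        scores = fitness(positions)
        improved = scores > best_scores
        best_positions[improved] = positions[improved]
        best_scores[improved] = scores[improved]
        g_best = best_positions[int(np.argmax(best_scores))].copy()

    return np.clip(image + g_best, 0.0, 1.0), float(np.max(best_scores))
```

An invocation of the committed script would look something like `python attack.py --model_path model.keras --data MNIST --save_dir results --source_index 0 --target 7`, where `attack.py` stands in for the file name, which this commit view does not show.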