 from taint import adversarial_attack_blackbox
 from analysis import *
 from train import train_model_and_save
+import torch
+import tensorflow as tf
 
 def attack_model(args, model, test_ds, save_dir, num_data=10):
-    # Path to the pickle file that stores the attacker object
-    pickle_path = os.path.join(save_dir, 'attacker.pkl')
+    # Pull the first batch from test_ds to obtain its labels
+    # (only this one batch is inspected)
+    images, labels = next(iter(test_ds))
 
-    # Check if the adversarial attack has already been performed (if pickle exists)
-    if os.path.exists(pickle_path):
-        # If pickle exists, load the attacker from the file
-        with open(pickle_path, 'rb') as f:
-            attacker = pickle.load(f)
-        print(f"Loaded attacker from {pickle_path}")
-    else:
-        # If pickle does not exist, run the attack and save the attacker
-        print("Running adversarial attack...")
-
-        # First, identify unique outputs in the dataset
-        unique_outputs = set(test_ds.labels)  # assuming `test_ds.labels` contains the true labels
-
-        for output in unique_outputs:
-            # Find the first 10 instances of this output in the dataset
-            instances = [i for i, label in enumerate(test_ds.labels) if label == output][:num_data]
-            # Perform the attack on each of these instances
-            for image_index in instances:
-                adversarial_attack_blackbox(
-                    model, test_ds, image_index=image_index, output_dir=save_dir,
-                    num_iterations=args.iterations, num_particles=args.particles
-                )
-                print(f"Attacked image {image_index} with label {output}")
+    # If the labels are TensorFlow or PyTorch tensors, convert them to plain
+    # class indices (handling the one-hot encoded case)
+    if isinstance(labels, tf.Tensor):
+        if len(labels.shape) > 1:
+            labels = tf.argmax(labels, axis=1)  # one-hot -> class indices
+        labels = labels.numpy()
+    elif isinstance(labels, torch.Tensor):
+        if labels.dim() > 1:
+            labels = torch.argmax(labels, dim=1)  # one-hot -> class indices
+        labels = labels.cpu().numpy()
+
+    # Collect the unique output labels present in the batch
+    unique_outputs = set(labels)
+
+    for output in unique_outputs:
+        # Select up to `num_data` instances that carry the current output label
+        instances = [i for i, label in enumerate(labels) if label == output][:num_data]
+
+        for image_index in instances:
+            # Create a subdirectory for each image_index and its original output label
+            sub_dir = os.path.join(save_dir, f'image_{image_index}_label_{output}')
+            os.makedirs(sub_dir, exist_ok=True)
+
+            # For the current `output`, target every other class
+            for target_output in unique_outputs:
+                if target_output == output:
+                    continue
+
+                target_sub_dir = os.path.join(sub_dir, f'target_{target_output}')
+                os.makedirs(target_sub_dir, exist_ok=True)  # one subdir per target class
+
+                # The pickle filename encodes both the original and the target class
+                target_pickle_filename = f'attacker_{image_index}_{output}_to_{target_output}.pkl'
+                target_pickle_path = os.path.join(target_sub_dir, target_pickle_filename)
+
+                # If this attack was already performed, load the saved attacker and skip it
+                if os.path.exists(target_pickle_path):
+                    with open(target_pickle_path, 'rb') as f:
+                        attacker = pickle.load(f)
+                    print(f"Loaded attacker for image {image_index} "
+                          f"({output} -> {target_output}) from {target_pickle_path}")
+                    continue
+
+                # Perform the adversarial attack targeting `target_output`
+                print(f"Running adversarial attack for image {image_index} "
+                      f"({output} -> {target_output})...")
+                attacker = adversarial_attack_blackbox(
+                    model=model,
+                    dataset=test_ds,
+                    image_index=image_index,
+                    output_dir=target_sub_dir,
+                    num_iterations=args.iterations,
+                    num_particles=args.particles,
+                    target_class=target_output  # target class for the attack
+                )
+
+                # Save the attacker object so the run can be resumed later
+                with open(target_pickle_path, 'wb') as f:
+                    pickle.dump(attacker, f)
+                print(f"Saved attacker for image {image_index} "
+                      f"({output} -> {target_output}) to {target_pickle_path}")
 
 def main():
     # Command-line arguments
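
For reference, the one-hot to class-index conversion used in this change can be sanity-checked in isolation. A minimal sketch, assuming TensorFlow and PyTorch are both installed; the tensors below are illustrative and not taken from test_ds:

    import tensorflow as tf
    import torch

    # One-hot rows encoding classes 1 and 0
    one_hot_tf = tf.constant([[0., 1., 0.], [1., 0., 0.]])
    print(tf.argmax(one_hot_tf, axis=1).numpy())          # [1 0]

    one_hot_pt = torch.tensor([[0., 1., 0.], [1., 0., 0.]])
    print(torch.argmax(one_hot_pt, dim=1).cpu().numpy())  # [1 0]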
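A hypothetical sketch of the command-line wiring that attack_model relies on: only the `iterations` and `particles` attributes are confirmed by the call sites above; the flag names, defaults, and the `model`/`test_ds`/`save_dir` placeholders are assumptions for illustration:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--iterations', type=int, default=100)  # assumed default
    parser.add_argument('--particles', type=int, default=20)    # assumed default
    args = parser.parse_args()

    # attack_model(args, model, test_ds, save_dir, num_data=10)
    # where `model`, `test_ds`, and `save_dir` come from the training/setup code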