
Commit 7f59c03

Updating the code to produce an adversarial dataset for a given image (#7)
* updating code
* fin for now
* validating run
* created adversarial dataloader
* adding seeds
* reducing run time

1 parent 1e0d7f6 commit 7f59c03
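
As context for the diffs that follow: the commit's stated goal is to turn a single input image into a small adversarial dataset served through a DataLoader with a fixed seed. That dataloader is not part of this excerpt, so the sketch below is only an illustration of the plumbing; the function name, the random-sign perturbation, and the placeholder labels are assumptions, not the committed code.

import torch
from torch.utils.data import DataLoader, TensorDataset

def adversarial_dataset_sketch(image: torch.Tensor, epsilons=(0.05, 0.1, 0.2), seed: int = 42) -> DataLoader:
    """Hypothetical helper: wrap perturbed copies of one image in a DataLoader.

    The committed dataloader (not shown in this excerpt) presumably uses the
    FGSM/PGD helpers rather than random noise; this sketch only illustrates
    the dataset construction and the seeding the commit message mentions.
    """
    torch.manual_seed(seed)  # "adding seeds" -> reproducible perturbations
    variants = [torch.clamp(image + eps * torch.randn_like(image).sign(), 0, 1)
                for eps in epsilons]
    labels = torch.zeros(len(variants), dtype=torch.long)  # placeholder labels
    return DataLoader(TensorDataset(torch.stack(variants), labels), batch_size=2)
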


10 files changed, +719 −230 lines changed


.devcontainer/devcontainer.json

Lines changed: 1 addition & 16 deletions
@@ -7,21 +7,6 @@
     "dockerfile": "Dockerfile"
   },
   "features": {
-    "ghcr.io/devcontainers/features/python:1": {}
+    "ghcr.io/rocker-org/devcontainer-features/miniforge:2": {}
   }
-
-  // Features to add to the dev container. More info: https://containers.dev/features.
-  // "features": {},
-
-  // Use 'forwardPorts' to make a list of ports inside the container available locally.
-  // "forwardPorts": [],
-
-  // Use 'postCreateCommand' to run commands after the container is created.
-  // "postCreateCommand": "python --version",
-
-  // Configure tool-specific properties.
-  // "customizations": {},
-
-  // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
-  // "remoteUser": "root"
 }

Adversarial_Observation/Attacks.py

Lines changed: 71 additions & 58 deletions
@@ -1,63 +1,76 @@
-import numpy as np
 import torch
-import torch.nn.functional as F
-import matplotlib.pyplot as plt
 import logging
+import os
+from datetime import datetime
+from torch.nn import Softmax
+from .utils import fgsm_attack, pgd_attack, compute_success_rate, log_metrics, visualize_adversarial_examples
+from .utils import seed_everything
 
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-
-def fgsm_attack(input_batch_data: torch.Tensor, model: torch.nn.Module, input_shape: tuple, epsilon: float) -> torch.Tensor:
-    """
-    Apply the FGSM attack to input images given a pre-trained PyTorch model.
-
-    Args:
-        input_batch_data (torch.Tensor): Batch of input images.
-        model (torch.nn.Module): Pre-trained PyTorch model to be attacked.
-        input_shape (tuple): Shape of the input array.
-        epsilon (float): Magnitude of the perturbation for the attack.
-
-    Returns:
-        torch.Tensor: Adversarial images generated by the FGSM attack.
-    """
-    model.eval()
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    input_batch_data = input_batch_data.to(device).detach().requires_grad_(True)
-
-    adversarial_batch_data = torch.clone(input_batch_data).detach()
-
-    for img in input_batch_data:
-        preds = model(img.reshape(input_shape))
-        target = torch.argmax(preds)
-        loss = F.cross_entropy(preds, target.unsqueeze(0))
-
-        model.zero_grad()
-        loss.backward()
-
-        adversarial_img = img + epsilon * img.grad.sign()
-        adversarial_img = torch.clamp(adversarial_img, 0, 1)
-        adversarial_batch_data.append(adversarial_img)
-
-    return adversarial_batch_data
-
-def compute_gradients(model, img, target_class):
-    preds = model(img)
-    target_score = preds[0, target_class]
-    return torch.autograd.grad(target_score, img)[0]
-
-def generate_adversarial_examples(input_batch_data, model, method='fgsm', **kwargs):
-    if method == 'fgsm':
-        return fgsm_attack(input_batch_data, model, **kwargs)
-    # Implement other attack methods as needed
-
-def visualize_adversarial_examples(original, adversarial):
-    # Code to visualize original vs adversarial images
-    pass
-
-def log_metrics(success_rate, average_perturbation):
-    logging.info(f'Success Rate: {success_rate}, Average Perturbation: {average_perturbation}')
-
-class Config:
-    def __init__(self, epsilon=0.1, attack_method='fgsm'):
+class AdversarialTester:
+    def __init__(self, model: torch.nn.Module, epsilon: float = 0.1, attack_method: str = 'fgsm', alpha: float = 0.01,
+                 num_steps: int = 40, device=None, save_dir: str = './results', seed: int = 42):
+        seed_everything(seed)
+        self.model = model
         self.epsilon = epsilon
         self.attack_method = attack_method
+        self.alpha = alpha
+        self.num_steps = num_steps
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.save_dir = save_dir
+
+        # Create save directory if it doesn't exist
+        os.makedirs(self.save_dir, exist_ok=True)
+        self.model.to(self.device)
+        self.model.eval()
+
+        self._setup_logging()
+
+    def _setup_logging(self):
+        log_file = os.path.join(self.save_dir, f"attack_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
+        logging.basicConfig(filename=log_file, level=logging.DEBUG)
+        logging.info(f"Started adversarial testing at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        logging.info(f"Using model: {self.model.__class__.__name__}")
+        logging.info(f"Attack Method: {self.attack_method}, Epsilon: {self.epsilon}, Alpha: {self.alpha}, Steps: {self.num_steps}")
+
+    def test_attack(self, input_batch_data: torch.Tensor):
+        input_batch_data = input_batch_data.to(self.device)
+        adversarial_images = self._generate_adversarial_images(input_batch_data)
+
+        # Save and log images
+        self._save_images(input_batch_data, adversarial_images)
+        self._compute_and_log_metrics(input_batch_data, adversarial_images)
+
+    def _generate_adversarial_images(self, input_batch_data: torch.Tensor):
+        logging.info(f"Starting attack with method: {self.attack_method}")
+        if self.attack_method == 'fgsm':
+            return fgsm_attack(input_batch_data, self.model, self.epsilon, self.device)
+        elif self.attack_method == 'pgd':
+            return pgd_attack(input_batch_data, self.model, self.epsilon, self.alpha, self.num_steps, self.device)
+        else:
+            raise ValueError(f"Unsupported attack method: {self.attack_method}")
+
+    def _save_images(self, original_images: torch.Tensor, adversarial_images: torch.Tensor):
+        for i in range(original_images.size(0)):
+            original_image_path = os.path.join(self.save_dir, f"original_{i}.png")
+            adversarial_image_path = os.path.join(self.save_dir, f"adversarial_{i}.png")
+            visualize_adversarial_examples(original_images, adversarial_images, original_image_path, adversarial_image_path)
+
+    def _compute_and_log_metrics(self, original_images: torch.Tensor, adversarial_images: torch.Tensor):
+        original_predictions = torch.argmax(self.model(original_images), dim=1)
+        adversarial_predictions = torch.argmax(self.model(adversarial_images), dim=1)
+
+        success_rate = compute_success_rate(original_predictions, adversarial_predictions)
+        average_perturbation = torch.mean(torch.abs(adversarial_images - original_images)).item()
+
+        log_metrics(success_rate, average_perturbation)
+        self._save_metrics(success_rate, average_perturbation)
+
+        logging.info(f"Success Rate: {success_rate:.4f}, Average Perturbation: {average_perturbation:.4f}")
+
+    def _save_metrics(self, success_rate: float, avg_perturbation: float):
+        """
+        Save the metrics (success rate and average perturbation) to a file.
+        """
+        metrics_file = os.path.join(self.save_dir, "attack_metrics.txt")
+        with open(metrics_file, 'a') as f:
+            f.write(f"Success Rate: {success_rate:.4f}, Average Perturbation: {avg_perturbation:.4f}\n")
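
A minimal usage sketch for the AdversarialTester added above, based only on the constructor and test_attack signatures visible in this diff; the toy model, the random batch, and the import path are illustrative assumptions, not part of the commit.

import torch
from torch import nn
from Adversarial_Observation.Attacks import AdversarialTester  # assumed import path

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))  # stand-in classifier
tester = AdversarialTester(
    model,
    epsilon=0.1,           # perturbation bound (the diff's default)
    attack_method='fgsm',  # 'fgsm' or 'pgd', per _generate_adversarial_images
    alpha=0.01,            # PGD step size (unused by FGSM)
    num_steps=40,          # PGD iterations
    save_dir='./results',
    seed=42,               # forwarded to seed_everything
)

batch = torch.rand(8, 1, 28, 28)  # stand-in batch of images in [0, 1]
tester.test_attack(batch)         # writes images, metrics, and a log under ./results
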
Lines changed: 102 additions & 0 deletions (new file)
@@ -0,0 +1,102 @@
+import torch
+import numpy as np
+from torch import nn
+
+
+class BirdParticle:
+    """
+    Represents a particle in the Particle Swarm Optimization (PSO) algorithm for adversarial attacks.
+
+    The BirdParticle class encapsulates the state of each particle, including its position, velocity,
+    fitness evaluation, and the updates to its velocity and position based on the PSO algorithm.
+    """
+
+    def __init__(self, model: nn.Module, input_data: torch.Tensor, target_class: int, epsilon: float,
+                 velocity: torch.Tensor = None, inertia_weight: float = 0.5,
+                 cognitive_weight: float = 1.0, social_weight: float = 1.0, momentum: float = 0.9,
+                 velocity_clamp: float = 0.1):
+        """
+        Initialize a particle in the PSO algorithm.
+
+        Args:
+            model (nn.Module): The model to attack.
+            input_data (torch.Tensor): The input data (image) to attack.
+            target_class (int): The target class for misclassification.
+            epsilon (float): The perturbation bound (maximum amount the image can be altered).
+            velocity (torch.Tensor, optional): The initial velocity for the particle's movement. Defaults to zero velocity if not provided.
+            inertia_weight (float): The inertia weight for the velocity update. Default is 0.5.
+            cognitive_weight (float): The cognitive weight for the velocity update. Default is 1.0.
+            social_weight (float): The social weight for the velocity update. Default is 1.0.
+            momentum (float): The momentum for the velocity update. Default is 0.9.
+            velocity_clamp (float): The velocity clamp for limiting the maximum velocity. Default is 0.1.
+        """
+        self.model = model
+        self.original_data = input_data.clone().detach()
+        self.target_class = target_class
+        self.epsilon = epsilon
+        self.best_position = input_data.clone().detach()
+        self.best_score = -np.inf
+        self.position = input_data.clone().detach()
+        self.velocity = velocity if velocity is not None else torch.zeros_like(input_data)
+        self.history = []
+
+        # Class attributes
+        self.inertia_weight = inertia_weight
+        self.cognitive_weight = cognitive_weight
+        self.social_weight = social_weight
+        self.momentum = momentum
+        self.velocity_clamp = velocity_clamp
+
+    def fitness(self) -> float:
+        """
+        Compute the fitness score for the particle, which is the softmax probability of the target class.
+
+        Higher fitness scores correspond to better success in the attack (misclassifying the image into the target class).
+
+        Returns:
+            float: Fitness score for this particle (higher is better).
+        """
+        with torch.no_grad():
+            output = self.model(self.position)
+            probabilities = torch.softmax(output, dim=1)  # Get probabilities for each class
+            target_prob = probabilities[:, self.target_class]  # Target class probability
+
+        return target_prob.item()  # Return the target class probability as fitness score
+
+    def update_velocity(self, global_best_position: torch.Tensor) -> None:
+        """
+        Update the velocity of the particle based on the PSO update rule.
+
+        Args:
+            global_best_position (torch.Tensor): The global best position found by the swarm.
+        """
+        inertia = self.inertia_weight * self.velocity
+        cognitive = self.cognitive_weight * torch.rand_like(self.position) * (self.best_position - self.position)
+        social = self.social_weight * torch.rand_like(self.position) * (global_best_position - self.position)
+
+        self.velocity = inertia + cognitive + social  # Update velocity based on PSO formula
+
+        # Apply momentum and velocity clamping
+        self.velocity = self.velocity * self.momentum  # Apply momentum
+        self.velocity = torch.clamp(self.velocity, -self.velocity_clamp, self.velocity_clamp)  # Apply velocity clamp
+
+    def update_position(self) -> None:
+        """
+        Update the position of the particle based on the updated velocity.
+
+        Ensures that the position stays within the valid input range [0, 1] (normalized pixel values).
+        """
+        self.position = torch.clamp(self.position + self.velocity, 0, 1)  # Ensure position stays within bounds
+        self.history.append(self.position.clone().detach())
+
+    def evaluate(self) -> None:
+        """
+        Evaluate the fitness of the current particle and update its personal best.
+
+        The fitness score is calculated using the target class probability. If the current fitness score
+        is better than the personal best, update the personal best position and score.
+        """
+        score = self.fitness()  # Get the current fitness score based on the perturbation
+        if score > self.best_score:  # If score is better than the personal best, update the best position
+            self.best_score = score
+            self.best_position = self.position.clone().detach()
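
BirdParticle only models a single particle; the swarm loop that coordinates many particles is not part of this excerpt. A minimal sketch of such a driver, assuming a classifier model, an input image normalized to [0, 1] with a batch dimension, and a chosen target_class, using only the methods defined above:

import torch

def pso_attack_sketch(model, image, target_class, num_particles=10, iterations=20, epsilon=0.1):
    """Hypothetical PSO driver built on BirdParticle; not the committed implementation."""
    particles = [
        BirdParticle(model, torch.clamp(image + 0.01 * torch.randn_like(image), 0, 1),
                     target_class, epsilon)
        for _ in range(num_particles)
    ]
    global_best_position = image.clone().detach()
    global_best_score = -float('inf')

    for _ in range(iterations):
        for p in particles:
            p.update_velocity(global_best_position)  # PSO velocity rule
            p.update_position()                      # move and clamp to [0, 1]
            p.evaluate()                             # refresh the personal best
            if p.best_score > global_best_score:     # track the swarm-wide best
                global_best_score = p.best_score
                global_best_position = p.best_position.clone()

    return global_best_position, global_best_score
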
