
Commit 03da00f

generating cohort
1 parent e3b315b commit 03da00f

12 files changed (+433, -124 lines)


.devcontainer/devcontainer.json

Lines changed: 8 additions & 14 deletions

@@ -7,22 +7,16 @@
     "dockerfile": "Dockerfile"
   },
   "features": {
-    "ghcr.io/devcontainers/features/anaconda:1": {
-      "version": "latest"
-    },
-    "ghcr.io/devcontainers/features/nvidia-cuda:2": {
-      "installCudnn": true,
-      "installCudnnDev": true,
-      "installNvtx": true,
-      "installToolkit": true,
-      "cudaVersion": "11.8",
-      "cudnnVersion": "automatic"
-    },
-    "ghcr.io/raucha/devcontainer-features/pytorch:1": {}
+    "ghcr.io/devcontainers/features/anaconda:1": {},
+    "ghcr.io/devcontainers/features/nvidia-cuda:2": {},
+    "ghcr.io/rocker-org/devcontainer-features/miniforge:2": {}
   },
   "runArgs": [
-    "--gpus=all"
-  ]
+    "--gpus", "all"
+  ],
+  // allow gpu
+
+
   // Features to add to the dev container. More info: https://containers.dev/features.
   // "features": {},
 
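
The runArgs change passes the GPU flag as two list entries instead of the single --gpus=all token; either spelling reaches docker run, and the split form is what the // allow gpu comment refers to. A quick way to confirm the flag took effect is to list the GPUs TensorFlow can see once the container is up. A minimal sketch, assuming TensorFlow is installed in the container (the repo's scripts use it):

import tensorflow as tf

# Run inside the dev container: an empty list means the --gpus runArgs
# (or the nvidia-cuda feature install) did not take effect.
gpus = tf.config.list_physical_devices("GPU")
print(f"{len(gpus)} GPU(s) visible to TensorFlow:", gpus)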

Lines changed: 4 additions & 2 deletions

@@ -18,11 +18,13 @@ for model_type in "${MODEL_TYPES[@]}"; do
     if [[ "$model_type" == "complex_adversarial" ]]; then
         for adv in "${ADVERSARIAL_MODES[@]:1}"; do
             echo "Training $DATASET with model_type=$model_type and adversarial=$adv"
-            python main.py --data $DATASET --model_type $model_type --adversarial $adv --batch_size $BATCH_SIZE --epochs $EPOCHS --save_dir $SAVE_DIR
+            echo "Command: python 0_trainModel.py --data $DATASET --model_type $model_type --adversarial $adv --batch_size $BATCH_SIZE --epochs $EPOCHS --save_dir $SAVE_DIR"
+            # python 0_trainModel.py --data $DATASET --model_type $model_type --adversarial $adv --batch_size $BATCH_SIZE --epochs $EPOCHS --save_dir $SAVE_DIR
         done
     else
         echo "Training $DATASET with model_type=$model_type (no adversarial)"
-        python main.py --data $DATASET --model_type $model_type --batch_size $BATCH_SIZE --epochs $EPOCHS --save_dir $SAVE_DIR
+        echo "Command: python 0_trainModel.py --data $DATASET --model_type $model_type --batch_size $BATCH_SIZE --epochs $EPOCHS --save_dir $SAVE_DIR"
+        # python 0_trainModel.py --data $DATASET --model_type $model_type --batch_size $BATCH_SIZE --epochs $EPOCHS --save_dir $SAVE_DIR
     fi
 done
 

manuscripts/Posion25/1_dataset.sh

Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Configuration
+SEED=42
+BATCH_SIZE=32
+DATASETS=("mnist")
+SPLITS=("train" "test")
+
+# Step 1: Generate label TSVs
+echo "Generating label TSVs..."
+for dataset in "${DATASETS[@]}"; do
+    echo "Processing $dataset..."
+    python 1_dataset_label_tool.py --generate --dataset $dataset --batch_size $BATCH_SIZE
+done
+
+# Step 2: Generate false labels
+echo "Generating false labels..."
+for dataset in "${DATASETS[@]}"; do
+    for split in "${SPLITS[@]}"; do
+        input_file="${dataset^^}_${split}_labels.tsv"    # e.g., MNIST_train_labels.tsv
+        output_file="${dataset^^}_${split}_false_labels.tsv"    # e.g., MNIST_train_false_labels.tsv
+
+        echo "Generating false labels for $input_file..."
+        python 1_dataset_label_tool.py --input "$input_file" --output "$output_file" --seed $SEED
+    done
+done
+
+echo "All label and false label files generated."
Lines changed: 111 additions & 0 deletions

@@ -0,0 +1,111 @@
+import argparse
+import os
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.datasets import cifar10, mnist
+from tensorflow.keras.utils import to_categorical
+
+# -------------------- Utility Functions --------------------
+
+def generate_false_labels(df, seed=None):
+    if seed is not None:
+        np.random.seed(seed)
+
+    num_classes = 10
+    false_labels = []
+
+    for true_label in df['Label']:
+        choices = [i for i in range(num_classes) if i != true_label]
+        false_label = np.random.choice(choices)
+        false_labels.append(false_label)
+
+    df_out = pd.DataFrame({
+        'index': df['Index'],
+        'trueLabel': df['Label'],
+        'falseLabel': false_labels
+    })
+    return df_out
+
+def standardize_data(x):
+    """Normalize data to [0, 1] range."""
+    return x / 255.0
+
+def write_labels_to_tsv(dataset, output_file):
+    with open(output_file, 'w') as f:
+        f.write("Index\tLabel\n")
+        for idx, (_, label) in enumerate(dataset.unbatch()):
+            true_label = int(np.argmax(label.numpy()))
+            f.write(f"{idx}\t{true_label}\n")
+    print(f"Wrote labels to {output_file}")
+
+# -------------------- Dataset Loaders --------------------
+
+def load_mnist(batch_size):
+    (x_train, y_train), (x_test, y_test) = mnist.load_data()
+    x_train = standardize_data(x_train.reshape(-1, 28, 28, 1).astype('float32'))
+    x_test = standardize_data(x_test.reshape(-1, 28, 28, 1).astype('float32'))
+    y_train = to_categorical(y_train, 10)
+    y_test = to_categorical(y_test, 10)
+    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
+    test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
+    return train_dataset, test_dataset
+
+def load_cifar10(batch_size):
+    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+    x_train = standardize_data(x_train.astype('float32'))
+    x_test = standardize_data(x_test.astype('float32'))
+    y_train = to_categorical(y_train, 10)
+    y_test = to_categorical(y_test, 10)
+    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
+    test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
+    return train_dataset, test_dataset
+
+# -------------------- Main Script --------------------
+
+def main():
+    parser = argparse.ArgumentParser(description="Dataset label handler.")
+    parser.add_argument('--dataset', choices=['mnist', 'cifar10'], help="Dataset to process.")
+    parser.add_argument('--generate', action='store_true', help="Generate TSV files from dataset.")
+    parser.add_argument('--input', help="Input TSV file (for false label generation).")
+    parser.add_argument('--output', help="Output TSV file name (saved in false_data/).")
+    parser.add_argument('--seed', type=int, default=None, help="Random seed for reproducibility.")
+    parser.add_argument('--batch_size', type=int, default=32, help="Batch size for data loading.")
+
+    args = parser.parse_args()
+
+    # Create folder for false labels
+    false_data_dir = "false_data"
+    os.makedirs(false_data_dir, exist_ok=True)
+
+    # Generate TSV files from the dataset
+    if args.generate:
+        if args.dataset == 'mnist':
+            train_ds, test_ds = load_mnist(args.batch_size)
+            write_labels_to_tsv(train_ds, os.path.join(false_data_dir, "MNIST_train_labels.tsv"))
+            write_labels_to_tsv(test_ds, os.path.join(false_data_dir, "MNIST_test_labels.tsv"))
+        elif args.dataset == 'cifar10':
+            train_ds, test_ds = load_cifar10(args.batch_size)
+            write_labels_to_tsv(train_ds, os.path.join(false_data_dir, "CIFAR10_train_labels.tsv"))
+            write_labels_to_tsv(test_ds, os.path.join(false_data_dir, "CIFAR10_test_labels.tsv"))
+        else:
+            print("Please specify a valid dataset with --dataset.")
+
+    # Generate false labels from input TSV
+    if args.input and args.output:
+        input_path = args.input
+        if not os.path.exists(input_path):
+            input_path = os.path.join(false_data_dir, args.input)
+
+        if not os.path.exists(input_path):
+            raise FileNotFoundError(f"Input file not found: {input_path}")
+
+        df = pd.read_csv(input_path, sep='\t')
+        df_out = generate_false_labels(df, seed=args.seed)
+
+        output_path = os.path.join(false_data_dir, args.output)
+        df_out.to_csv(output_path, sep='\t', index=False)
+        print(f"False labels saved to {output_path}")
+
+if __name__ == '__main__':
+    main()
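
Because every downstream poisoning step leans on these TSVs, a cheap sanity check after generation is worthwhile. A minimal sketch, assuming the default false_data/ output directory and the MNIST train split produced by the script above:

import pandas as pd

# Load one false-label TSV written by the tool above.
df = pd.read_csv("false_data/MNIST_train_false_labels.tsv", sep="\t")

# generate_false_labels draws each false label from the 9 classes that
# are not the true label, so these invariants must hold for every row.
assert (df["falseLabel"] != df["trueLabel"]).all(), "false label equals true label"
assert df["trueLabel"].between(0, 9).all() and df["falseLabel"].between(0, 9).all()

print(f"OK: {len(df)} rows, all false labels differ from the true labels.")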
Lines changed: 53 additions & 84 deletions

@@ -1,102 +1,71 @@
 import argparse
 import os
 import pickle
-from taint import adversarial_attack_blackbox
-from analysis import *
-from train import train_model_and_save
-import torch
 import tensorflow as tf
+import torch
+from taint import adversarial_attack_blackbox
+
+
+def load_model(model_path):
+    # Assumes it's a Keras model (update if using PyTorch)
+    return tf.keras.models.load_model(model_path)
+
+
+def get_test_dataset(data_name):
+    # Import here to avoid unnecessary dependencies if unused
+    from train import get_data  # Ensure get_data returns (train_ds, test_ds)
+
+    train_ds, test_ds = get_data(data_name)
+    return test_ds
 
-def attack_model(args, model, test_ds, save_dir, num_data=10):
-    # Get the labels by iterating through a batch from the test_ds
-    first_batch = next(iter(test_ds))  # Get the first batch
-    images, labels = first_batch  # Unpack the images and labels from the first batch
-
-    # Check if labels are a TensorFlow tensor or PyTorch tensor
-    if isinstance(labels, tf.Tensor):
-        # If using TensorFlow, convert labels to class indices (from one-hot encoded)
-        labels = tf.argmax(labels, axis=1).numpy()  # Get class indices from one-hot encoded labels
-    elif isinstance(labels, torch.Tensor):
-        # If using PyTorch, convert labels to class indices (from one-hot encoded)
-        labels = torch.argmax(labels, dim=1).cpu().numpy()  # Get class indices from one-hot encoded labels
-
-    # Convert labels to a set of unique outputs
-    unique_outputs = set(labels)  # Convert to a Python set for unique labels
-
-    # Continue with the rest of the attack logic
-    for output in unique_outputs:
-        instances = [i for i, label in enumerate(labels) if label == output][:num_data]  # Select `num_data` instances with the current output label
-
-        for image_index in instances:
-            # Create a subdirectory for each image_index and its original output label
-            sub_dir = os.path.join(save_dir, f'image_{image_index}_label_{output}')
-
-            # Ensure the directory exists
-            os.makedirs(sub_dir, exist_ok=True)
-
-            # Correct dynamic pickle filename to include the original and target class
-            pickle_filename = f'attacker_{image_index}_{output}.pkl'
-            pickle_path = os.path.join(sub_dir, pickle_filename)
-
-            # Check if the attacker pickle already exists for this image_index and output
-            if os.path.exists(pickle_path):
-                with open(pickle_path, 'rb') as f:
-                    attacker = pickle.load(f)
-                print(f"Loaded attacker for image {image_index} with label {output} from {pickle_path}")
-            else:
-                print(f"Running adversarial attack for image {image_index} with label {output}...")
-
-            # For the current `output`, target all other classes
-            for target_output in unique_outputs:
-                if target_output != output:  # We want to target all other outputs
-                    for _ in range(num_data):  # Attack the target output `num_data` times
-                        target_sub_dir = os.path.join(sub_dir, f'target_{target_output}')
-                        os.makedirs(target_sub_dir, exist_ok=True)  # Create a subdir for each target class
-
-                        # Correct dynamic pickle filename to include the original and target class
-                        target_pickle_filename = f'attacker_{image_index}_{output}_to_{target_output}.pkl'
-                        target_pickle_path = os.path.join(target_sub_dir, target_pickle_filename)
-
-                        # Perform the adversarial attack targeting `target_output`
-                        attacker = adversarial_attack_blackbox(
-                            model=model,
-                            dataset=test_ds,
-                            image_index=image_index,
-                            output_dir=target_sub_dir,
-                            num_iterations=args.iterations,
-                            num_particles=args.particles,
-                            target_class=target_output  # Specify the target class for the attack
-                        )
-                        print(f"Adversarial attack completed for image {image_index} targeting class {target_output}")
-
-                        # After performing the attack, save the attacker object to a pickle file
-                        with open(target_pickle_path, 'wb') as f:
-                            pickle.dump(attacker, f)
-                        print(f"Saved attacker for image {image_index} with label {output} targeting {target_output} to {target_pickle_path}")
 
 def main():
-    # Command-line arguments
     parser = argparse.ArgumentParser()
 
-    # Data and model type arguments (to align with the ones used in the training script)
-    parser.add_argument('--data', type=str, choices=['MNIST', 'MNIST_Audio'], required=True, help='Dataset to use')
-    parser.add_argument('--model_type', type=str, choices=['normal', 'complex', 'complex_augmented'], required=True, help='Model type to use')
+    # Required args
+    parser.add_argument('--model_path', type=str, required=True, help='Path to saved model (.keras)')
+    parser.add_argument('--save_dir', type=str, required=True, help='Directory to save attack results')
+    parser.add_argument('--source_index', type=int, required=True, help='Index of image to attack')
+    parser.add_argument('--target', type=int, required=True, help='Target class for adversarial attack')
 
-    # Attack parameters
-    parser.add_argument('--iterations', type=int, default=10, help='Number of iterations for attack')
-    parser.add_argument('--particles', type=int, default=100, help='Number of particles for attack')
+    # Dataset config
+    parser.add_argument('--data', type=str, choices=['MNIST', 'MNIST_Audio'], required=True, help='Dataset name')
 
-    # Folder saving argument
-    parser.add_argument('--save_dir', type=str, default='results', help='Directory to save model and results')
+    # Attack config
+    parser.add_argument('--iterations', type=int, default=30, help='Number of attack iterations')
+    parser.add_argument('--particles', type=int, default=100, help='Number of swarm particles')
 
-    # Parse arguments
    args = parser.parse_args()
 
-    # First, train the model and get the necessary details for attack
-    model, test_ds, save_dir, model_path = train_model_and_save(args)
+    # Load model and dataset
+    model = load_model(args.model_path)
+    test_ds = get_test_dataset(args.data)
+
+    # Create output directory
+    os.makedirs(args.save_dir, exist_ok=True)
+
+    # Run the blackbox adversarial attack
+    try:
+        attacker = adversarial_attack_blackbox(
+            model=model,
+            dataset=test_ds,
+            image_index=args.source_index,
+            output_dir=args.save_dir,
+            num_iterations=args.iterations,
+            num_particles=args.particles,
+            target_class=args.target
+        )
+
+        # Save attacker object
+        output_path = os.path.join(args.save_dir, f'attacker_{args.source_index}_to_{args.target}.pkl')
+        with open(output_path, 'wb') as f:
+            pickle.dump(attacker, f)
+
+        print(f"Attack complete. Saved attacker to: {output_path}")
+
+    except Exception as e:
+        print(f"Error during attack: {e}")
 
-    # Perform the adversarial attack
-    attack_model(args, model, test_ds, save_dir)
 
 if __name__ == '__main__':
     main()
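
This refactor moves the nested sweep out of attack_model and into the caller: one invocation now attacks a single (source image, target class) pair, so the old all-pairs behaviour becomes a loop over invocations. A minimal driver sketch; the script's filename is not shown in this commit, so attack.py, the model path, and the index list are all assumptions:

import itertools
import subprocess

SOURCE_INDICES = [0, 1, 2]   # hypothetical images to attack
TARGET_CLASSES = range(10)   # all 10 classes; pairs where the target
                             # equals the image's true label could be
                             # skipped upfront

for src, tgt in itertools.product(SOURCE_INDICES, TARGET_CLASSES):
    subprocess.run([
        "python", "attack.py",                  # assumed filename
        "--model_path", "results/model.keras",  # assumed model path
        "--save_dir", f"attacks/src{src}_tgt{tgt}",
        "--source_index", str(src),
        "--target", str(tgt),
        "--data", "MNIST",
    ], check=True)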
