
Commit de191be

DynamicBackdoorGAN implementation with workflow fix
Signed-off-by: Prachi Panwar <[email protected]>
1 parent 261b541 commit de191be

10 files changed: +321 −20 lines changed

.github/workflows/ci-huggingface.yml

Lines changed: 2 additions & 1 deletion
@@ -51,13 +51,14 @@ jobs:
          sudo apt-get update
          sudo apt-get -y -q install ffmpeg libavcodec-extra
          python -m pip install --upgrade pip setuptools wheel
-         pip install -q -r <(sed '/^tensorflow/d;/^keras/d;/^torch/d;/^torchvision/d;/^torchaudio/d;/^transformers/d' requirements_test.txt)
+         pip install -q -r <(sed '/^tensorflow/d;/^keras/d;/^torch/d;/^torchvision/d;/^torchaudio/d;/^transformers/d;/^safetensors/d' requirements_test.txt)
          pip install tensorflow==2.18.1
          pip install keras==3.10.0
          pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cpu
          pip install torchvision==${{ matrix.torchvision }} --index-url https://download.pytorch.org/whl/cpu
          pip install torchaudio==${{ matrix.torchaudio }} --index-url https://download.pytorch.org/whl/cpu
          pip install transformers==${{ matrix.transformers }}
+         pip install safetensors==0.5.3
          pip list

      - name: Cache CIFAR-10 dataset

.github/workflows/ci-legacy.yml

Lines changed: 3 additions & 3 deletions
@@ -34,9 +34,9 @@ jobs:
            python: '3.10'
            tensorflow: 2.18.1
            keras: 3.10.0
-           torch: 2.7.0
-           torchvision: 0.22.0
-           torchaudio: 2.7.0
+           torch: 2.8.0
+           torchvision: 0.23.0
+           torchaudio: 2.8.0
            scikit-learn: 1.6.1

    name: Run ${{ matrix.module }} ${{ matrix.name }} Tests

.github/workflows/ci-pytorch.yml

Lines changed: 6 additions & 6 deletions
@@ -28,18 +28,18 @@ jobs:
      fail-fast: false
      matrix:
        include:
-         - name: PyTorch 2.6.0 (Python 3.10)
-           framework: pytorch
-           python: '3.10'
-           torch: 2.6.0
-           torchvision: 0.21.0
-           torchaudio: 2.6.0
          - name: PyTorch 2.7.1 (Python 3.10)
            framework: pytorch
            python: '3.10'
            torch: 2.7.1
            torchvision: 0.22.1
            torchaudio: 2.7.1
+         - name: PyTorch 2.8.0 (Python 3.10)
+           framework: pytorch
+           python: '3.10'
+           torch: 2.8.0
+           torchvision: 0.23.0
+           torchaudio: 2.8.0

    name: ${{ matrix.name }}
    steps:

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
name: CI TensorFlow v1

on:
  pull_request:
    branches: [ main ]
  workflow_dispatch:

jobs:
  sanity:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
        uses: actions/checkout@v4
      - name: Say hello
        run: echo "Workflow is wired up and running."

art/attacks/poisoning/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -19,3 +19,5 @@
 from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_pytorch import HiddenTriggerBackdoorPyTorch
 from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_keras import HiddenTriggerBackdoorKeras
 from art.attacks.poisoning.sleeper_agent_attack import SleeperAgentAttack
+from art.attacks.poisoning.dynamic_backdoor_gan import DynamicBackdoorGAN
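With this export, the new attack is importable alongside the existing poisoning attacks. A minimal sanity check (illustrative, not part of the commit):

    from art.attacks.poisoning import DynamicBackdoorGAN, PoisoningAttackBackdoor

    # DynamicBackdoorGAN subclasses PoisoningAttackBackdoor, so it should slot into
    # code paths that already accept the backdoor poisoning attack interface.
    assert issubclass(DynamicBackdoorGAN, PoisoningAttackBackdoor)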
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
# Imports
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Subset
from torchvision import datasets, transforms, models
from art.estimators.classification import PyTorchClassifier
from art.utils import to_categorical
from art.attacks.poisoning import PoisoningAttackBackdoor


# Trigger generator: a small CNN that learns to generate input-specific triggers
class TriggerGenerator(nn.Module):
    def __init__(self, input_channels=3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, input_channels, kernel_size=3, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)


# Custom poisoning attack: DynamicBackdoorGAN. Defines how to poison data using the GAN trigger generator.
class DynamicBackdoorGAN(PoisoningAttackBackdoor):
    def __init__(self, generator, target_label, backdoor_rate, classifier, epsilon=0.5):
        super().__init__(perturbation=lambda x: x)
        self.classifier = classifier
        self.generator = generator.to(classifier.device)
        self.target_label = target_label
        self.backdoor_rate = backdoor_rate
        self.epsilon = epsilon

    # Add the trigger to a batch of images
    def apply_trigger(self, images):
        self.generator.eval()
        with torch.no_grad():
            images = nn.functional.interpolate(images, size=(32, 32), mode='bilinear')  # resize images to a uniform dimension
            triggers = self.generator(images.to(self.classifier.device))  # generate dynamic, input-specific triggers with the trained TriggerGenerator
            poisoned = (images.to(self.classifier.device) + self.epsilon * triggers).clamp(0, 1)  # clamp pixel values to the valid [0, 1] range
        return poisoned

    # Poison the training data by injecting dynamic triggers and changing labels
    def poison(self, x, y):
        # Convert raw image data (x) to float tensors and one-hot labels (y) to class indices, as required below
        x_tensor = torch.tensor(x).float()
        y_tensor = torch.tensor(np.argmax(y, axis=1))
        # Total number of samples and how many of them to poison (poison ratio = backdoor_rate)
        batch_size = x_tensor.shape[0]
        n_poison = int(self.backdoor_rate * batch_size)
        # Apply the learned trigger to the first n_poison samples
        poisoned = self.apply_trigger(x_tensor[:n_poison])
        # The remaining samples stay clean
        clean = x_tensor[n_poison:].to(self.classifier.device)
        # Combine poisoned and clean samples into a single batch
        poisoned_images = torch.cat([poisoned, clean], dim=0).cpu().numpy()
        # Relabel the poisoned samples with the attacker's target class
        new_labels = y_tensor.clone()
        new_labels[:n_poison] = self.target_label  # set the poisoned labels to the desired misclassification
        # Convert all labels back to one-hot encoding (required by ART classifiers)
        new_labels = to_categorical(new_labels.numpy(), nb_classes=self.classifier.nb_classes)
        return poisoned_images.astype(np.float32), new_labels.astype(np.float32)

    # Evaluate the attack's success on test data
    def evaluate(self, x_clean, y_clean):
        x_tensor = torch.tensor(x_clean).float()
        poisoned_test = self.apply_trigger(x_tensor).cpu().numpy().astype(np.float32)  # trigger every test image to build a poisoned test set

        preds = self.classifier.predict(poisoned_test)
        true_target = np.full((len(preds),), self.target_label)
        pred_labels = np.argmax(preds, axis=1)

        success = np.sum(pred_labels == true_target)
        asr = 100.0 * success / len(pred_labels)
        return asr
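
For orientation, a minimal usage sketch of the class above. It assumes an existing ART PyTorchClassifier (the name art_classifier is a placeholder) and float32 images in [0, 1] with one-hot labels; the epoch count and rates are illustrative, not values fixed by this commit:

    # Hypothetical setup: art_classifier, x_train, y_train, x_test, y_test are provided by the caller.
    generator = TriggerGenerator(input_channels=3)
    attack = DynamicBackdoorGAN(
        generator=generator,
        target_label=0,          # class the backdoor should force
        backdoor_rate=0.1,       # fraction of the batch that receives the trigger
        classifier=art_classifier,
        epsilon=0.5,             # trigger strength
    )

    x_poison, y_poison = attack.poison(x_train, y_train)  # poisoned copy of the training data
    art_classifier.fit(x_poison, y_poison, nb_epochs=10, batch_size=128)
    print(f"ASR: {attack.evaluate(x_test, y_test):.2f}%")  # attack success rate on triggered test images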

art/estimators/certification/deep_z/pytorch.py

Lines changed: 1 addition & 1 deletion
@@ -169,7 +169,7 @@ def concrete_forward(self, in_x: np.ndarray | "torch.Tensor") -> "torch.Tensor":
            # as reshapes are not modules we infer when the reshape from convolutional to dense occurs
            if self.reshape_op_num == op_num:
                x = x.reshape((x.shape[0], -1))
-           x = op.concrete_forward(x)
+           x = op.concrete_forward(x)  # type: ignore
        return x

    def set_forward_mode(self, mode: str) -> None:

art/estimators/certification/interval/pytorch.py

Lines changed: 1 addition & 1 deletion
@@ -179,7 +179,7 @@ def concrete_forward(self, in_x: np.ndarray | "torch.Tensor") -> "torch.Tensor":
            if isinstance(op, PyTorchIntervalConv2D) and self.forward_mode == "attack":
                x = op.conv_forward(x)
            else:
-               x = op.concrete_forward(x)
+               x = op.concrete_forward(x)  # type: ignore
        return x

    def set_forward_mode(self, mode: str) -> None:

examples/dynamicbackdoorgan_demo.py

Lines changed: 206 additions & 0 deletions
@@ -0,0 +1,206 @@
# -*- coding: utf-8 -*-
"""DynamicBackdoorGAN_Demo.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1aMV5GZ7Z0cwuUl36NxFUsBU5RoJunCGA
"""

# pip install adversarial-robustness-toolbox

# Imports
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Subset
from torchvision import datasets, transforms, models
from art.estimators.classification import PyTorchClassifier
from art.utils import to_categorical
from art.attacks.poisoning import PoisoningAttackBackdoor

# User config
config = {
    "dataset": "CIFAR10",        # CIFAR10, CIFAR100, MNIST
    "model_name": "resnet18",    # resnet18, resnet50, mobilenetv2, densenet121
    "poison_ratio": 0.1,
    "target_label": 0,           # target label to which poisoned samples are mapped
    "epochs": 30,
    "batch_size": 128,
    "epsilon": 0.5               # trigger strength
}

# Trigger generator: a small CNN that learns to generate input-specific triggers
class TriggerGenerator(nn.Module):
    def __init__(self, input_channels=3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, input_channels, kernel_size=3, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)

# Custom poisoning attack: DynamicBackdoorGAN. Defines how to poison data using the GAN trigger generator.
class DynamicBackdoorGAN(PoisoningAttackBackdoor):
    def __init__(self, generator, target_label, backdoor_rate, classifier, epsilon=0.5):
        super().__init__(perturbation=lambda x: x)
        self.classifier = classifier
        self.generator = generator.to(classifier.device)
        self.target_label = target_label
        self.backdoor_rate = backdoor_rate
        self.epsilon = epsilon

    # Add the trigger to a batch of images
    def apply_trigger(self, images):
        self.generator.eval()
        with torch.no_grad():
            images = nn.functional.interpolate(images, size=(32, 32), mode='bilinear')  # resize images to a uniform dimension
            triggers = self.generator(images.to(self.classifier.device))  # generate dynamic, input-specific triggers with the trained TriggerGenerator
            poisoned = (images.to(self.classifier.device) + self.epsilon * triggers).clamp(0, 1)  # clamp pixel values to the valid [0, 1] range
        return poisoned

    # Poison the training data by injecting dynamic triggers and changing labels
    def poison(self, x, y):
        # Convert raw image data (x) to float tensors and one-hot labels (y) to class indices
        x_tensor = torch.tensor(x).float()
        y_tensor = torch.tensor(np.argmax(y, axis=1))
        # Total number of samples and how many of them to poison (poison ratio = backdoor_rate)
        batch_size = x_tensor.shape[0]
        n_poison = int(self.backdoor_rate * batch_size)
        # Apply the learned trigger to the first n_poison samples
        poisoned = self.apply_trigger(x_tensor[:n_poison])
        # The remaining samples stay clean
        clean = x_tensor[n_poison:].to(self.classifier.device)
        # Combine poisoned and clean samples into a single batch
        poisoned_images = torch.cat([poisoned, clean], dim=0).cpu().numpy()
        # Relabel the poisoned samples with the attacker's target class
        new_labels = y_tensor.clone()
        new_labels[:n_poison] = self.target_label  # set the poisoned labels to the desired misclassification
        # Convert all labels back to one-hot encoding (required by ART classifiers)
        new_labels = to_categorical(new_labels.numpy(), nb_classes=self.classifier.nb_classes)
        return poisoned_images.astype(np.float32), new_labels.astype(np.float32)

    # Evaluate the attack's success on test data
    def evaluate(self, x_clean, y_clean):
        x_tensor = torch.tensor(x_clean).float()
        poisoned_test = self.apply_trigger(x_tensor).cpu().numpy().astype(np.float32)  # trigger every test image to build a poisoned test set

        preds = self.classifier.predict(poisoned_test)
        true_target = np.full((len(preds),), self.target_label)
        pred_labels = np.argmax(preds, axis=1)

        success = np.sum(pred_labels == true_target)
        asr = 100.0 * success / len(pred_labels)
        return asr

# ✅ Utility: load data
def get_data(dataset="CIFAR10", train_subset=None, test_subset=None):
    if dataset in ["CIFAR10", "CIFAR100"]:
        transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])
    elif dataset == "MNIST":
        transform = transforms.Compose([
            transforms.Grayscale(num_output_channels=3),
            transforms.Resize((32, 32)),
            transforms.ToTensor()
        ])
    else:
        raise ValueError("Unsupported dataset")

    if dataset == "CIFAR10":
        dataset_cls = datasets.CIFAR10
        num_classes = 10
    elif dataset == "CIFAR100":
        dataset_cls = datasets.CIFAR100
        num_classes = 100
    elif dataset == "MNIST":
        dataset_cls = datasets.MNIST
        num_classes = 10

    train_set = dataset_cls(root="./data", train=True, download=True, transform=transform)
    test_set = dataset_cls(root="./data", train=False, download=True, transform=transform)

    if train_subset is not None:
        train_set = Subset(train_set, range(train_subset))
    if test_subset is not None:
        test_set = Subset(test_set, range(test_subset))

    x_train = torch.stack([x for x, _ in train_set]).numpy()
    y_train = to_categorical([y for _, y in train_set], nb_classes=num_classes)

    x_test = torch.stack([x for x, _ in test_set]).numpy()
    y_test = to_categorical([y for _, y in test_set], nb_classes=num_classes)

    return x_train, y_train, x_test, y_test, num_classes

# Utility: get ART classifier. Returns an ART-compatible classifier wrapped around the selected PyTorch model.
def get_classifier(config):
    model_name = config["model_name"]
    nb_classes = config["nb_classes"]
    input_shape = config["input_shape"]
    lr = config.get("learning_rate", 0.001)

    if model_name == "resnet18":
        model = models.resnet18(num_classes=nb_classes)
    elif model_name == "resnet50":
        model = models.resnet50(num_classes=nb_classes)
    elif model_name == "mobilenetv2":
        model = models.mobilenet_v2(num_classes=nb_classes)
    elif model_name == "densenet121":
        model = models.densenet121(num_classes=nb_classes)
    else:
        raise ValueError(f"Unsupported model: {model_name}")

    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        optimizer=optimizer,
        input_shape=input_shape,
        nb_classes=nb_classes,
        clip_values=(0.0, 1.0),
        device_type="gpu" if torch.cuda.is_available() else "cpu"
    )
    return classifier

# Full experiment: runs clean training and poisoned training, then evaluates the effectiveness of the backdoor attack
def run_dynamic_backdoor_experiment(config):
    x_train, y_train, x_test, y_test, num_classes = get_data(
        dataset=config["dataset"],
        train_subset=config.get("train_subset"),
        test_subset=config.get("test_subset")
    )
    config["nb_classes"] = num_classes
    config["input_shape"] = x_train.shape[1:]

    classifier = get_classifier(config)

    # Clean training
    classifier.fit(x_train, y_train, nb_epochs=config["epochs"], batch_size=config["batch_size"])
    clean_acc = np.mean(np.argmax(classifier.predict(x_test), axis=1) == np.argmax(y_test, axis=1))
    print(f"Clean Accuracy: {clean_acc * 100:.2f}%")

    # Poisoned training
    generator = TriggerGenerator()
    attack = DynamicBackdoorGAN(
        generator,
        config["target_label"],
        config["poison_ratio"],
        classifier,
        epsilon=config["epsilon"]
    )
    x_poison, y_poison = attack.poison(x_train, y_train)

    classifier.fit(x_poison, y_poison, nb_epochs=config["epochs"], batch_size=config["batch_size"])
    poisoned_acc = np.mean(np.argmax(classifier.predict(x_test), axis=1) == np.argmax(y_test, axis=1))
    print(f"Poisoned Accuracy: {poisoned_acc * 100:.2f}%")

    asr = attack.evaluate(x_test, y_test)
    print(f"Attack Success Rate (ASR): {asr:.2f}%")

# ✅ Run
run_dynamic_backdoor_experiment(config)
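
The config dict is the single knob for the demo; get_data() also honours optional train_subset and test_subset keys read via config.get(). A quicker smoke-test configuration as a sketch (the subset sizes and epoch count below are illustrative choices, not values from the commit):

    # Hypothetical quick run: MNIST (converted to 3x32x32 by get_data), small subsets, few epochs.
    quick_config = {
        "dataset": "MNIST",
        "model_name": "resnet18",
        "poison_ratio": 0.1,
        "target_label": 0,
        "epochs": 2,
        "batch_size": 64,
        "epsilon": 0.5,
        "train_subset": 2000,   # only the first 2000 training images
        "test_subset": 500,     # only the first 500 test images
    }

    run_dynamic_backdoor_experiment(quick_config)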
