Skip to content

Commit 8aa0aa6

Browse files
authored
Merge pull request #1740 from Swanand-Kadhe/development_audio_backdoor
Dirty-Label Backdoor Poisoning Attack for Audio
2 parents 520bf0b + 1ec261e commit 8aa0aa6

File tree

6 files changed

+1520
-1
lines changed

6 files changed

+1520
-1
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2022
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
"""
19+
Adversarial perturbations designed to work for audio.
20+
"""
21+
import numpy as np
22+
import librosa
23+
24+
25+
def insert_tone_trigger(
    x: np.ndarray,
    sampling_rate: int = 16000,
    frequency: int = 440,
    duration: float = 0.1,
    random: bool = False,
    shift: int = 0,
    scale: float = 0.1,
) -> np.ndarray:
    """
    Adds a 'tone' with a given frequency to audio example. Works for a single example or a batch of examples.

    The input is never modified; a poisoned copy with the same shape and dtype is returned.

    :param x: N x L matrix or length L array, where N is number of examples, L is the length in number of samples.
              X is in range [-1,1].
    :param sampling_rate: Positive integer denoting the sampling rate for x.
    :param frequency: Frequency of the tone to be added.
    :param duration: Duration of the tone to be added, in seconds.
    :param random: Flag indicating whether the trigger should be randomly placed. For a batch, an independent
                   offset is drawn for every example.
    :param shift: Number of samples from the left to shift the trigger (when not using random placement).
                  Must be non-negative.
    :param scale: Scaling factor for mixing the trigger.
    :return: Backdoored audio.
    :raises ValueError: If `x` is not 1- or 2-dimensional, if `shift` is negative, or if the trigger does not fit
                        inside the audio at the requested position.
    """
    n_dim = len(x.shape)
    if n_dim not in (1, 2):
        # Also rejects 0-d input, which would otherwise fail later with an unrelated IndexError.
        raise ValueError("Invalid array shape " + str(x.shape))

    if not random and shift < 0:
        # A negative offset would be interpreted by slicing as "from the end" and silently misplace the trigger.
        raise ValueError("Shift must be a non-negative number of samples.")

    if n_dim == 2 and x.shape[0] == 0:
        # Empty batch: nothing to poison, but keep the (0, L) shape and dtype.
        return np.copy(x)

    length = x.shape[-1]

    # The tone is identical for every example, so it is generated once for the whole batch.
    tone_trigger = librosa.tone(frequency, sr=sampling_rate, duration=duration)
    bd_length = tone_trigger.shape[0]

    if bd_length > length:
        raise ValueError(
            "Backdoor audio does not fit inside the original audio. Audio length: "
            + str(length)
            + " samples, trigger length: "
            + str(bd_length)
            + " samples."
        )

    if not random and shift + bd_length > length:
        raise ValueError("Shift + Backdoor length is greater than audio's length.")

    audio = np.copy(x)
    # Cast the trigger to the audio dtype before scaling so the arithmetic is carried out in the audio's precision.
    scaled_trigger = scale * tone_trigger.astype(audio.dtype)

    # View the data as a batch so that single examples and batches share one code path.
    rows = audio if n_dim == 2 else audio[np.newaxis, :]
    for row in rows:
        if random:
            # randint's upper bound is exclusive: +1 makes the last valid offset reachable and lets a trigger
            # that exactly fills the audio be placed at offset 0.
            offset = np.random.randint(length - bd_length + 1)
        else:
            offset = shift
        row[offset : offset + bd_length] += scaled_trigger

    return audio
83+
84+
85+
def insert_audio_trigger(
    x: np.ndarray,
    sampling_rate: int = 16000,
    backdoor_path: str = "../../../utils/data/backdoors/cough_trigger.wav",
    duration: float = 1.0,
    random: bool = False,
    shift: int = 0,
    scale: float = 0.1,
) -> np.ndarray:
    """
    Adds an audio backdoor trigger to a set of audio examples. Works for a single example or a batch of examples.

    The input is never modified; a poisoned copy with the same shape and dtype is returned.

    :param x: N x L matrix or length L array, where N is number of examples, L is the length in number of samples.
              X is in range [-1,1].
    :param sampling_rate: Positive integer denoting the sampling rate for x.
    :param backdoor_path: The path to the audio to insert as a trigger. The default is a relative path.
    :param duration: Duration of the trigger in seconds (default 1.0). The value is forwarded to `librosa.load`;
                     pass `None` if the full trigger is to be used.
    :param random: Flag indicating whether the trigger should be randomly placed. For a batch, an independent
                   offset is drawn for every example.
    :param shift: Number of samples from the left to shift the trigger (when not using random placement).
                  Must be non-negative.
    :param scale: Scaling factor for mixing the trigger.
    :return: Backdoored audio.
    :raises ValueError: If `x` is not 1- or 2-dimensional, if `shift` is negative, or if the trigger does not fit
                        inside the audio at the requested position.
    """
    # Imported locally so that this function does not rely on a module-level logger being defined.
    import logging

    n_dim = len(x.shape)
    if n_dim not in (1, 2):
        # Also rejects 0-d input, which would otherwise fail later with an unrelated IndexError.
        raise ValueError("Invalid array shape " + str(x.shape))

    if not random and shift < 0:
        # A negative offset would be interpreted by slicing as "from the end" and silently misplace the trigger.
        raise ValueError("Shift must be a non-negative number of samples.")

    if n_dim == 2 and x.shape[0] == 0:
        # Empty batch: nothing to poison, but keep the (0, L) shape and dtype.
        return np.copy(x)

    length = x.shape[-1]

    # The trigger is the same for every example, so the file is read once for the whole batch.
    trigger, bd_sampling_rate = librosa.load(backdoor_path, mono=True, sr=None, duration=duration)

    if sampling_rate != bd_sampling_rate:
        logging.getLogger(__name__).warning(
            "Backdoor sampling rate does not match with the sampling rate provided. "
            "Resampling the backdoor to match the sampling rate."
        )
        trigger, _ = librosa.load(backdoor_path, mono=True, sr=sampling_rate, duration=duration)

    bd_length = trigger.shape[0]

    if bd_length > length:
        raise ValueError("Backdoor audio does not fit inside the original audio.")

    if not random and shift + bd_length > length:
        raise ValueError("Shift + Backdoor length is greater than audio's length.")

    audio = np.copy(x)
    # Cast the trigger to the audio dtype before scaling so the arithmetic is carried out in the audio's precision.
    scaled_trigger = scale * trigger.astype(audio.dtype)

    # View the data as a batch so that single examples and batches share one code path.
    rows = audio if n_dim == 2 else audio[np.newaxis, :]
    for row in rows:
        if random:
            # randint's upper bound is exclusive: +1 makes the last valid offset reachable and lets a trigger
            # that exactly fills the audio be placed at offset 0.
            offset = np.random.randint(length - bd_length + 1)
        else:
            offset = shift
        row[offset : offset + bd_length] += scaled_trigger

    return audio

notebooks/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ shows how to create an adversarial attack on a video action recognition classifi
2222
[adversarial_audio_examples.ipynb](adversarial_audio_examples.ipynb) [[on nbviewer](https://nbviewer.jupyter.org/github/Trusted-AI/adversarial-robustness-toolbox/blob/main/notebooks/adversarial_audio_examples.ipynb)]
2323
shows how to create adversarial examples of audio data with ART. Experiments in this notebook show how the waveform of a spoken digit of the AudioMNIST dataset can be modified with almost imperceptible changes so that the waveform gets mis-classified as a different digit.
2424

25+
[poisoning_attack_backdoor_audio.ipynb](poisoning_attack_backdoor_audio.ipynb) [[on nbviewer](https://nbviewer.jupyter.org/github/Trusted-AI/adversarial-robustness-toolbox/blob/main/notebooks/poisoning_attack_backdoor_audio.ipynb)]
26+
demonstrates the dirty-label backdoor attack on a TensorflowV2 estimator for speech classification.
27+
2528
<p align="center">
2629
<img src="../utils/data/images/adversarial_audio_waveform.png?raw=true" width="200" title="adversarial_audio_waveform">
2730
</p>

notebooks/poisoning_attack_backdoor_audio.ipynb

Lines changed: 1238 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2022
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
from __future__ import absolute_import, division, print_function, unicode_literals
19+
20+
import logging
21+
import numpy as np
22+
import pytest
23+
import os
24+
25+
from art.attacks.poisoning.perturbations.audio_perturbations import insert_tone_trigger, insert_audio_trigger
26+
27+
from tests.utils import ARTTestException
28+
29+
logger = logging.getLogger(__name__)
30+
31+
32+
@pytest.mark.framework_agnostic
def test_insert_tone_trigger(art_warning):
    """
    Check `insert_tone_trigger` on single examples and batches, with fixed and random placement, and verify that
    triggers which do not fit inside the audio are rejected with a `ValueError`.
    """
    try:
        # test single example
        audio = insert_tone_trigger(x=np.zeros(3200), sampling_rate=16000)
        assert audio.shape == (3200,)
        assert np.max(np.abs(audio)) != 0

        # test single example with different duration, frequency, and scale
        # The frequency must stay below the Nyquist limit (sampling_rate / 2): a tone at the sampling rate itself
        # is ~0 at every sample, so a non-zero check on it would only observe floating-point round-off.
        audio = insert_tone_trigger(x=np.zeros(3200), sampling_rate=16000, frequency=880, duration=0.2, scale=0.5)
        assert audio.shape == (3200,)
        # the input is silent, so the peak amplitude of the output is the tone amplitude, i.e. `scale`
        assert np.isclose(np.max(np.abs(audio)), 0.5, atol=1e-2)

        # test a batch of examples
        audio = insert_tone_trigger(x=np.zeros((10, 3200)), sampling_rate=16000)
        assert audio.shape == (10, 3200)
        # every example of the batch must carry the trigger
        assert np.all(np.max(np.abs(audio), axis=1) != 0)

        # test single example with shift
        audio = insert_tone_trigger(x=np.zeros(3200), sampling_rate=16000, shift=10)
        assert audio.shape == (3200,)
        assert np.max(np.abs(audio)) != 0
        # element-wise check: a sum could be zero through cancellation
        assert np.all(audio[:10] == 0)

        # test a batch of examples with random shift
        audio = insert_tone_trigger(x=np.zeros((10, 3200)), sampling_rate=16000, random=True)
        assert audio.shape == (10, 3200)
        assert np.all(np.max(np.abs(audio), axis=1) != 0)

        # test when length of backdoor is larger than that of audio signal
        with pytest.raises(ValueError):
            _ = insert_tone_trigger(x=np.zeros(3200), sampling_rate=16000, duration=0.3)

        # test when shift + backdoor is larger than that of audio signal
        with pytest.raises(ValueError):
            _ = insert_tone_trigger(x=np.zeros(3200), sampling_rate=16000, duration=0.2, shift=5)

    except ARTTestException as e:
        art_warning(e)
71+
72+
73+
@pytest.mark.framework_agnostic
def test_insert_audio_trigger(art_warning):
    """
    Check `insert_audio_trigger` with the cough trigger file on single examples and batches, with fixed and random
    placement, and verify that triggers which do not fit inside the audio are rejected with a `ValueError`.
    """
    # The trigger file is looked up relative to the current working directory.
    trigger_file = os.path.join(os.getcwd(), "utils/data/backdoors/cough_trigger.wav")
    # Arguments shared by every call below.
    common = {"sampling_rate": 16000, "backdoor_path": trigger_file}

    try:
        # single example, default duration and scale
        poisoned = insert_audio_trigger(x=np.zeros(32000), **common)
        assert poisoned.shape == (32000,)
        assert np.max(poisoned) != 0

        # single example with a different duration and scale
        poisoned = insert_audio_trigger(x=np.zeros(32000), duration=0.8, scale=0.5, **common)
        assert poisoned.shape == (32000,)
        assert np.max(poisoned) != 0

        # batch of examples
        poisoned = insert_audio_trigger(x=np.zeros((10, 16000)), **common)
        assert poisoned.shape == (10, 16000)
        assert np.max(poisoned) != 0

        # single example with a fixed shift: the samples before the shift stay untouched
        poisoned = insert_audio_trigger(x=np.zeros(32000), shift=10, **common)
        assert poisoned.shape == (32000,)
        assert np.max(poisoned) != 0
        assert np.sum(poisoned[:10]) == 0

        # batch of examples with random placement
        poisoned = insert_audio_trigger(x=np.zeros((10, 32000)), random=True, **common)
        assert poisoned.shape == (10, 32000)
        assert np.max(poisoned) != 0

        # the trigger is longer than the audio signal
        with pytest.raises(ValueError):
            insert_audio_trigger(x=np.zeros(15000), **common)

        # shift + trigger length exceeds the length of the audio signal
        with pytest.raises(ValueError):
            insert_audio_trigger(x=np.zeros(16000), duration=1, shift=5, **common)

    except ARTTestException as e:
        art_warning(e)

tests/attacks/poison/test_backdoor_attack_dgm_red.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def test_poison_estimator_red(art_warning, image_dl_generator, x_target):
4242

4343
generator = red_attack.poison_estimator(z_trigger=z_trigger, x_target=x_target, max_iter=2)
4444
assert isinstance(generator, TensorFlowV2Generator)
45-
np.testing.assert_approx_equal(round(red_attack.fidelity(z_trigger, x_target).numpy(), 4), 0.33)
45+
np.testing.assert_approx_equal(round(red_attack.fidelity(z_trigger, x_target).numpy(), 4), 0.33, significant=2)
4646

4747
except ARTTestException as e:
4848
art_warning(e)
46.9 KB
Binary file not shown.

0 commit comments

Comments
 (0)