Skip to content

Commit 24b1633

Browse files
authored
Merge pull request #605 from Trusted-AI/asr_attack_pytorch
ASR attack pytorch
2 parents aa16e05 + c810d53 commit 24b1633

File tree

12 files changed

+2156
-0
lines changed

12 files changed

+2156
-0
lines changed

art/attacks/evasion/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@
4242
from art.attacks.evasion.square_attack import SquareAttack
4343
from art.attacks.evasion.simba import SimBA
4444
from art.attacks.evasion.shapeshifter import ShapeShifter
45+
from art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch import ImperceptibleASRPytorch

art/attacks/evasion/imperceptible_asr/__init__.py

Whitespace-only changes.

art/attacks/evasion/imperceptible_asr/imperceptible_asr_pytorch.py

Lines changed: 856 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""
2+
Module containing estimators for speech recognition.
3+
"""
4+
from art.estimators.speech_recognition.speech_recognizer import SpeechRecognizerMixin
5+
6+
from art.estimators.speech_recognition.pytorch_deep_speech import PyTorchDeepSpeech

art/estimators/speech_recognition/pytorch_deep_speech.py

Lines changed: 644 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2020
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
"""
19+
This module implements mixin abstract base class for all speech recognizers in ART.
20+
"""
21+
22+
from abc import ABC
23+
24+
25+
class SpeechRecognizerMixin(ABC):
26+
"""
27+
Mix-in Base class for ART speech recognizers.
28+
"""

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ Supported Machine Learning Libraries
8080
modules/estimators/generation
8181
modules/estimators/object_detection
8282
modules/estimators/regression
83+
modules/estimators/speech_recognition
8384
modules/metrics
8485
modules/wrappers
8586
modules/data_generators
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
:mod:`art.estimators.speech_recognition`
2+
========================================
3+
.. automodule:: art.estimators.speech_recognition
4+
5+
Mixin Base Class Speech Recognizer
6+
----------------------------------
7+
.. autoclass:: SpeechRecognizerMixin
8+
:members:
9+
:special-members: __init__
10+
:inherited-members:
11+
12+
Speech Recognizer Deep Speech
13+
-----------------------------
14+
.. autoclass:: PyTorchDeepSpeech
15+
:members:
16+
:special-members: __init__
17+
:inherited-members:

run_tests.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ declare -a classifiers=("tests/estimators/certification/test_randomized_smoothin
9999

100100
declare -a object_detectors=("tests/estimators/object_detection/test_tensorflow_faster_rcnn.py")
101101

102+
declare -a speech_recognizers=("tests/estimators/speech_recognition/test_pytorch_deep_speech.py")
103+
102104
declare -a defences=("tests/defences/test_adversarial_trainer.py" \
103105
"tests/defences/test_adversarial_trainer_madry_pgd.py" \
104106
"tests/defences/test_class_labels.py" \
@@ -137,6 +139,7 @@ declare -a art=("tests/test_data_generators.py" \
137139
tests_modules=("attacks" \
138140
"classifiers" \
139141
"object_detectors" \
142+
"speech_recognizers" \
140143
"defences" \
141144
"metrics" \
142145
"wrappers" \
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2020
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
from __future__ import absolute_import, division, print_function, unicode_literals
19+
20+
import logging
21+
import importlib
22+
23+
import torch
24+
import numpy as np
25+
import pytest
26+
27+
from tests.utils import master_seed
28+
29+
deepspeech_pytorch_spec = importlib.util.find_spec("deepspeech_pytorch")
30+
deepspeech_pytorch_found = deepspeech_pytorch_spec is not None
31+
32+
apex_spec = importlib.util.find_spec("apex")
33+
if apex_spec is not None:
34+
amp_spec = importlib.util.find_spec("apex.amp")
35+
else:
36+
amp_spec = None
37+
amp_found = amp_spec is not None
38+
39+
logger = logging.getLogger(__name__)
40+
41+
42+
@pytest.mark.skipif(
43+
not deepspeech_pytorch_found,
44+
reason="Skip unittests if deep speech module is not found because of pre-trained model.",
45+
)
46+
@pytest.mark.skipif(not amp_found, reason="Skip unittests if apex module is not found.")
47+
class TestImperceptibleASRPytorch:
48+
"""
49+
This class tests the ImperceptibleASRPytorch attack.
50+
"""
51+
52+
@pytest.fixture
53+
def setup_class(self):
54+
master_seed(seed=1234)
55+
56+
# Small data for testing
57+
x1 = np.array(
58+
[
59+
-1.0376293e-03,
60+
-1.0681478e-03,
61+
-1.0986663e-03,
62+
-1.1291848e-03,
63+
-1.1291848e-03,
64+
-1.1291848e-03,
65+
-1.1902219e-03,
66+
-1.1597034e-03,
67+
-1.1902219e-03,
68+
-1.1291848e-03,
69+
-1.1291848e-03,
70+
-1.0681478e-03,
71+
-9.1555528e-04,
72+
]
73+
* 100
74+
)
75+
76+
x2 = np.array(
77+
[
78+
-1.8311106e-04,
79+
-1.2207404e-04,
80+
-6.1037019e-05,
81+
0.0000000e00,
82+
3.0518509e-05,
83+
0.0000000e00,
84+
-3.0518509e-05,
85+
0.0000000e00,
86+
0.0000000e00,
87+
9.1555528e-05,
88+
2.1362957e-04,
89+
3.3570360e-04,
90+
4.2725913e-04,
91+
4.5777764e-04,
92+
-1.8311106e-04,
93+
]
94+
* 100
95+
)
96+
97+
x3 = np.array(
98+
[
99+
-8.2399976e-04,
100+
-7.0192572e-04,
101+
-5.4933317e-04,
102+
-4.2725913e-04,
103+
-3.6622211e-04,
104+
-2.7466659e-04,
105+
-2.1362957e-04,
106+
5.4933317e-04,
107+
5.7985168e-04,
108+
6.1037019e-04,
109+
6.7140721e-04,
110+
7.0192572e-04,
111+
6.7140721e-04,
112+
-1.5259255e-04,
113+
]
114+
* 100
115+
)
116+
117+
self.x = np.array([x1, x2, x3])
118+
119+
# Create labels
120+
self.y = np.array(["S", "I", "GD"])
121+
122+
@pytest.mark.only_with_platform("pytorch")
123+
def test_all(self, _test_all):
124+
pass
125+
126+
@pytest.fixture(params=[False, True])
127+
def _test_all(self, request, setup_class):
128+
# Only import if deep speech module is available
129+
from art.estimators.speech_recognition.pytorch_deep_speech import PyTorchDeepSpeech
130+
from art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch import ImperceptibleASRPytorch
131+
132+
# Without amp
133+
if request.param is False:
134+
# Create DeepSpeech estimator
135+
speech_recognizer = PyTorchDeepSpeech(pretrained_model="librispeech")
136+
137+
# Create attack
138+
asr_attack = ImperceptibleASRPytorch(
139+
estimator=speech_recognizer,
140+
initial_eps=0.001,
141+
max_iter_1st_stage=50,
142+
max_iter_2nd_stage=50,
143+
learning_rate_1st_stage=0.00001,
144+
learning_rate_2nd_stage=0.001,
145+
optimizer_1st_stage=torch.optim.SGD,
146+
optimizer_2nd_stage=torch.optim.SGD,
147+
global_max_length=2000,
148+
initial_rescale=1.0,
149+
rescale_factor=0.8,
150+
num_iter_adjust_rescale=5,
151+
initial_alpha=0.01,
152+
increase_factor_alpha=1.2,
153+
num_iter_increase_alpha=5,
154+
decrease_factor_alpha=0.8,
155+
num_iter_decrease_alpha=5,
156+
batch_size=2,
157+
use_amp=False,
158+
opt_level="O1",
159+
loss_scale=1,
160+
)
161+
162+
# With amp
163+
else:
164+
# Create DeepSpeech estimator
165+
speech_recognizer = PyTorchDeepSpeech(pretrained_model="librispeech", device_type="gpu", use_amp=True)
166+
167+
# Create attack
168+
asr_attack = ImperceptibleASRPytorch(
169+
estimator=speech_recognizer,
170+
initial_eps=0.001,
171+
max_iter_1st_stage=50,
172+
max_iter_2nd_stage=50,
173+
learning_rate_1st_stage=0.00001,
174+
learning_rate_2nd_stage=0.001,
175+
optimizer_1st_stage=torch.optim.SGD,
176+
optimizer_2nd_stage=torch.optim.SGD,
177+
global_max_length=2000,
178+
initial_rescale=1.0,
179+
rescale_factor=0.8,
180+
num_iter_adjust_rescale=2,
181+
initial_alpha=0.01,
182+
increase_factor_alpha=1.2,
183+
num_iter_increase_alpha=2,
184+
decrease_factor_alpha=0.8,
185+
num_iter_decrease_alpha=2,
186+
batch_size=2,
187+
use_amp=True,
188+
opt_level="O1",
189+
loss_scale=1,
190+
)
191+
192+
# Test transcription output
193+
transcriptions = speech_recognizer.predict(self.x, batch_size=2, transcription_output=True)
194+
195+
expected_transcriptions = np.array(["", "", ""])
196+
assert (expected_transcriptions == transcriptions).all()
197+
198+
# Generate attack
199+
x_adv = asr_attack.generate(self.x, self.y)
200+
201+
# Test shape
202+
for i in range(3):
203+
assert x_adv[i].shape == self.x[i].shape
204+
205+
# Test transcription adversarial output
206+
# This test is commented by now because of the difference in the prediction function of the estimator
207+
# in the eval() mode vs the train() mode. This test is already tested with the train() mode of the estimator
208+
# and it passed. For the eval() mode, we need to test on much larger data sets, i.e., with increasing
209+
# batch size to hundreds.
210+
211+
# adv_transcriptions = speech_recognizer.predict(x_adv, batch_size=2, transcription_output=True)
212+
# assert (adv_transcriptions == self.y).all()
213+
214+
215+
if __name__ == "__main__":
216+
pytest.cmdline.main("-q -s {} --mlFramework=pytorch --durations=0".format(__file__).split(" "))

0 commit comments

Comments
 (0)