|
| 1 | +# MIT License |
| 2 | +# |
| 3 | +# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2022 |
| 4 | +# |
| 5 | +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated |
| 6 | +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the |
| 7 | +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit |
| 8 | +# persons to whom the Software is furnished to do so, subject to the following conditions: |
| 9 | +# |
| 10 | +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the |
| 11 | +# Software. |
| 12 | +# |
| 13 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
| 14 | +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 15 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 16 | +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 17 | +# SOFTWARE. |
"""
Adversarial perturbations designed to work for audio.
"""
import logging

import numpy as np
import librosa
| 23 | + |
| 24 | + |
def insert_tone_trigger(
    x: np.ndarray,
    sampling_rate: int = 16000,
    frequency: int = 440,
    duration: float = 0.1,
    random: bool = False,
    shift: int = 0,
    scale: float = 0.1,
) -> np.ndarray:
    """
    Adds a 'tone' with a given frequency to audio example. Works for a single example or a batch of examples.

    The input is never modified; a poisoned copy with the same shape and dtype is returned.

    :param x: N x L matrix or length L array, where N is number of examples, L is the length in number of samples.
              X is in range [-1,1] (a floating-point dtype is expected).
    :param sampling_rate: Positive integer denoting the sampling rate for x.
    :param frequency: Frequency of the tone to be added.
    :param duration: Duration of the tone to be added, in seconds.
    :param random: Flag indicating whether the trigger should be randomly placed. When set, `shift` is ignored and an
                   independent offset is drawn for every example.
    :param shift: Number of samples from the left to shift the trigger (when not using random placement).
    :param scale: Scaling factor for mixing the trigger.
    :return: Backdoored audio.
    :raises ValueError: If `x` is not 1- or 2-dimensional, if the tone is longer than the audio, or if the tone does
                        not fit at the requested `shift`.
    """
    n_dim = x.ndim
    if n_dim not in (1, 2):
        raise ValueError("Invalid array shape " + str(x.shape))

    audio = np.copy(x)
    if n_dim == 2 and audio.shape[0] == 0:
        # Empty batch: nothing to poison, but keep the (0, L) shape and the dtype of the input.
        return audio

    length = audio.shape[-1]

    # The tone does not depend on the example, so it is generated once for the whole batch.
    tone_trigger = librosa.tone(frequency, sr=sampling_rate, duration=duration)
    bd_length = tone_trigger.shape[0]

    if bd_length > length:
        logging.getLogger(__name__).debug("audio shape: %s, trigger shape: %s", audio.shape, tone_trigger.shape)
        raise ValueError("Backdoor audio does not fit inside the original audio.")

    if not random and shift + bd_length > length:
        raise ValueError("Shift + Backdoor length is greater than audio's length.")

    # Cast to the audio dtype before scaling so the arithmetic is carried out in the precision of the input.
    scaled_trigger = scale * tone_trigger.astype(audio.dtype)

    # View a single example as a batch of one so both cases share the same loop; rows are views into `audio`.
    rows = audio if n_dim == 2 else audio[np.newaxis, :]
    for row in rows:
        # `randint` excludes its upper bound, hence the + 1: every offset in [0, length - bd_length] is valid,
        # including the only possible offset (0) when the trigger is exactly as long as the audio.
        start = np.random.randint(length - bd_length + 1) if random else shift
        row[start : start + bd_length] += scaled_trigger

    return audio
| 83 | + |
| 84 | + |
def insert_audio_trigger(
    x: np.ndarray,
    sampling_rate: int = 16000,
    backdoor_path: str = "../../../utils/data/backdoors/cough_trigger.wav",
    duration: float = 1.0,
    random: bool = False,
    shift: int = 0,
    scale: float = 0.1,
) -> np.ndarray:
    """
    Adds an audio backdoor trigger to a set of audio examples. Works for a single example or a batch of examples.

    The input is never modified; a poisoned copy with the same shape and dtype is returned.

    :param x: N x L matrix or length L array, where N is number of examples, L is the length in number of samples.
              X is in range [-1,1] (a floating-point dtype is expected).
    :param sampling_rate: Positive integer denoting the sampling rate for x. The trigger is resampled to this rate
                          if its own sampling rate differs.
    :param backdoor_path: The path to the audio to insert as a trigger.
                          NOTE(review): the default is a relative path, so it presumably resolves against the
                          current working directory - confirm against the callers.
    :param duration: Maximum duration of the trigger in seconds; it is forwarded to `librosa.load`. Pass `None` to
                     use the full trigger.
    :param random: Flag indicating whether the trigger should be randomly placed. When set, `shift` is ignored and an
                   independent offset is drawn for every example.
    :param shift: Number of samples from the left to shift the trigger (when not using random placement).
    :param scale: Scaling factor for mixing the trigger.
    :return: Backdoored audio.
    :raises ValueError: If `x` is not 1- or 2-dimensional, if the trigger is longer than the audio, or if the trigger
                        does not fit at the requested `shift`.
    """
    n_dim = x.ndim
    if n_dim not in (1, 2):
        raise ValueError("Invalid array shape " + str(x.shape))

    audio = np.copy(x)
    if n_dim == 2 and audio.shape[0] == 0:
        # Empty batch: nothing to poison, but keep the (0, L) shape and the dtype of the input.
        return audio

    length = audio.shape[-1]

    # The trigger file is read once per call and shared by every example of the batch.
    trigger, bd_sampling_rate = librosa.load(backdoor_path, mono=True, sr=None, duration=duration)

    if sampling_rate != bd_sampling_rate:
        logging.getLogger(__name__).warning(
            "Backdoor sampling rate does not match with the sampling rate provided. "
            "Resampling the backdoor to match the sampling rate."
        )
        trigger, _ = librosa.load(backdoor_path, mono=True, sr=sampling_rate, duration=duration)

    bd_length = trigger.shape[0]

    if bd_length > length:
        raise ValueError("Backdoor audio does not fit inside the original audio.")

    if not random and shift + bd_length > length:
        raise ValueError("Shift + Backdoor length is greater than audio's length.")

    # Cast to the audio dtype before scaling so the arithmetic is carried out in the precision of the input.
    scaled_trigger = scale * trigger.astype(audio.dtype)

    # View a single example as a batch of one so both cases share the same loop; rows are views into `audio`.
    rows = audio if n_dim == 2 else audio[np.newaxis, :]
    for row in rows:
        # `randint` excludes its upper bound, hence the + 1: every offset in [0, length - bd_length] is valid,
        # including the only possible offset (0) when the trigger is exactly as long as the audio.
        start = np.random.randint(length - bd_length + 1) if random else shift
        row[start : start + bd_length] += scaled_trigger

    return audio
0 commit comments