Apply review comments

hesseltuinhof · hesseltuinhof · commit 43890d183951 · 2020-11-27T21:32:08.000Z
Signed-off-by: Hessel Tuinhof <hessel.tuinhof@ibm.com>
diff --git a/art/attacks/evasion/imperceptible_asr/imperceptible_asr.py b/art/attacks/evasion/imperceptible_asr/imperceptible_asr.py
@@ -40,6 +40,8 @@
     from tensorflow.compat.v1 import Tensor
     from torch import Tensor as PTensor
 
+    from art.utils import SPEECH_RECOGNIZER_TYPE
+
 logger = logging.getLogger(__name__)
 
 
@@ -65,7 +67,7 @@ class ImperceptibleASR(EvasionAttack):
 
     def __init__(
         self,
-        estimator: Union["PyTorchEstimator", "TensorFlowV2Estimator"],
+        estimator: "SPEECH_RECOGNIZER_TYPE",
         masker: "PsychoacousticMasker",
         eps: float = 2000.0,
         learning_rate_1: float = 100.0,
@@ -349,7 +351,7 @@ def _loss_gradient_masking_threshold(
 
     def _loss_gradient_masking_threshold_tf(
         self, perturbation: "Tensor", psd_maximum_stabilized: "Tensor", masking_threshold_stabilized: "Tensor"
-    ) -> "Tensor":
+    ) -> Union["Tensor", "Tensor"]:
         """
         Compute loss gradient of the masking threshold loss in TensorFlow.
 
@@ -471,7 +473,6 @@ def _approximate_power_spectral_density_torch(
         psd_matrix_approximated = pow(10.0, 9.6) / torch.unsqueeze(psd_maximum_stabilized, 1) * psd_matrix
 
         # return PSD matrix such that shape is (batch_size, window_size // 2 + 1, frame_length)
-        # return torch.transpose(psd_matrix_approximated, 1, 2)
         return psd_matrix_approximated
 
     def _check_params(self) -> None:
diff --git a/art/utils.py b/art/utils.py
@@ -93,6 +93,9 @@
     from art.estimators.object_detection.pytorch_faster_rcnn import PyTorchFasterRCNN
     from art.estimators.object_detection.tensorflow_faster_rcnn import TensorFlowFasterRCNN
 
+    from art.estimators.speech_recognition.pytorch_deep_speech import PyTorchDeepSpeech
+    from art.estimators.speech_recognition.tensorflow_lingvo import TensorFlowLingvoAsr
+
     CLASSIFIER_TYPE = Union[
         Classifier,
         BlackBoxClassifier,
@@ -172,6 +175,10 @@
         ObjectDetector, PyTorchFasterRCNN, TensorFlowFasterRCNN,
     ]
 
+    SPEECH_RECOGNIZER_TYPE = Union[
+        PyTorchDeepSpeech,
+        TensorFlowLingvoAsr,
+    ]
 
 # --------------------------------------------------------------------------------------------------------- DEPRECATION
 
diff --git a/tests/attacks/evasion/conftest.py b/tests/attacks/evasion/conftest.py
@@ -69,7 +69,7 @@ def audio_batch_padded():
 def asr_dummy_estimator(framework):
     def _asr_dummy_estimator(**kwargs):
         asr_dummy = None
-        if framework == "tensorflow2":
+        if framework == "tensorflow2v1":
 
             class TensorFlowV2AsrDummy(TensorFlowV2Estimator, SpeechRecognizerMixin):
                 def get_activations():