Skip to content

Commit 8ba938d

Browse files
authored
Merge pull request #1702 from Trusted-AI/development_issue_1683
Add support for framework-specific preprocessing of object arrays
2 parents c8c6f6d + 1c81523 commit 8ba938d

File tree

10 files changed, with 1955 additions (+) and 1305 deletions (-).

.github/actions/deepspeech-v2/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,9 @@ RUN cd warp-ctc/pytorch_binding && python setup.py install
3737

3838
RUN git clone https://github.com/SeanNaren/deepspeech.pytorch.git
3939
RUN cd deepspeech.pytorch && git checkout V2.1
40-
RUN cd deepspeech.pytorch && pip install -r requirements_test.txt
40+
RUN cd deepspeech.pytorch && pip install -r requirements.txt
4141
RUN cd deepspeech.pytorch && pip install -e .
4242

4343
RUN pip install numba==0.50.0
4444
RUN pip install pytest-cov
45+
RUN pip install pydub==0.25.1

.github/actions/deepspeech-v3/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,9 @@ RUN pip install torchaudio==0.6.0
3434
RUN pip install --no-build-isolation fairscale
3535

3636
RUN git clone https://github.com/SeanNaren/deepspeech.pytorch.git
37-
RUN cd deepspeech.pytorch && pip install -r requirements_test.txt
37+
RUN cd deepspeech.pytorch && pip install -r requirements.txt
3838
RUN cd deepspeech.pytorch && pip install -e .
3939

4040
RUN pip install numba==0.50.0
4141
RUN pip install pytest-cov
42+
RUN pip install pydub==0.25.1

.github/workflows/ci-deepspeech-v2.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ jobs:
2222
test_deepspeech_v2:
2323
name: PyTorchDeepSpeech v2
2424
runs-on: ubuntu-latest
25-
container: minhitbk/art_testing_envs:deepspeech_v2
25+
container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v2
2626
steps:
2727
- name: Checkout Repo
2828
uses: actions/checkout@v3

.github/workflows/ci-deepspeech-v3.yml

Lines changed: 1 addition & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -19,26 +19,13 @@ on:
1919
- cron: '0 8 * * 0'
2020

2121
jobs:
22-
test_deepspeech_v3:
23-
name: PyTorchDeepSpeech v3
24-
runs-on: ubuntu-latest
25-
container: minhitbk/art_testing_envs:deepspeech_v3
26-
steps:
27-
- name: Checkout Repo
28-
uses: actions/checkout@v3
29-
- name: Run Test Action
30-
uses: ./.github/actions/deepspeech-v3
31-
- name: Upload coverage to Codecov
32-
uses: codecov/codecov-action@v3
33-
with:
34-
fail_ci_if_error: true
3522
test_deepspeech_v3_torch_1_10:
3623
name: PyTorchDeepSpeech v3 / PyTorch 1.10
3724
runs-on: ubuntu-latest
3825
container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v3_torch_1_10
3926
steps:
4027
- name: Checkout Repo
41-
uses: actions/checkout@v2.4.0
28+
uses: actions/checkout@v3
4229
- name: Run Test Action
4330
uses: ./.github/actions/deepspeech-v3
4431
- name: Upload coverage to Codecov

art/defences/preprocessor/mp3_compression_pytorch.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,23 @@ def forward(
115115
:param y: Labels of the sample `x`. This function does not affect them in any way.
116116
:return: Compressed sample.
117117
"""
118+
import torch # lgtm [py/repeated-import]
119+
120+
ndim = x.ndim
121+
122+
if ndim == 1:
123+
x = torch.unsqueeze(x, dim=0)
124+
if self.channels_first:
125+
dim = 1
126+
else:
127+
dim = 2
128+
x = torch.unsqueeze(x, dim=dim)
129+
118130
x_compressed = self._compression_pytorch_numpy.apply(x)
131+
132+
if ndim == 1:
133+
x_compressed = torch.squeeze(x_compressed)
134+
119135
return x_compressed, y
120136

121137
def _check_params(self) -> None:

art/estimators/speech_recognition/pytorch_deep_speech.py

Lines changed: 21 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -352,17 +352,17 @@ def predict(
352352
"""
353353
import torch # lgtm [py/repeated-import]
354354

355-
x_in = np.empty(len(x), dtype=object)
356-
x_in[:] = list(x)
355+
# Apply preprocessing
356+
x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)
357+
358+
x_in = np.empty(len(x_preprocessed), dtype=object)
359+
x_in[:] = list(x_preprocessed)
357360

358361
# Put the model in the eval mode
359362
self._model.eval()
360363

361-
# Apply preprocessing
362-
x_preprocessed, _ = self._apply_preprocessing(x_in, y=None, fit=False)
363-
364364
# Transform x into the model input space
365-
inputs, _, input_rates, _, batch_idx = self._transform_model_input(x=x_preprocessed)
365+
inputs, _, input_rates, _, batch_idx = self._transform_model_input(x=x_in)
366366

367367
# Compute real input sizes
368368
input_sizes = input_rates.mul_(inputs.size()[-1]).int()
@@ -437,21 +437,19 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
437437
lengths. A possible example of `y` could be: `y = np.array(['SIXTY ONE', 'HELLO'])`.
438438
:return: Loss gradients of the same shape as `x`.
439439
"""
440-
x_in = np.empty(len(x), dtype=object)
441-
x_in[:] = list(x)
440+
# Apply preprocessing
441+
x_preprocessed, _ = self._apply_preprocessing(x, None, fit=False)
442+
443+
x_in = np.empty(len(x_preprocessed), dtype=object)
444+
x_in[:] = list(x_preprocessed)
442445

443446
# Put the model in the training mode, otherwise CUDA can't backpropagate through the model.
444447
# However, model uses batch norm layers which need to be frozen
445448
self._model.train()
446449
self.set_batchnorm(train=False)
447450

448-
# Apply preprocessing
449-
x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=False)
450-
451451
# Transform data into the model input space
452-
inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(
453-
x=x_preprocessed, y=y_preprocessed, compute_gradient=True
454-
)
452+
inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(x=x_in, y=y, compute_gradient=True)
455453

456454
# Compute real input sizes
457455
input_sizes = input_rates.mul_(inputs.size()[-1]).int()
@@ -484,8 +482,8 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
484482

485483
# Get results
486484
results_list = []
487-
for i, _ in enumerate(x_preprocessed):
488-
results_list.append(x_preprocessed[i].grad.cpu().numpy().copy())
485+
for i, _ in enumerate(x_in):
486+
results_list.append(x_in[i].grad.cpu().numpy().copy())
489487

490488
results = np.array(results_list)
491489

@@ -494,7 +492,7 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
494492
results_[:] = list(results)
495493
results = results_
496494

497-
results = self._apply_preprocessing_gradient(x_in, results)
495+
results = self._apply_preprocessing_gradient(x, results)
498496

499497
if x.dtype != object:
500498
results = np.array([i for i in results], dtype=x.dtype) # pylint: disable=R1721
@@ -521,18 +519,19 @@ def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: in
521519
"""
522520
import random
523521

524-
x_in = np.empty(len(x), dtype=object)
525-
x_in[:] = list(x)
522+
# Apply preprocessing
523+
x_preprocessed, _ = self._apply_preprocessing(x, None, fit=True)
524+
y_preprocessed = y
525+
526+
x_in = np.empty(len(x_preprocessed), dtype=object)
527+
x_in[:] = list(x_preprocessed)
526528

527529
# Put the model in the training mode
528530
self._model.train()
529531

530532
if self.optimizer is None: # pragma: no cover
531533
raise ValueError("An optimizer is required to train the model, but none was provided.")
532534

533-
# Apply preprocessing
534-
x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=True)
535-
536535
# Train with batch processing
537536
num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
538537
ind = np.arange(len(x_preprocessed))

0 commit comments

Comments
 (0)