Skip to content

Commit 3187fcb

Browse files
Remove augmentation effector (#4002)
* Remove effector from data augmentation tutorial * Remove phone recording section --------- Co-authored-by: Sam Anklesaria <[email protected]>
1 parent c99d0d1 commit 3187fcb

File tree

1 file changed

+2
-185
lines changed

1 file changed

+2
-185
lines changed

examples/tutorials/audio_data_augmentation_tutorial.py

Lines changed: 2 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -40,46 +40,16 @@
4040

4141

4242
######################################################################
43-
# Applying effects and filtering
43+
# Loading the data
4444
# ------------------------------
4545
#
46-
# :py:class:`torchaudio.io.AudioEffector` allows for directly applying
47-
# filters and codecs to Tensor objects, in a similar way to the ``ffmpeg``
48-
# command.
49-
#
50-
# `AudioEffector Usages <./effector_tutorial.html>`__ explains how to use
51-
# this class; please refer to that tutorial for details.
52-
#
5346

54-
# Load the data
5547
waveform1, sample_rate = torchaudio.load(SAMPLE_WAV, channels_first=False)
5648

57-
# Define effects
58-
effect = ",".join(
59-
[
60-
"lowpass=frequency=300:poles=1", # apply single-pole lowpass filter
61-
"atempo=0.8", # reduce the speed
62-
"aecho=in_gain=0.8:out_gain=0.9:delays=200:decays=0.3|delays=400:decays=0.3"
63-
# Applying echo gives some dramatic feeling
64-
],
65-
)
66-
67-
68-
# Apply effects
69-
def apply_effect(waveform, sample_rate, effect):
70-
effector = torchaudio.io.AudioEffector(effect=effect)
71-
return effector.apply(waveform, sample_rate)
72-
73-
74-
waveform2 = apply_effect(waveform1, sample_rate, effect)
75-
7649
print(waveform1.shape, sample_rate)
77-
print(waveform2.shape, sample_rate)
7850

7951
######################################################################
80-
# Note that the number of frames and number of channels are different from
81-
# those of the original after the effects are applied. Let’s listen to the
82-
# audio.
52+
# Let’s listen to the audio.
8353
#
8454

8555

@@ -124,24 +94,11 @@ def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
12494

12595

12696
######################################################################
127-
# Original
128-
# ~~~~~~~~
129-
#
13097

13198
plot_waveform(waveform1.T, sample_rate, title="Original", xlim=(-0.1, 3.2))
13299
plot_specgram(waveform1.T, sample_rate, title="Original", xlim=(0, 3.04))
133100
Audio(waveform1.T, rate=sample_rate)
134101

135-
######################################################################
136-
# Effects applied
137-
# ~~~~~~~~~~~~~~~
138-
#
139-
140-
plot_waveform(waveform2.T, sample_rate, title="Effects Applied", xlim=(-0.1, 3.2))
141-
plot_specgram(waveform2.T, sample_rate, title="Effects Applied", xlim=(0, 3.04))
142-
Audio(waveform2.T, rate=sample_rate)
143-
144-
145102
######################################################################
146103
# Simulating room reverberation
147104
# -----------------------------
@@ -265,143 +222,3 @@ def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
265222
plot_waveform(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]")
266223
plot_specgram(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]")
267224
Audio(noisy_speech, rate=sample_rate)
268-
269-
270-
######################################################################
271-
# Applying codec to Tensor object
272-
# -------------------------------
273-
#
274-
# :py:class:`torchaudio.io.AudioEffector` can also apply codecs to
275-
# a Tensor object.
276-
#
277-
278-
waveform, sample_rate = torchaudio.load(SAMPLE_SPEECH, channels_first=False)
279-
280-
281-
def apply_codec(waveform, sample_rate, format, encoder=None):
282-
encoder = torchaudio.io.AudioEffector(format=format, encoder=encoder)
283-
return encoder.apply(waveform, sample_rate)
284-
285-
286-
######################################################################
287-
# Original
288-
# ~~~~~~~~
289-
#
290-
291-
plot_waveform(waveform.T, sample_rate, title="Original")
292-
plot_specgram(waveform.T, sample_rate, title="Original")
293-
Audio(waveform.T, rate=sample_rate)
294-
295-
######################################################################
296-
# 8 bit mu-law
297-
# ~~~~~~~~~~~~
298-
#
299-
300-
mulaw = apply_codec(waveform, sample_rate, "wav", encoder="pcm_mulaw")
301-
plot_waveform(mulaw.T, sample_rate, title="8 bit mu-law")
302-
plot_specgram(mulaw.T, sample_rate, title="8 bit mu-law")
303-
Audio(mulaw.T, rate=sample_rate)
304-
305-
######################################################################
306-
# G.722
307-
# ~~~~~
308-
#
309-
310-
g722 = apply_codec(waveform, sample_rate, "g722")
311-
plot_waveform(g722.T, sample_rate, title="G.722")
312-
plot_specgram(g722.T, sample_rate, title="G.722")
313-
Audio(g722.T, rate=sample_rate)
314-
315-
######################################################################
316-
# Vorbis
317-
# ~~~~~~
318-
#
319-
320-
vorbis = apply_codec(waveform, sample_rate, "ogg", encoder="vorbis")
321-
plot_waveform(vorbis.T, sample_rate, title="Vorbis")
322-
plot_specgram(vorbis.T, sample_rate, title="Vorbis")
323-
Audio(vorbis.T, rate=sample_rate)
324-
325-
######################################################################
326-
# Simulating a phone recording
327-
# ----------------------------
328-
#
329-
# Combining the previous techniques, we can simulate audio that sounds
330-
# like a person talking over a phone in an echoey room with people talking
331-
# in the background.
332-
#
333-
334-
sample_rate = 16000
335-
original_speech, sample_rate = torchaudio.load(SAMPLE_SPEECH)
336-
337-
plot_specgram(original_speech, sample_rate, title="Original")
338-
339-
# Apply RIR
340-
rir_applied = F.fftconvolve(speech, rir)
341-
342-
plot_specgram(rir_applied, sample_rate, title="RIR Applied")
343-
344-
# Add background noise
345-
# Because the noise is recorded in the actual environment, we consider that
346-
# the noise contains the acoustic feature of the environment. Therefore, we add
347-
# the noise after RIR application.
348-
noise, _ = torchaudio.load(SAMPLE_NOISE)
349-
noise = noise[:, : rir_applied.shape[1]]
350-
351-
snr_db = torch.tensor([8])
352-
bg_added = F.add_noise(rir_applied, noise, snr_db)
353-
354-
plot_specgram(bg_added, sample_rate, title="BG noise added")
355-
356-
# Apply filtering and change sample rate
357-
effect = ",".join(
358-
[
359-
"lowpass=frequency=4000:poles=1",
360-
"compand=attacks=0.02:decays=0.05:points=-60/-60|-30/-10|-20/-8|-5/-8|-2/-8:gain=-8:volume=-7:delay=0.05",
361-
]
362-
)
363-
364-
filtered = apply_effect(bg_added.T, sample_rate, effect)
365-
sample_rate2 = 8000
366-
367-
plot_specgram(filtered.T, sample_rate2, title="Filtered")
368-
369-
# Apply telephony codec
370-
codec_applied = apply_codec(filtered, sample_rate2, "g722")
371-
plot_specgram(codec_applied.T, sample_rate2, title="G.722 Codec Applied")
372-
373-
374-
######################################################################
375-
# Original speech
376-
# ~~~~~~~~~~~~~~~
377-
#
378-
379-
Audio(original_speech, rate=sample_rate)
380-
381-
######################################################################
382-
# RIR applied
383-
# ~~~~~~~~~~~
384-
#
385-
386-
Audio(rir_applied, rate=sample_rate)
387-
388-
######################################################################
389-
# Background noise added
390-
# ~~~~~~~~~~~~~~~~~~~~~~
391-
#
392-
393-
Audio(bg_added, rate=sample_rate)
394-
395-
######################################################################
396-
# Filtered
397-
# ~~~~~~~~
398-
#
399-
400-
Audio(filtered.T, rate=sample_rate2)
401-
402-
######################################################################
403-
# Codec applied
404-
# ~~~~~~~~~~~~~
405-
#
406-
407-
Audio(codec_applied.T, rate=sample_rate2)

0 commit comments

Comments
 (0)