Skip to content

Commit a04e5dc

Browse files
committed
adding audio conversion to appropriate format on the fly
1 parent 670203e commit a04e5dc

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

convert_wavs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@ def convert_audio(audio_path, target_path, remove=False):
1717
remove (bool): whether to remove the old file after converting
1818
Note that this function requires ffmpeg installed in your system."""
1919

20-
os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
20+
v = os.system(f"ffmpeg -i {audio_path} -ac 1 -ar 16000 {target_path}")
2121
# os.system(f"ffmpeg -i {audio_path} -ac 1 {target_path}")
2222
if remove:
2323
os.remove(audio_path)
24+
return v
2425

2526

2627
def convert_audios(path, target_path, remove=False):

utils.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import librosa
33
import numpy as np
44
import pickle
5+
import os
6+
from convert_wavs import convert_audio
57

68

79
AVAILABLE_EMOTIONS = {
@@ -59,7 +61,23 @@ def extract_feature(file_name, **kwargs):
5961
mel = kwargs.get("mel")
6062
contrast = kwargs.get("contrast")
6163
tonnetz = kwargs.get("tonnetz")
62-
with soundfile.SoundFile(file_name) as sound_file:
64+
try:
65+
with soundfile.SoundFile(file_name) as sound_file:
66+
pass
67+
except RuntimeError:
68+
# not properly formatted, convert to 16000 sample rate & mono channel using ffmpeg
69+
# get the basename
70+
basename = os.path.basename(file_name)
71+
dirname = os.path.dirname(file_name)
72+
name, ext = os.path.splitext(basename)
73+
new_basename = f"{name}_c.wav"
74+
new_filename = os.path.join(dirname, new_basename)
75+
v = convert_audio(file_name, new_filename)
76+
if v:
77+
raise NotImplementedError("Converting the audio files failed, make sure `ffmpeg` is installed in your machine and added to PATH.")
78+
else:
79+
new_filename = file_name
80+
with soundfile.SoundFile(new_filename) as sound_file:
6381
X = sound_file.read(dtype="float32")
6482
sample_rate = sound_file.samplerate
6583
if chroma or contrast:

0 commit comments

Comments
 (0)