⚡ Add single channel conversion in reading audio

nglehuy · nglehuy · commit 2a609555c4d0 · 2020-12-11T22:23:37.000+07:00
diff --git a/setup.py b/setup.py
@@ -37,7 +37,7 @@
 
 setuptools.setup(
     name="TensorFlowASR",
-    version="0.4.1",
+    version="0.4.2",
     author="Huy Le Nguyen",
     author_email="nlhuy.cs.16@gmail.com",
     description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2",
diff --git a/tensorflow_asr/featurizers/speech_featurizers.py b/tensorflow_asr/featurizers/speech_featurizers.py
@@ -26,13 +26,14 @@
 
 def read_raw_audio(audio, sample_rate=16000):
     if isinstance(audio, str):
-        wave, _ = librosa.load(os.path.expanduser(audio), sr=sample_rate)
+        wave, _ = librosa.load(os.path.expanduser(audio), sr=sample_rate, mono=True)
     elif isinstance(audio, bytes):
         wave, sr = sf.read(io.BytesIO(audio))
+        if wave.ndim > 1: wave = np.mean(wave, axis=-1)  # average channels down to mono
         wave = np.asfortranarray(wave)
-        if sr != sample_rate:
-            wave = librosa.resample(wave, sr, sample_rate)
+        if sr != sample_rate: wave = librosa.resample(wave, sr, sample_rate)
     elif isinstance(audio, np.ndarray):
+        if audio.ndim > 1: raise ValueError("input audio must be single channel")  # reject, don't pass through
         return audio
     else:
         raise ValueError("input audio must be either a path or bytes")