Skip to content

Commit 2a60955

Browse files
committed
⚡ Add single-channel conversion when reading audio
1 parent 295b132 commit 2a60955

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737

3838
setuptools.setup(
3939
name="TensorFlowASR",
40-
version="0.4.1",
40+
version="0.4.2",
4141
author="Huy Le Nguyen",
4242
author_email="[email protected]",
4343
description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2",

tensorflow_asr/featurizers/speech_featurizers.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,14 @@
2626

2727
def read_raw_audio(audio, sample_rate=16000):
2828
if isinstance(audio, str):
29-
wave, _ = librosa.load(os.path.expanduser(audio), sr=sample_rate)
29+
wave, _ = librosa.load(os.path.expanduser(audio), sr=sample_rate, mono=True)
3030
elif isinstance(audio, bytes):
3131
wave, sr = sf.read(io.BytesIO(audio))
32+
if wave.ndim > 1: wave = np.mean(wave, axis=-1)
3233
wave = np.asfortranarray(wave)
33-
if sr != sample_rate:
34-
wave = librosa.resample(wave, sr, sample_rate)
34+
if sr != sample_rate: wave = librosa.resample(wave, sr, sample_rate)
3535
elif isinstance(audio, np.ndarray):
36+
if audio.ndim > 1: ValueError("input audio must be single channel")
3637
return audio
3738
else:
3839
raise ValueError("input audio must be either a path or bytes")

0 commit comments

Comments
 (0)