Skip to content
34 changes: 32 additions & 2 deletions nemo/collections/asr/parts/preprocessing/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@

except ModuleNotFoundError:
HAVE_PYDUB = False

import librosa

available_formats = sf.available_formats()
sf_supported_formats = ["." + i.lower() for i in available_formats.keys()]
sf_supported_formats.append(".opus")


ChannelSelectorType = Union[int, Iterable[int], str]
Expand Down Expand Up @@ -347,11 +348,40 @@ def from_file(
except RuntimeError as e:
logging.error(
f"Loading {audio_file} via SoundFile raised RuntimeError: `{e}`. "
f"NeMo will fallback to loading via pydub."
f"NeMo will fallback to loading via Librosa."
)

if hasattr(audio_file, "seek"):
audio_file.seek(0)
if samples is None:
try:
dtype = 'int32' if int_values else 'float32'
lr_sr = None if target_sr is None else target_sr

y, sr = librosa.load(
audio_file,
sr=lr_sr,
mono=False,
offset=(offset if offset is not None else 0.0),
duration=(duration if duration is not None and duration > 0 else None),
)

if y.ndim == 1:
samples = y.astype('float32')
else:

samples = y.T.astype('float32')
if int_values:

samples = (samples * (2**31)).astype('int32')
sample_rate = sr
print("Loaded using librosa")
except Exception as e:
logging.error(
f"Loading {audio_file} via librosa raised Exception: `{e}`. "
"NeMo will fallback to loading via pydub if available."
)
samples = None

if HAVE_PYDUB and samples is None:
try:
Expand Down