Skip to content

Commit 5465413

Browse files
Merge pull request #37 from pythonlessons/develop
fixing dependency with librosa library
2 parents 870aedc + 8686f95 commit 5465413

File tree

5 files changed

+31
-15
lines changed

5 files changed

+31
-15
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## [1.1.6] - 2022-10-30
2+
### Changed
3+
- Fixed dependencies with `librosa` library
4+
15
## [1.1.5] - 2022-10-17
26
### Changed
37
- Fixed dependencies with `librosa` library

mltu/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.1.5"
1+
__version__ = "1.1.6"
22

33
from .annotations.images import Image
44
from .annotations.images import CVImage

mltu/augmentors.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
from . import Image
77
from mltu.annotations.audio import Audio
88

9+
try:
10+
import librosa
11+
except:
12+
print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")
13+
914
"""
1015
Implemented image augmentors:
1116
- RandomBrightness
@@ -592,15 +597,14 @@ def __init__(
592597
self.max_n_steps = max_n_steps
593598

594599
try:
595-
import librosa
596-
# samplerate
600+
librosa.__version__
597601
except ImportError:
598602
raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.")
599603

600604
def augment(self, audio: Audio) -> Audio:
601605
random_n_steps = np.random.randint(-self.max_n_steps, self.max_n_steps)
602606
# changing default res_type "kaiser_best" to "linear" for speed and memory efficiency
603-
shift_audio = self.librosa.effects.pitch_shift(
607+
shift_audio = librosa.effects.pitch_shift(
604608
audio.numpy(), sr=audio.sample_rate, n_steps=random_n_steps, res_type="linear"
605609
)
606610
audio.audio = shift_audio
@@ -631,13 +635,13 @@ def __init__(
631635
self.max_rate = max_rate
632636

633637
try:
634-
import librosa
638+
librosa.__version__
635639
except ImportError:
636640
raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.")
637641

638642
def augment(self, audio: Audio) -> Audio:
639643
random_rate = np.random.uniform(self.min_rate, self.max_rate)
640-
stretch_audio = self.librosa.effects.time_stretch(audio.numpy(), rate=random_rate)
644+
stretch_audio = librosa.effects.time_stretch(audio.numpy(), rate=random_rate)
641645
audio.audio = stretch_audio
642646

643647
return audio

mltu/preprocessors.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
import matplotlib
66
import logging
77

8+
try:
9+
import librosa
10+
except:
11+
print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")
12+
813
from . import Image
914
from mltu.annotations.audio import Audio
1015

@@ -67,7 +72,6 @@ def __init__(
6772
self.logger.setLevel(log_level)
6873

6974
try:
70-
import librosa
7175
librosa.__version__
7276
except AttributeError:
7377
raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.")
@@ -89,7 +93,7 @@ def __call__(self, audio_path: str, label: typing.Any) -> typing.Tuple[np.ndarra
8993
else:
9094
raise TypeError(f"Audio {audio_path} is not a string.")
9195

92-
audio = Audio(audio_path, sample_rate=self.sample_rate, library=self.librosa)
96+
audio = Audio(audio_path, sample_rate=self.sample_rate, library=librosa)
9397

9498
if not audio.init_successful:
9599
audio = None
@@ -120,7 +124,6 @@ def __init__(
120124
matplotlib.interactive(False)
121125
# Check if librosa is installed
122126
try:
123-
import librosa
124127
librosa.__version__
125128
except AttributeError:
126129
raise ImportError("librosa is required to read WAV files. Please install it with `pip install librosa`.")
@@ -139,12 +142,12 @@ def get_spectrogram(wav_path: str, frame_length: int, frame_step: int, fft_lengt
139142
np.ndarray: Spectrogram of the WAV file.
140143
"""
141144
# Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
142-
audio, orig_sr = WavReader.librosa.load(wav_path)
145+
audio, orig_sr = librosa.load(wav_path)
143146

144147
# Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram'
145148
# The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components.
146149
# The resulting spectrogram is also transposed for convenience
147-
spectrogram = WavReader.librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T
150+
spectrogram = librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T
148151

149152
# Take the absolute value of the spectrogram to obtain the magnitude spectrum
150153
spectrogram = np.abs(spectrogram)
@@ -168,7 +171,7 @@ def plot_raw_audio(wav_path: str, title: str = None, sr: int = 16000) -> None:
168171
title (str, optional): Title
169172
"""
170173
# Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
171-
audio, orig_sr = WavReader.librosa.load(wav_path, sr=sr)
174+
audio, orig_sr = librosa.load(wav_path, sr=sr)
172175

173176
duration = len(audio) / orig_sr
174177

mltu/transformers.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
import logging
44
import numpy as np
55

6+
try:
7+
import librosa
8+
except:
9+
print("librosa not found. Please install it with `pip install librosa` if you plan to use it.")
10+
611
from . import Image
712
from mltu.annotations.audio import Audio
813

@@ -231,7 +236,7 @@ def __call__(self, audio: Audio, label: typing.Any):
231236
if self.limit:
232237
padded_audios = padded_audios[:, :self.max_audio_length]
233238

234-
return padded_audios, np.array(label)
239+
return padded_audios, label
235240

236241
audio_numpy = audio.numpy()
237242
# limit audio if it exceeds max_audio_length
@@ -265,7 +270,7 @@ def __init__(
265270
self.fft_length = fft_length
266271

267272
try:
268-
import librosa
273+
librosa.__version__
269274
except ImportError:
270275
raise ImportError("librosa is required to transform Audio. Please install it with `pip install librosa`.")
271276

@@ -284,7 +289,7 @@ def __call__(self, audio: Audio, label: typing.Any):
284289
# Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram'
285290
# The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components.
286291
# The resulting spectrogram is also transposed for convenience
287-
spectrogram = self.librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T
292+
spectrogram = librosa.stft(audio.numpy(), hop_length=self.frame_step, win_length=self.frame_length, n_fft=self.fft_length).T
288293

289294
# Take the absolute value of the spectrogram to obtain the magnitude spectrum
290295
spectrogram = np.abs(spectrogram)

0 commit comments

Comments
 (0)