Skip to content

Commit a2e70fc

Browse files
committed
Added repet.simonline function and example.
1 parent 4c7ebc9 commit a2e70fc

File tree

3 files changed

+81
-70
lines changed

3 files changed

+81
-70
lines changed

examples.ipynb

Lines changed: 27 additions & 6 deletions
Large diffs are not rendered by default.

images/repet_simonline.png

773 KB
Loading

repet.py

Lines changed: 54 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
http://zafarrafii.com
3131
https://github.com/zafarrafii
3232
https://www.linkedin.com/in/zafarrafii/
33-
01/12/21
33+
01/19/21
3434
"""
3535

3636
import numpy as np
@@ -775,7 +775,7 @@ def simonline(audio_signal, sampling_frequency):
775775
plt.show()
776776
"""
777777

778-
# Number of samples and channels
778+
# Get the number of samples and channels in the audio signal
779779
number_samples, number_channels = np.shape(audio_signal)
780780

781781
# Set the parameters for the STFT
@@ -785,39 +785,37 @@ def simonline(audio_signal, sampling_frequency):
785785
window_function = scipy.signal.hamming(window_length, sym=False)
786786
step_length = int(window_length / 2)
787787

788-
# Number of time frames
788+
# Derive the number of time frames
789789
number_times = int(np.ceil((number_samples - window_length) / step_length + 1))
790790

791-
# Buffer length in time frames
792-
buffer_length2 = int(
793-
round((buffer_length * sampling_frequency - window_length) / step_length + 1)
794-
)
791+
# Derive the number of frequency channels
792+
number_frequencies = int(window_length / 2 + 1)
795793

796-
# Initialize the buffer spectrogram
797-
buffer_spectrogram = np.zeros(
798-
(int(window_length / 2 + 1), buffer_length2, number_channels)
799-
)
794+
# Get the buffer length in time frames
795+
buffer_length2 = round((buffer_length * sampling_frequency) / step_length)
800796

801-
# Loop over the time frames to compute the buffer spectrogram (the last frame will be the frame to be processed)
802-
for time_index in range(0, buffer_length2 - 1):
797+
# Initialize the buffer spectrogram
798+
buffer_spectrogram = np.zeros((number_frequencies, buffer_length2, number_channels))
803799

804-
# Sample index in the signal
805-
sample_index = step_length * time_index
800+
# Loop over the time frames to compute the buffer spectrogram
801+
# (the last frame will be the frame to be processed)
802+
k = 0
803+
for j in range(buffer_length2 - 1):
806804

807805
# Loop over the channels
808-
for channel_index in range(0, number_channels):
806+
for i in range(number_channels):
809807

810808
# Compute the FT of the segment
811809
buffer_ft = np.fft.fft(
812-
audio_signal[sample_index : window_length + sample_index, channel_index]
813-
* window_function,
810+
audio_signal[k : k + window_length, i] * window_function,
814811
axis=0,
815812
)
816813

817-
# Derive the spectrum of the frame
818-
buffer_spectrogram[:, time_index, channel_index] = abs(
819-
buffer_ft[0 : int(window_length / 2 + 1)]
820-
)
814+
# Derive the magnitude spectrum and save it in the buffer spectrogram
815+
buffer_spectrogram[:, j, i] = abs(buffer_ft[0:number_frequencies])
816+
817+
# Update the index
818+
k = k + step_length
821819

822820
# Zero-pad the audio signal at the end
823821
audio_signal = np.pad(
@@ -827,12 +825,14 @@ def simonline(audio_signal, sampling_frequency):
827825
constant_values=0,
828826
)
829827

830-
# Similarity distance in time frames
831-
similarity_distance2 = int(round(similarity_distance * sample_rate / step_length))
828+
# Get the similarity distance in time frames
829+
similarity_distance2 = int(
830+
round(similarity_distance * sampling_frequency / step_length)
831+
)
832832

833-
# Cutoff frequency in frequency channels for the dual high-pass filter of the foreground
833+
# Get the cutoff frequency in frequency channels for the dual high-pass filter of the foreground
834834
cutoff_frequency2 = (
835-
int(np.ceil(cutoff_frequency * (window_length - 1) / sample_rate)) - 1
835+
int(np.ceil(cutoff_frequency * (window_length - 1) / sampling_frequency)) - 1
836836
)
837837

838838
# Initialize the background signal
@@ -841,41 +841,33 @@ def simonline(audio_signal, sampling_frequency):
841841
)
842842

843843
# Loop over the time frames to compute the background signal
844-
for time_index in range(buffer_length2 - 1, number_times):
845-
846-
# Sample index in the signal
847-
sample_index = step_length * time_index
844+
for j in range(buffer_length2 - 1, number_times):
848845

849-
# Time index of the current frame
850-
current_index = time_index % buffer_length2
846+
# Get the time index of the current frame
847+
j0 = j % buffer_length2
851848

852849
# Initialize the FT of the current segment
853850
current_ft = np.zeros((window_length, number_channels), dtype=complex)
854851

855852
# Loop over the channels
856-
for channel_index in range(0, number_channels):
853+
for i in range(number_channels):
857854

858855
# Compute the FT of the current segment
859-
current_ft[:, channel_index] = np.fft.fft(
860-
audio_signal[sample_index : window_length + sample_index, channel_index]
861-
* window_function,
856+
current_ft[:, i] = np.fft.fft(
857+
audio_signal[k : k + window_length, i] * window_function,
862858
axis=0,
863859
)
864860

865-
# Derive the spectrum of the current frame and update the buffer spectrogram
866-
buffer_spectrogram[:, current_index, channel_index] = np.abs(
867-
current_ft[0 : int(window_length / 2 + 1), channel_index]
868-
)
861+
# Derive the magnitude spectrum and update the buffer spectrogram
862+
buffer_spectrogram[:, j0, i] = np.abs(current_ft[0:number_frequencies, i])
869863

870-
# Cosine similarity between the spectrum of the current frame and the past frames, for all the channels
864+
# Compute the cosine similarity between the current frame and the past ones, for all the channels
871865
similarity_vector = _similaritymatrix(
872866
np.mean(buffer_spectrogram, axis=2),
873-
np.mean(
874-
buffer_spectrogram[:, current_index : current_index + 1, :], axis=2
875-
),
867+
np.mean(buffer_spectrogram[:, j0 : j0 + 1, :], axis=2),
876868
)
877869

878-
# Indices of the similar frames
870+
# Estimate the indices of the similar frames
879871
_, similarity_indices = _localmaxima(
880872
similarity_vector[:, 0],
881873
similarity_threshold,
@@ -884,46 +876,44 @@ def simonline(audio_signal, sampling_frequency):
884876
)
885877

886878
# Loop over the channels
887-
for channel_index in range(0, number_channels):
879+
for i in range(number_channels):
888880

889881
# Compute the repeating spectrum for the current frame
890882
repeating_spectrum = np.median(
891-
buffer_spectrogram[:, similarity_indices, channel_index], axis=1
883+
buffer_spectrogram[:, similarity_indices, i], axis=1
892884
)
893885

894886
# Refine the repeating spectrum
895887
repeating_spectrum = np.minimum(
896-
repeating_spectrum, buffer_spectrogram[:, current_index, channel_index]
888+
repeating_spectrum, buffer_spectrogram[:, j0, i]
897889
)
898890

899891
# Derive the repeating mask for the current frame
900892
repeating_mask = (repeating_spectrum + np.finfo(float).eps) / (
901-
buffer_spectrogram[:, current_index, channel_index]
902-
+ np.finfo(float).eps
893+
buffer_spectrogram[:, j0, i] + np.finfo(float).eps
903894
)
904895

905-
# High-pass filtering of the dual foreground
896+
# High-pass filter the dual foreground (force the low-frequency bins of the repeating mask to 1)
906897
repeating_mask[1 : cutoff_frequency2 + 2] = 1
907898

908-
# Mirror the frequency channels
899+
# Recover the mirrored frequencies
909900
repeating_mask = np.concatenate((repeating_mask, repeating_mask[-2:0:-1]))
910901

911902
# Apply the mask to the FT of the current segment
912-
background_ft = repeating_mask * current_ft[:, channel_index]
913-
914-
# Inverse FT of the current segment
915-
background_signal[
916-
sample_index : window_length + sample_index, channel_index
917-
] = background_signal[
918-
sample_index : window_length + sample_index, channel_index
919-
] + np.real(
920-
np.fft.ifft(background_ft, axis=0)
921-
)
903+
background_ft = repeating_mask * current_ft[:, i]
922904

923-
# Truncate the signal to the original length
905+
# Take the inverse FT of the current segment and overlap-add it to the background signal
906+
background_signal[k : k + window_length, i] = background_signal[
907+
k : k + window_length, i
908+
] + np.real(np.fft.ifft(background_ft, axis=0))
909+
910+
# Update the index
911+
k = k + step_length
912+
913+
# Truncate the signal to the original number of samples
924914
background_signal = background_signal[0:number_samples, :]
925915

926-
# Un-window the signal (just in case)
916+
# Normalize the signal by the gain introduced by the constant overlap-add (COLA) of the window (if any)
927917
background_signal = background_signal / sum(
928918
window_function[0:window_length:step_length]
929919
)

0 commit comments

Comments
 (0)