Skip to content

Commit c6a7dd2

Browse files
swipe and yin
1 parent 36c814e commit c6a7dd2

File tree

2 files changed

+48
-6
lines changed

2 files changed

+48
-6
lines changed

pytch/audio.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import logging
99
import sounddevice
1010
import soundfile as sf
11+
import libf0
1112
from rtswipe import RTSwipe
1213
from scipy.ndimage import median_filter
1314
from datetime import datetime
@@ -75,7 +76,8 @@ def check_fs(device_index, fs):
7576
logger.debug(e)
7677
valid = False
7778

78-
return valid
79+
finally:
80+
return valid
7981

8082

8183
@njit
@@ -210,6 +212,7 @@ def __init__(
210212
fft_len=512,
211213
channels=None,
212214
device_no=None,
215+
f0_algorithm="YIN",
213216
out_path="",
214217
):
215218
"""Initialize audio processing.
@@ -220,6 +223,7 @@ def __init__(
220223
fft_len: FFT length in bins.
221224
channels: List of channels to record.
222225
device_no: Index of device to record from.
226+
f0_algorithm: F0 algorithm to use.
223227
out_path: Output directory for F0 trajectories.
224228
"""
225229
self.fs = fs
@@ -230,6 +234,7 @@ def __init__(
230234
self.fft_win = np.hanning(self.fft_len).reshape(-1, 1)
231235
self.channels = [0] if channels is None else channels
232236
self.device_no = device_no
237+
self.f0_algorithm = f0_algorithm
233238
self.out_path = out_path
234239
self.f0_lvl_threshold = -70 # minimum level in dB to compute f0 estimates
235240
self.frame_rate = self.fs / self.hop_len
@@ -402,10 +407,34 @@ def compute_f0(self, audio, lvl):
402407
conf: Confidence.
403408
404409
"""
405-
if np.all(lvl > self.f0_lvl_threshold):
406-
f0, conf = self.rtswipe(audio)
407-
else:
408-
f0 = conf = np.zeros((1, len(self.channels)))
410+
f0 = np.zeros((1, audio.shape[1]))
411+
conf = np.zeros((1, audio.shape[1]))
412+
413+
if self.f0_algorithm == "YIN":
414+
for c in range(audio.shape[1]):
415+
if lvl[0, c] < self.f0_lvl_threshold:
416+
continue
417+
418+
audio_tmp = np.concatenate(
419+
(audio[:, c][::-1], audio[:, c], audio[:, c][::-1])
420+
)
421+
f0_tmp, _, conf_tmp = libf0.yin(
422+
audio_tmp,
423+
Fs=self.fs,
424+
N=self.fft_len,
425+
H=self.fft_len,
426+
F_min=80.0,
427+
F_max=640.0,
428+
threshold=0.15,
429+
verbose=False,
430+
)
431+
f0[:, c] = np.mean(f0_tmp) # mean over all YIN frames of the mirror-padded signal (not only the center frame)
432+
conf[:, c] = 1 - np.mean(conf_tmp)
433+
elif self.f0_algorithm == "SWIPE":
434+
if np.all(lvl > self.f0_lvl_threshold):
435+
f0, conf = self.rtswipe(audio)
436+
f0 = f0.reshape(1, -1)
437+
conf = conf.reshape(1, -1)
409438

410439
return f0, conf
411440

pytch/gui.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path):
221221
self.fs = fs
222222
self.fft_size = fft_size
223223
self.out_path = out_path
224+
self.f0_algorithms = ["SWIPE", "YIN"]
224225
self.buf_len_sec = 30.0 # sec
225226
self.spec_scale_types = ["log", "linear"]
226227
self.ref_freq_modes = ["fixed", "highest", "lowest"]
@@ -243,7 +244,7 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path):
243244
self.cur_spec_scale_type = self.spec_scale_types[0]
244245
self.cur_ref_freq_mode = self.ref_freq_modes[0]
245246
self.cur_ref_freq = 220 # Hz
246-
self.cur_conf_threshold = 0.5
247+
self.cur_conf_threshold = 0.2
247248
self.cur_gradient_tol = 600 # Cents
248249
self.cur_smoothing_len = 3 # bins
249250
self.gui_refresh_ms = int(np.round(1000 / 60)) # 60 fps
@@ -270,6 +271,7 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path):
270271
fft_len=self.fft_size,
271272
channels=self.channels,
272273
device_no=self.sounddevice_idx,
274+
f0_algorithm=self.f0_algorithms[0],
273275
out_path=out_path,
274276
)
275277

@@ -473,6 +475,13 @@ def __init__(self, main_window: MainWindow):
473475
)
474476
layout.addWidget(self.box_show_tv, 10, 1, 1, 1)
475477

478+
layout.addWidget(qw.QLabel("F0 Algorithm"), 11, 0)
479+
self.select_algorithm = qw.QComboBox(self)
480+
self.select_algorithm.addItems(main_window.f0_algorithms)
481+
self.select_algorithm.setCurrentIndex(0)
482+
self.select_algorithm.currentTextChanged.connect(self.on_algorithm_select)
483+
layout.addWidget(self.select_algorithm, 11, 1, 1, 1)
484+
476485
layout.addWidget(qw.QLabel("Confidence Threshold"), 12, 0)
477486
self.noise_thresh_slider = qw.QSlider()
478487
self.noise_thresh_slider.setRange(0, 10)
@@ -552,6 +561,10 @@ def on_max_freq_changed(self, f):
552561
self.main_window.cur_disp_freq_lims
553562
)
554563

564+
# Slot wired to select_algorithm.currentTextChanged in this widget's __init__.
# Called with the combo box's new text whenever the F0 algorithm selection changes.
def on_algorithm_select(self, algorithm):
565+
"""Update function for F0 algorithm on user interaction."""
566+
# `algorithm` is one of the entries of main_window.f0_algorithms ("SWIPE" or "YIN").
# compute_f0 branches on this attribute, so the switch presumably takes effect on
# the next processed frame -- NOTE(review): confirm audio_processor is the running instance.
self.main_window.audio_processor.f0_algorithm = algorithm
567+
555568
def on_conf_threshold_changed(self, val):
556569
"""Update function for confidence threshold on user interaction."""
557570
self.noise_thresh_label.setText(str(val / 10.0))

0 commit comments

Comments
 (0)