Add selectable F0 algorithm (SWIPE and YIN)

sebastianrosenzweig · sebastianrosenzweig · commit c6a7dd2b56f5 · 2026-01-04T22:37:30.000+01:00
diff --git a/pytch/audio.py b/pytch/audio.py
@@ -8,6 +8,7 @@
 import logging
 import sounddevice
 import soundfile as sf
+import libf0
 from rtswipe import RTSwipe
 from scipy.ndimage import median_filter
 from datetime import datetime
@@ -75,7 +76,8 @@ def check_fs(device_index, fs):
         logger.debug(e)
         valid = False
 
-    return valid
+    finally:
+        return valid
 
 
 @njit
@@ -210,6 +212,7 @@ def __init__(
         fft_len=512,
         channels=None,
         device_no=None,
+        f0_algorithm="YIN",
         out_path="",
     ):
         """Initialize audio processing.
@@ -220,6 +223,7 @@ def __init__(
             fft_len: FFT length in bins.
             channels: List of channels to record.
             device_no: Index of device to record from.
+            f0_algorithm: F0 algorithm to use.
             out_path: Output directory for F0 trajectories.
         """
         self.fs = fs
@@ -230,6 +234,7 @@ def __init__(
         self.fft_win = np.hanning(self.fft_len).reshape(-1, 1)
         self.channels = [0] if channels is None else channels
         self.device_no = device_no
+        self.f0_algorithm = f0_algorithm
         self.out_path = out_path
         self.f0_lvl_threshold = -70  # minimum level in dB to compute f0 estimates
         self.frame_rate = self.fs / self.hop_len
@@ -402,10 +407,34 @@ def compute_f0(self, audio, lvl):
             conf: Confidence.
 
         """
-        if np.all(lvl > self.f0_lvl_threshold):
-            f0, conf = self.rtswipe(audio)
-        else:
-            f0 = conf = np.zeros((1, len(self.channels)))
+        f0 = np.zeros((1, audio.shape[1]))
+        conf = np.zeros((1, audio.shape[1]))
+
+        if self.f0_algorithm == "YIN":
+            for c in range(audio.shape[1]):
+                if lvl[0, c] < self.f0_lvl_threshold:
+                    continue
+
+                audio_tmp = np.concatenate(
+                    (audio[:, c][::-1], audio[:, c], audio[:, c][::-1])
+                )
+                f0_tmp, _, conf_tmp = libf0.yin(
+                    audio_tmp,
+                    Fs=self.fs,
+                    N=self.fft_len,
+                    H=self.fft_len,
+                    F_min=80.0,
+                    F_max=640.0,
+                    threshold=0.15,
+                    verbose=False,
+                )
+                f0[:, c] = np.mean(f0_tmp)  # mean over all frames, padding included
+                conf[:, c] = 1 - np.mean(conf_tmp)
+        elif self.f0_algorithm == "SWIPE":
+            if np.all(lvl > self.f0_lvl_threshold):
+                f0, conf = self.rtswipe(audio)
+                f0 = f0.reshape(1, -1)
+                conf = conf.reshape(1, -1)
 
         return f0, conf
 
diff --git a/pytch/gui.py b/pytch/gui.py
@@ -221,6 +221,7 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path):
         self.fs = fs
         self.fft_size = fft_size
         self.out_path = out_path
+        self.f0_algorithms = ["SWIPE", "YIN"]
         self.buf_len_sec = 30.0  # sec
         self.spec_scale_types = ["log", "linear"]
         self.ref_freq_modes = ["fixed", "highest", "lowest"]
@@ -243,7 +244,7 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path):
         self.cur_spec_scale_type = self.spec_scale_types[0]
         self.cur_ref_freq_mode = self.ref_freq_modes[0]
         self.cur_ref_freq = 220  # Hz
-        self.cur_conf_threshold = 0.5
+        self.cur_conf_threshold = 0.2
         self.cur_gradient_tol = 600  # Cents
         self.cur_smoothing_len = 3  # bins
         self.gui_refresh_ms = int(np.round(1000 / 60))  # 60 fps
@@ -270,6 +271,7 @@ def __init__(self, sounddevice_idx, channels, fs, fft_size, out_path):
             fft_len=self.fft_size,
             channels=self.channels,
             device_no=self.sounddevice_idx,
+            f0_algorithm=self.f0_algorithms[0],
             out_path=out_path,
         )
 
@@ -473,6 +475,13 @@ def __init__(self, main_window: MainWindow):
         )
         layout.addWidget(self.box_show_tv, 10, 1, 1, 1)
 
+        layout.addWidget(qw.QLabel("F0 Algorithm"), 11, 0)
+        self.select_algorithm = qw.QComboBox(self)
+        self.select_algorithm.addItems(main_window.f0_algorithms)
+        self.select_algorithm.setCurrentIndex(0)
+        self.select_algorithm.currentTextChanged.connect(self.on_algorithm_select)
+        layout.addWidget(self.select_algorithm, 11, 1, 1, 1)
+
         layout.addWidget(qw.QLabel("Confidence Threshold"), 12, 0)
         self.noise_thresh_slider = qw.QSlider()
         self.noise_thresh_slider.setRange(0, 10)
@@ -552,6 +561,10 @@ def on_max_freq_changed(self, f):
             self.main_window.cur_disp_freq_lims
         )
 
+    def on_algorithm_select(self, algorithm):
+        """Update function for F0 algorithm on user interaction."""
+        self.main_window.audio_processor.f0_algorithm = algorithm
+
     def on_conf_threshold_changed(self, val):
         """Update function for confidence threshold on user interaction."""
         self.noise_thresh_label.setText(str(val / 10.0))