Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 80 additions & 7 deletions src-tauri/src/audio_toolkit/audio/recorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ pub struct AudioRecorder {
worker_handle: Option<std::thread::JoinHandle<()>>,
vad: Option<Arc<Mutex<Box<dyn vad::VoiceActivityDetector>>>>,
level_cb: Option<Arc<dyn Fn(Vec<f32>) + Send + Sync + 'static>>,
auto_stop_cb: Option<Arc<dyn Fn() + Send + Sync + 'static>>,
auto_stop_timeout_secs: Arc<Mutex<Option<u64>>>,
}

impl AudioRecorder {
Expand All @@ -38,6 +40,8 @@ impl AudioRecorder {
worker_handle: None,
vad: None,
level_cb: None,
auto_stop_cb: None,
auto_stop_timeout_secs: Arc::new(Mutex::new(None)),
})
}

Expand All @@ -54,6 +58,20 @@ impl AudioRecorder {
self
}

/// Builder-style setter that registers the callback run when auto-stop fires.
///
/// The closure is wrapped in an `Arc` and stored in `auto_stop_cb`; the
/// recorder is handed back by value so builder calls can be chained.
pub fn with_auto_stop_callback<F: Fn() + Send + Sync + 'static>(mut self, cb: F) -> Self {
    let shared: Arc<dyn Fn() + Send + Sync + 'static> = Arc::new(cb);
    self.auto_stop_cb = Some(shared);
    self
}

/// Sets the silence timeout (in seconds) used for auto-stop; `None` disables it.
///
/// The value lives behind a shared `Arc<Mutex<..>>`, so it can be updated
/// through `&self`.
///
/// A poisoned lock is recovered instead of being ignored: the guarded data is
/// a plain `Option<u64>` that cannot be observed half-written, and silently
/// skipping the write would leave a stale timeout stored.
pub fn set_auto_stop_timeout(&self, timeout_secs: Option<u64>) {
    let mut slot = self
        .auto_stop_timeout_secs
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    *slot = timeout_secs;
}

pub fn open(&mut self, device: Option<Device>) -> Result<(), Box<dyn std::error::Error>> {
if self.worker_handle.is_some() {
return Ok(()); // already open
Expand All @@ -74,6 +92,8 @@ impl AudioRecorder {
let vad = self.vad.clone();
// Move the optional level callback into the worker thread
let level_cb = self.level_cb.clone();
let auto_stop_cb = self.auto_stop_cb.clone();
let auto_stop_timeout = self.auto_stop_timeout_secs.clone();

let worker = std::thread::spawn(move || {
let config = AudioRecorder::get_preferred_config(&thread_device)
Expand Down Expand Up @@ -117,7 +137,15 @@ impl AudioRecorder {
stream.play().expect("failed to start stream");

// keep the stream alive while we process samples
run_consumer(sample_rate, vad, sample_rx, cmd_rx, level_cb);
run_consumer(
sample_rate,
vad,
sample_rx,
cmd_rx,
level_cb,
auto_stop_cb,
auto_stop_timeout,
);
// stream is dropped here, after run_consumer returns
});

Expand Down Expand Up @@ -245,6 +273,8 @@ fn run_consumer(
sample_rx: mpsc::Receiver<Vec<f32>>,
cmd_rx: mpsc::Receiver<Cmd>,
level_cb: Option<Arc<dyn Fn(Vec<f32>) + Send + Sync + 'static>>,
auto_stop_cb: Option<Arc<dyn Fn() + Send + Sync + 'static>>,
auto_stop_timeout: Arc<Mutex<Option<u64>>>,
) {
let mut frame_resampler = FrameResampler::new(
in_sample_rate as usize,
Expand All @@ -254,6 +284,11 @@ fn run_consumer(

let mut processed_samples = Vec::<f32>::new();
let mut recording = false;
let mut auto_stop_triggered = false;

const FRAME_DURATION_MS: u64 = 30;
let mut consecutive_silence_frames: u64 = 0;
let mut has_detected_speech = false;

// ---------- spectrum visualisation setup ---------------------------- //
const BUCKETS: usize = 16;
Expand All @@ -271,19 +306,23 @@ fn run_consumer(
recording: bool,
vad: &Option<Arc<Mutex<Box<dyn vad::VoiceActivityDetector>>>>,
out_buf: &mut Vec<f32>,
) {
) -> bool {
if !recording {
return;
return false;
}

if let Some(vad_arc) = vad {
let mut det = vad_arc.lock().unwrap();
match det.push_frame(samples).unwrap_or(VadFrame::Speech(samples)) {
VadFrame::Speech(buf) => out_buf.extend_from_slice(buf),
VadFrame::Noise => {}
VadFrame::Speech(buf) => {
out_buf.extend_from_slice(buf);
true
}
VadFrame::Noise => false,
}
} else {
out_buf.extend_from_slice(samples);
true // No VAD means we assume all audio is speech
}
}

Expand All @@ -302,7 +341,35 @@ fn run_consumer(

// ---------- existing pipeline ------------------------------------ //
frame_resampler.push(&raw, &mut |frame: &[f32]| {
handle_frame(frame, recording, &vad, &mut processed_samples)
let is_speech = handle_frame(frame, recording, &vad, &mut processed_samples);

if recording && !auto_stop_triggered {
if is_speech {
has_detected_speech = true;
consecutive_silence_frames = 0;
} else if has_detected_speech {
consecutive_silence_frames += 1;

if let Ok(guard) = auto_stop_timeout.lock() {
if let Some(timeout_secs) = *guard {
let silence_duration_ms = consecutive_silence_frames * FRAME_DURATION_MS;
let timeout_ms = timeout_secs * 1000;

if silence_duration_ms >= timeout_ms {
auto_stop_triggered = true;
log::debug!(
"Auto-stop triggered: {}ms silence detected (threshold: {}ms)",
silence_duration_ms,
timeout_ms
);
if let Some(cb) = &auto_stop_cb {
cb();
}
}
}
}
}
}
});

// non-blocking check for a command
Expand All @@ -311,17 +378,23 @@ fn run_consumer(
Cmd::Start => {
processed_samples.clear();
recording = true;
auto_stop_triggered = false;
consecutive_silence_frames = 0;
has_detected_speech = false;
visualizer.reset(); // Reset visualization buffer
if let Some(v) = &vad {
v.lock().unwrap().reset();
}
}
Cmd::Stop(reply_tx) => {
recording = false;
auto_stop_triggered = false;
consecutive_silence_frames = 0;
has_detected_speech = false;

frame_resampler.finish(&mut |frame: &[f32]| {
// we still want to process the last few frames
handle_frame(frame, true, &vad, &mut processed_samples)
handle_frame(frame, true, &vad, &mut processed_samples);
});

let _ = reply_tx.send(std::mem::take(&mut processed_samples));
Expand Down
1 change: 1 addition & 0 deletions src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ pub fn run() {
shortcut::change_append_trailing_space_setting,
shortcut::change_app_language_setting,
shortcut::change_update_checks_setting,
shortcut::change_auto_stop_silence_timeout_setting,
trigger_update_check,
commands::cancel_operation,
commands::get_app_dir_path,
Expand Down
13 changes: 13 additions & 0 deletions src-tauri/src/managers/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@ fn create_audio_recorder(
move |levels| {
utils::emit_levels(&app_handle, &levels);
}
})
.with_auto_stop_callback({
let app_handle = app_handle.clone();
move || {
debug!("Auto-stop callback triggered");
utils::trigger_auto_stop_transcription(&app_handle);
}
});

Ok(recorder)
Expand Down Expand Up @@ -345,6 +352,12 @@ impl AudioRecordingManager {
}

if let Some(rec) = self.recorder.lock().unwrap().as_ref() {
// Configure auto-stop timeout from settings
let settings = get_settings(&self.app_handle);
let timeout_secs = settings.auto_stop_silence_timeout.to_seconds();
rec.set_auto_stop_timeout(timeout_secs);
debug!("Auto-stop timeout set to: {:?} seconds", timeout_secs);

if rec.start().is_ok() {
*self.is_recording.lock().unwrap() = true;
*state = RecordingState::Recording {
Expand Down
31 changes: 31 additions & 0 deletions src-tauri/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,34 @@ pub enum OverlayPosition {
Bottom,
}

/// How long silence must last before a recording is stopped automatically.
///
/// Serialized with `snake_case` variant names (e.g. `"disabled"`, `"sec2"`).
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)]
#[serde(rename_all = "snake_case")]
pub enum AutoStopSilenceTimeout {
/// Auto-stop is turned off; `to_seconds` yields `None`.
Disabled,
/// 2 seconds of silence.
Sec2,
/// 3 seconds of silence.
Sec3,
/// 5 seconds of silence.
Sec5,
/// 10 seconds of silence.
Sec10,
}

impl Default for AutoStopSilenceTimeout {
fn default() -> Self {
AutoStopSilenceTimeout::Disabled
}
}

impl AutoStopSilenceTimeout {
pub fn to_seconds(self) -> Option<u64> {
match self {
AutoStopSilenceTimeout::Disabled => None,
AutoStopSilenceTimeout::Sec2 => Some(2),
AutoStopSilenceTimeout::Sec3 => Some(3),
AutoStopSilenceTimeout::Sec5 => Some(5),
AutoStopSilenceTimeout::Sec10 => Some(10),
}
}
}

#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)]
#[serde(rename_all = "snake_case")]
pub enum ModelUnloadTimeout {
Expand Down Expand Up @@ -293,6 +321,8 @@ pub struct AppSettings {
pub append_trailing_space: bool,
#[serde(default = "default_app_language")]
pub app_language: String,
#[serde(default)]
pub auto_stop_silence_timeout: AutoStopSilenceTimeout,
}

fn default_model() -> String {
Expand Down Expand Up @@ -563,6 +593,7 @@ pub fn get_default_settings() -> AppSettings {
mute_while_recording: false,
append_trailing_space: false,
app_language: default_app_language(),
auto_stop_silence_timeout: AutoStopSilenceTimeout::default(),
}
}

Expand Down
30 changes: 28 additions & 2 deletions src-tauri/src/shortcut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use crate::actions::ACTION_MAP;
use crate::managers::audio::AudioRecordingManager;
use crate::settings::ShortcutBinding;
use crate::settings::{
self, get_settings, ClipboardHandling, LLMPrompt, OverlayPosition, PasteMethod, SoundTheme,
APPLE_INTELLIGENCE_DEFAULT_MODEL_ID, APPLE_INTELLIGENCE_PROVIDER_ID,
self, get_settings, AutoStopSilenceTimeout, ClipboardHandling, LLMPrompt, OverlayPosition,
PasteMethod, SoundTheme, APPLE_INTELLIGENCE_DEFAULT_MODEL_ID, APPLE_INTELLIGENCE_PROVIDER_ID,
};
use crate::tray;
use crate::ManagedToggleState;
Expand Down Expand Up @@ -731,6 +731,32 @@ pub fn change_app_language_setting(app: AppHandle, language: String) -> Result<(
Ok(())
}

/// Tauri command that persists the auto-stop silence timeout setting.
///
/// `timeout` is matched against the identifiers `"disabled"`, `"sec2"`,
/// `"sec3"`, `"sec5"` and `"sec10"`. Any other value is logged with a warning
/// and stored as `AutoStopSilenceTimeout::Disabled`. The updated settings are
/// written back with `settings::write_settings`; the command always returns
/// `Ok(())`.
#[tauri::command]
#[specta::specta]
pub fn change_auto_stop_silence_timeout_setting(
    app: AppHandle,
    timeout: String,
) -> Result<(), String> {
    let mut current = settings::get_settings(&app);
    current.auto_stop_silence_timeout = match timeout.as_str() {
        "disabled" => AutoStopSilenceTimeout::Disabled,
        "sec2" => AutoStopSilenceTimeout::Sec2,
        "sec3" => AutoStopSilenceTimeout::Sec3,
        "sec5" => AutoStopSilenceTimeout::Sec5,
        "sec10" => AutoStopSilenceTimeout::Sec10,
        unrecognized => {
            // Unknown identifiers fall back to the disabled state rather than failing.
            warn!(
                "Invalid auto-stop silence timeout '{}', defaulting to disabled",
                unrecognized
            );
            AutoStopSilenceTimeout::Disabled
        }
    };
    settings::write_settings(&app, current);
    Ok(())
}

/// Determine whether a shortcut string contains at least one non-modifier key.
/// We allow single non-modifier keys (e.g. "f5" or "space") but disallow
/// modifier-only combos (e.g. "ctrl" or "ctrl+shift").
Expand Down
30 changes: 30 additions & 0 deletions src-tauri/src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::actions::ACTION_MAP;
use crate::managers::audio::AudioRecordingManager;
use crate::shortcut;
use crate::ManagedToggleState;
Expand Down Expand Up @@ -39,6 +40,35 @@ pub fn cancel_current_operation(app: &AppHandle) {
info!("Operation cancellation completed - returned to idle state");
}

/// Ends the active "transcribe" session once the silence timeout is exceeded.
///
/// Nothing happens when the audio manager is not recording, when the toggle
/// state lock cannot be taken (a warning is logged), or when the "transcribe"
/// toggle is already inactive. Otherwise the toggle, if present, is cleared
/// and the "transcribe" action's `stop` handler is invoked with the synthetic
/// shortcut string "auto-stop-silence".
pub fn trigger_auto_stop_transcription(app: &AppHandle) {
    const BINDING_ID: &str = "transcribe";
    const SHORTCUT_LABEL: &str = "auto-stop-silence";

    let recording_manager = app.state::<Arc<AudioRecordingManager>>();
    if !recording_manager.is_recording() {
        return;
    }

    let toggle_state_manager = app.state::<ManagedToggleState>();
    match toggle_state_manager.lock() {
        Ok(mut states) => match states.active_toggles.get_mut(BINDING_ID) {
            // Already inactive: someone else stopped the session first.
            Some(active) if !*active => return,
            Some(active) => *active = false,
            // No toggle entry recorded; still fall through to the stop handler.
            None => {}
        },
        Err(_) => {
            warn!("Auto-stop: Failed to lock toggle state");
            return;
        }
    }

    // The toggle-state guard has been released by this point.
    if let Some(action) = ACTION_MAP.get(BINDING_ID) {
        action.stop(app, BINDING_ID, SHORTCUT_LABEL);
        info!("Auto-stop: Transcription stopped due to silence");
    }
}

/// Check if using the Wayland display server protocol
#[cfg(target_os = "linux")]
pub fn is_wayland() -> bool {
Expand Down
11 changes: 10 additions & 1 deletion src/bindings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,14 @@ async changeAppLanguageSetting(language: string) : Promise<Result<null, string>>
else return { status: "error", error: e as any };
}
},
/**
 * Invokes the `change_auto_stop_silence_timeout_setting` backend command with `timeout`.
 *
 * Resolves to `{ status: "ok", data }` when the invoke succeeds. A rejection that is an
 * `Error` instance is rethrown; any other rejection value is returned as
 * `{ status: "error", error }`.
 */
async changeAutoStopSilenceTimeoutSetting(timeout: string) : Promise<Result<null, string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("change_auto_stop_silence_timeout_setting", { timeout }) };
} catch (e) {
// Real Error objects propagate; other rejection values become an error result.
if(e instanceof Error) throw e;
else return { status: "error", error: e as any };
}
},
async changeUpdateChecksSetting(enabled: boolean) : Promise<Result<null, string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("change_update_checks_setting", { enabled }) };
Expand Down Expand Up @@ -621,7 +629,8 @@ async isLaptop() : Promise<Result<boolean, string>> {

/** user-defined types **/

export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string }
export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; auto_stop_silence_timeout?: AutoStopSilenceTimeout }
/** String identifiers for the auto-stop silence timeout setting (`AppSettings.auto_stop_silence_timeout`). */
export type AutoStopSilenceTimeout = "disabled" | "sec2" | "sec3" | "sec5" | "sec10"
export type AudioDevice = { index: string; name: string; is_default: boolean }
export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null }
export type ClipboardHandling = "dont_modify" | "copy_to_clipboard"
Expand Down
Loading