diff --git a/src-tauri/src/audio_toolkit/audio/recorder.rs b/src-tauri/src/audio_toolkit/audio/recorder.rs index c3f23adbb..c3a65ba45 100644 --- a/src-tauri/src/audio_toolkit/audio/recorder.rs +++ b/src-tauri/src/audio_toolkit/audio/recorder.rs @@ -28,6 +28,8 @@ pub struct AudioRecorder { worker_handle: Option>, vad: Option>>>, level_cb: Option) + Send + Sync + 'static>>, + auto_stop_cb: Option>, + auto_stop_timeout_secs: Arc>>, } impl AudioRecorder { @@ -38,6 +40,8 @@ impl AudioRecorder { worker_handle: None, vad: None, level_cb: None, + auto_stop_cb: None, + auto_stop_timeout_secs: Arc::new(Mutex::new(None)), }) } @@ -54,6 +58,20 @@ impl AudioRecorder { self } + pub fn with_auto_stop_callback(mut self, cb: F) -> Self + where + F: Fn() + Send + Sync + 'static, + { + self.auto_stop_cb = Some(Arc::new(cb)); + self + } + + pub fn set_auto_stop_timeout(&self, timeout_secs: Option) { + if let Ok(mut guard) = self.auto_stop_timeout_secs.lock() { + *guard = timeout_secs; + } + } + pub fn open(&mut self, device: Option) -> Result<(), Box> { if self.worker_handle.is_some() { return Ok(()); // already open @@ -74,6 +92,8 @@ impl AudioRecorder { let vad = self.vad.clone(); // Move the optional level callback into the worker thread let level_cb = self.level_cb.clone(); + let auto_stop_cb = self.auto_stop_cb.clone(); + let auto_stop_timeout = self.auto_stop_timeout_secs.clone(); let worker = std::thread::spawn(move || { let config = AudioRecorder::get_preferred_config(&thread_device) @@ -117,7 +137,15 @@ impl AudioRecorder { stream.play().expect("failed to start stream"); // keep the stream alive while we process samples - run_consumer(sample_rate, vad, sample_rx, cmd_rx, level_cb); + run_consumer( + sample_rate, + vad, + sample_rx, + cmd_rx, + level_cb, + auto_stop_cb, + auto_stop_timeout, + ); // stream is dropped here, after run_consumer returns }); @@ -245,6 +273,8 @@ fn run_consumer( sample_rx: mpsc::Receiver>, cmd_rx: mpsc::Receiver, level_cb: Option) + Send + Sync + 'static>>, + auto_stop_cb: Option>, + auto_stop_timeout: Arc>>, ) { let mut frame_resampler = FrameResampler::new( in_sample_rate as usize, @@ -254,6 +284,11 @@ fn run_consumer( let mut processed_samples = Vec::::new(); let mut recording = false; + let mut auto_stop_triggered = false; + + const FRAME_DURATION_MS: u64 = 30; + let mut consecutive_silence_frames: u64 = 0; + let mut has_detected_speech = false; // ---------- spectrum visualisation setup ---------------------------- // const BUCKETS: usize = 16; @@ -271,19 +306,23 @@ fn run_consumer( recording: bool, vad: &Option>>>, out_buf: &mut Vec, - ) { + ) -> bool { if !recording { - return; + return false; } if let Some(vad_arc) = vad { let mut det = vad_arc.lock().unwrap(); match det.push_frame(samples).unwrap_or(VadFrame::Speech(samples)) { - VadFrame::Speech(buf) => out_buf.extend_from_slice(buf), - VadFrame::Noise => {} + VadFrame::Speech(buf) => { + out_buf.extend_from_slice(buf); + true + } + VadFrame::Noise => false, } } else { out_buf.extend_from_slice(samples); + true // No VAD means we assume all audio is speech } } @@ -302,7 +341,35 @@ fn run_consumer( // ---------- existing pipeline ------------------------------------ // frame_resampler.push(&raw, &mut |frame: &[f32]| { - handle_frame(frame, recording, &vad, &mut processed_samples) + let is_speech = handle_frame(frame, recording, &vad, &mut processed_samples); + + if recording && !auto_stop_triggered { + if is_speech { + has_detected_speech = true; + consecutive_silence_frames = 0; + } else if has_detected_speech { + consecutive_silence_frames += 1; + + if let Ok(guard) = auto_stop_timeout.lock() { + if let Some(timeout_secs) = *guard { + let silence_duration_ms = consecutive_silence_frames * FRAME_DURATION_MS; + let timeout_ms = timeout_secs * 1000; + + if silence_duration_ms >= timeout_ms { + auto_stop_triggered = true; + log::debug!( + "Auto-stop triggered: {}ms silence detected (threshold: {}ms)", + silence_duration_ms, + timeout_ms + ); + if let Some(cb) = &auto_stop_cb { + cb(); + } + } + } + } + } + } }); // non-blocking check for a command @@ -311,6 +378,9 @@ fn run_consumer( Cmd::Start => { processed_samples.clear(); recording = true; + auto_stop_triggered = false; + consecutive_silence_frames = 0; + has_detected_speech = false; visualizer.reset(); // Reset visualization buffer if let Some(v) = &vad { v.lock().unwrap().reset(); @@ -318,10 +388,13 @@ fn run_consumer( } Cmd::Stop(reply_tx) => { recording = false; + auto_stop_triggered = false; + consecutive_silence_frames = 0; + has_detected_speech = false; frame_resampler.finish(&mut |frame: &[f32]| { // we still want to process the last few frames - handle_frame(frame, true, &vad, &mut processed_samples) + handle_frame(frame, true, &vad, &mut processed_samples); }); let _ = reply_tx.send(std::mem::take(&mut processed_samples)); diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index be98784a8..4928d00ff 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -264,6 +264,7 @@ pub fn run() { shortcut::change_append_trailing_space_setting, shortcut::change_app_language_setting, shortcut::change_update_checks_setting, + shortcut::change_auto_stop_silence_timeout_setting, trigger_update_check, commands::cancel_operation, commands::get_app_dir_path, diff --git a/src-tauri/src/managers/audio.rs b/src-tauri/src/managers/audio.rs index 0add01fcf..39532dd36 100644 --- a/src-tauri/src/managers/audio.rs +++ b/src-tauri/src/managers/audio.rs @@ -132,6 +132,13 @@ fn create_audio_recorder( move |levels| { utils::emit_levels(&app_handle, &levels); } + }) + .with_auto_stop_callback({ + let app_handle = app_handle.clone(); + move || { + debug!("Auto-stop callback triggered"); + utils::trigger_auto_stop_transcription(&app_handle); + } }); Ok(recorder) @@ -345,6 +352,12 @@ impl AudioRecordingManager { } if let Some(rec) = self.recorder.lock().unwrap().as_ref() { + // Configure auto-stop timeout from settings + let settings = get_settings(&self.app_handle); + let timeout_secs = settings.auto_stop_silence_timeout.to_seconds(); + rec.set_auto_stop_timeout(timeout_secs); + debug!("Auto-stop timeout set to: {:?} seconds", timeout_secs); + if rec.start().is_ok() { *self.is_recording.lock().unwrap() = true; *state = RecordingState::Recording { diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index 4b2380eb2..655ce9672 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -111,6 +111,34 @@ pub enum OverlayPosition { Bottom, } +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)] +#[serde(rename_all = "snake_case")] +pub enum AutoStopSilenceTimeout { + Disabled, + Sec2, + Sec3, + Sec5, + Sec10, +} + +impl Default for AutoStopSilenceTimeout { + fn default() -> Self { + AutoStopSilenceTimeout::Disabled + } +} + +impl AutoStopSilenceTimeout { + pub fn to_seconds(self) -> Option { + match self { + AutoStopSilenceTimeout::Disabled => None, + AutoStopSilenceTimeout::Sec2 => Some(2), + AutoStopSilenceTimeout::Sec3 => Some(3), + AutoStopSilenceTimeout::Sec5 => Some(5), + AutoStopSilenceTimeout::Sec10 => Some(10), + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)] #[serde(rename_all = "snake_case")] pub enum ModelUnloadTimeout { @@ -293,6 +321,8 @@ pub struct AppSettings { pub append_trailing_space: bool, #[serde(default = "default_app_language")] pub app_language: String, + #[serde(default)] + pub auto_stop_silence_timeout: AutoStopSilenceTimeout, } fn default_model() -> String { @@ -563,6 +593,7 @@ pub fn get_default_settings() -> AppSettings { mute_while_recording: false, append_trailing_space: false, app_language: default_app_language(), + auto_stop_silence_timeout: AutoStopSilenceTimeout::default(), } } diff --git a/src-tauri/src/shortcut.rs b/src-tauri/src/shortcut.rs index aaa99e0e6..000901155 100644 --- a/src-tauri/src/shortcut.rs +++ b/src-tauri/src/shortcut.rs @@ -10,8 +10,8 @@ use crate::actions::ACTION_MAP; use crate::managers::audio::AudioRecordingManager; use crate::settings::ShortcutBinding; use crate::settings::{ - self, get_settings, ClipboardHandling, LLMPrompt, OverlayPosition, PasteMethod, SoundTheme, - APPLE_INTELLIGENCE_DEFAULT_MODEL_ID, APPLE_INTELLIGENCE_PROVIDER_ID, + self, get_settings, AutoStopSilenceTimeout, ClipboardHandling, LLMPrompt, OverlayPosition, + PasteMethod, SoundTheme, APPLE_INTELLIGENCE_DEFAULT_MODEL_ID, APPLE_INTELLIGENCE_PROVIDER_ID, }; use crate::tray; use crate::ManagedToggleState; @@ -731,6 +731,32 @@ pub fn change_app_language_setting(app: AppHandle, language: String) -> Result<( Ok(()) } +#[tauri::command] +#[specta::specta] +pub fn change_auto_stop_silence_timeout_setting( + app: AppHandle, + timeout: String, +) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + let parsed = match timeout.as_str() { + "disabled" => AutoStopSilenceTimeout::Disabled, + "sec2" => AutoStopSilenceTimeout::Sec2, + "sec3" => AutoStopSilenceTimeout::Sec3, + "sec5" => AutoStopSilenceTimeout::Sec5, + "sec10" => AutoStopSilenceTimeout::Sec10, + other => { + warn!( + "Invalid auto-stop silence timeout '{}', defaulting to disabled", + other + ); + AutoStopSilenceTimeout::Disabled + } + }; + settings.auto_stop_silence_timeout = parsed; + settings::write_settings(&app, settings); + Ok(()) +} + /// Determine whether a shortcut string contains at least one non-modifier key. /// We allow single non-modifier keys (e.g. "f5" or "space") but disallow /// modifier-only combos (e.g. "ctrl" or "ctrl+shift"). diff --git a/src-tauri/src/utils.rs b/src-tauri/src/utils.rs index 636ef8ef2..fdc94ee0c 100644 --- a/src-tauri/src/utils.rs +++ b/src-tauri/src/utils.rs @@ -1,3 +1,4 @@ +use crate::actions::ACTION_MAP; use crate::managers::audio::AudioRecordingManager; use crate::shortcut; use crate::ManagedToggleState; @@ -39,6 +40,35 @@ pub fn cancel_current_operation(app: &AppHandle) { info!("Operation cancellation completed - returned to idle state"); } +/// Stops transcription when silence timeout is exceeded. +pub fn trigger_auto_stop_transcription(app: &AppHandle) { + let binding_id = "transcribe"; + let shortcut_string = "auto-stop-silence"; + + let audio_manager = app.state::>(); + if !audio_manager.is_recording() { + return; + } + + let toggle_state_manager = app.state::(); + if let Ok(mut states) = toggle_state_manager.lock() { + if let Some(is_active) = states.active_toggles.get_mut(binding_id) { + if !*is_active { + return; + } + *is_active = false; + } + } else { + warn!("Auto-stop: Failed to lock toggle state"); + return; + } + + if let Some(action) = ACTION_MAP.get(binding_id) { + action.stop(app, binding_id, shortcut_string); + info!("Auto-stop: Transcription stopped due to silence"); + } +} + /// Check if using the Wayland display server protocol #[cfg(target_os = "linux")] pub fn is_wayland() -> bool { diff --git a/src/bindings.ts b/src/bindings.ts index 7e271a462..b5b96c74c 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -260,6 +260,14 @@ async changeAppLanguageSetting(language: string) : Promise> else return { status: "error", error: e as any }; } }, +async changeAutoStopSilenceTimeoutSetting(timeout: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_auto_stop_silence_timeout_setting", { timeout }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async changeUpdateChecksSetting(enabled: boolean) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("change_update_checks_setting", { enabled }) }; @@ -621,7 +629,8 @@ async isLaptop() : Promise> { /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; auto_stop_silence_timeout?: AutoStopSilenceTimeout } +export type AutoStopSilenceTimeout = "disabled" | "sec2" | "sec3" | "sec5" | "sec10" export type AudioDevice = { index: string; name: string; is_default: boolean } export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null } export type ClipboardHandling = "dont_modify" | "copy_to_clipboard" diff --git a/src/components/settings/AutoStopSilenceTimeout.tsx b/src/components/settings/AutoStopSilenceTimeout.tsx new file mode 100644 index 000000000..592999fa8 --- /dev/null +++ b/src/components/settings/AutoStopSilenceTimeout.tsx @@ -0,0 +1,61 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { useSettings } from "../../hooks/useSettings"; +import type { AutoStopSilenceTimeout } from "@/bindings"; +import { Dropdown } from "../ui/Dropdown"; +import { SettingContainer } from "../ui/SettingContainer"; + +interface AutoStopSilenceTimeoutProps { + descriptionMode?: "tooltip" | "inline"; + grouped?: boolean; +} + +export const AutoStopSilenceTimeoutSetting: React.FC< + AutoStopSilenceTimeoutProps +> = ({ descriptionMode = "inline", grouped = false }) => { + const { t } = useTranslation(); + const { getSetting, updateSetting, isUpdating } = useSettings(); + + const timeoutOptions = [ + { + value: "disabled" as AutoStopSilenceTimeout, + label: t("settings.advanced.autoStopSilence.options.disabled"), + }, + { + value: "sec2" as AutoStopSilenceTimeout, + label: t("settings.advanced.autoStopSilence.options.sec2"), + }, + { + value: "sec3" as AutoStopSilenceTimeout, + label: t("settings.advanced.autoStopSilence.options.sec3"), + }, + { + value: "sec5" as AutoStopSilenceTimeout, + label: t("settings.advanced.autoStopSilence.options.sec5"), + }, + { + value: "sec10" as AutoStopSilenceTimeout, + label: t("settings.advanced.autoStopSilence.options.sec10"), + }, + ]; + + const currentValue = getSetting("auto_stop_silence_timeout") ?? "disabled"; + + return ( + + + updateSetting("auto_stop_silence_timeout", value as AutoStopSilenceTimeout) + } + disabled={isUpdating("auto_stop_silence_timeout")} + /> + + ); +}; diff --git a/src/components/settings/advanced/AdvancedSettings.tsx b/src/components/settings/advanced/AdvancedSettings.tsx index 7f3bd3c6b..c88d8a815 100644 --- a/src/components/settings/advanced/AdvancedSettings.tsx +++ b/src/components/settings/advanced/AdvancedSettings.tsx @@ -9,6 +9,7 @@ import { StartHidden } from "../StartHidden"; import { AutostartToggle } from "../AutostartToggle"; import { PasteMethodSetting } from "../PasteMethod"; import { ClipboardHandlingSetting } from "../ClipboardHandling"; +import { AutoStopSilenceTimeoutSetting } from "../AutoStopSilenceTimeout"; export const AdvancedSettings: React.FC = () => { const { t } = useTranslation(); @@ -21,6 +22,10 @@ export const AdvancedSettings: React.FC = () => { + diff --git a/src/i18n/locales/en/translation.json b/src/i18n/locales/en/translation.json index c02d2df14..b7779e8c6 100644 --- a/src/i18n/locales/en/translation.json +++ b/src/i18n/locales/en/translation.json @@ -201,6 +201,17 @@ "placeholder": "Add a word", "add": "Add", "remove": "Remove {{word}}" + }, + "autoStopSilence": { + "title": "Auto-Stop on Silence", + "description": "Automatically stop recording after a period of silence following speech. Useful for hands-free operation.", + "options": { + "disabled": "Disabled", + "sec2": "After 2 seconds", + "sec3": "After 3 seconds", + "sec5": "After 5 seconds", + "sec10": "After 10 seconds" + } } }, "postProcessing": { diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index f35a6e956..f4c29a127 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -125,6 +125,8 @@ const settingUpdaters: { commands.changeAppendTrailingSpaceSetting(value as boolean), log_level: (value) => commands.setLogLevel(value as any), app_language: (value) => commands.changeAppLanguageSetting(value as string), + auto_stop_silence_timeout: (value) => + commands.changeAutoStopSilenceTimeoutSetting(value as string), }; export const useSettingsStore = create()(