diff --git a/src-tauri/src/audio/audio.rs b/src-tauri/src/audio/audio.rs index fb2c374e..a96aa885 100644 --- a/src-tauri/src/audio/audio.rs +++ b/src-tauri/src/audio/audio.rs @@ -1,7 +1,7 @@ use crate::audio::helpers::{cleanup_recordings, ensure_recordings_dir, generate_unique_wav_name}; use crate::audio::pipeline::process_recording; use crate::audio::recorder::AudioRecorder; -use crate::audio::types::{AudioState, RecordingMode}; +use crate::audio::types::{AudioState, RecordingMode, RecordingTrigger}; use crate::clipboard; use crate::engine::transcription_engine::TranscriptionEngine; use crate::engine::{ParakeetEngine, ParakeetModelParams}; @@ -15,6 +15,11 @@ use tauri::{AppHandle, Emitter, Manager}; pub fn record_audio(app: &AppHandle, mode: RecordingMode) { let state = app.state::(); state.set_recording_mode(mode); + if state.get_recording_trigger() != RecordingTrigger::WakeWord { + state.set_recording_trigger(RecordingTrigger::Keyboard); + } + + crate::wake_word::pause_listener(app); if matches!(mode, RecordingMode::Llm | RecordingMode::Command) { crate::llm::warmup_ollama_model_background(app); @@ -127,10 +132,18 @@ pub fn stop_recording(app: &AppHandle) -> Option { overlay::hide_recording_overlay(app); } + // Reset recording trigger and resume wake word listener + state.set_recording_trigger(RecordingTrigger::Keyboard); + crate::wake_word::resume_listener(app); + return path; } else { debug!("Recording stopped (no active file)"); } + + state.set_recording_trigger(RecordingTrigger::Keyboard); + crate::wake_word::resume_listener(app); + None } @@ -168,6 +181,9 @@ pub fn cancel_recording(app: &AppHandle) { overlay::hide_recording_overlay(app); } + state.set_recording_trigger(RecordingTrigger::Keyboard); + crate::wake_word::resume_listener(app); + info!("Recording cancelled by user"); } diff --git a/src-tauri/src/audio/recorder.rs b/src-tauri/src/audio/recorder.rs index 1b179ccf..252e6300 100644 --- a/src-tauri/src/audio/recorder.rs +++ b/src-tauri/src/audio/recorder.rs @@ -1,10 +1,11 @@ use crate::audio::helpers::create_wav_writer; use crate::audio::sound; +use crate::audio::types::RecordingTrigger; use anyhow::{Context, Error, Result}; use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; use cpal::Device; use hound::WavWriter; -use log::{debug, error}; +use log::{debug, error, info}; use parking_lot::Mutex; use std::fs::File; use std::io::BufWriter; @@ -14,6 +15,9 @@ use std::sync::Arc; use tauri::{AppHandle, Emitter, Manager}; const MAX_RECORDING_DURATION_SECS: u64 = 300; // 5 min +const SILENCE_AUTO_STOP_MS: u64 = 1500; +const SILENCE_AUTO_STOP_THRESHOLD: f32 = 0.03; +const SILENCE_AUTO_STOP_SPEECH_THRESHOLD: f32 = 0.03; type WavWriterType = WavWriter>; type SharedWriter = Arc>>; @@ -35,6 +39,9 @@ impl AudioRecorder { // Reset the limit flag at the start of each recording limit_reached.store(false, Ordering::SeqCst); + let audio_state = app.state::(); + let recording_trigger = audio_state.get_recording_trigger(); + let device = Self::get_device(app.clone())?; let config = device .default_input_config() @@ -49,6 +56,7 @@ impl AudioRecorder { writer_arc.clone(), app.clone(), limit_reached, + recording_trigger, )?; Ok(Self { @@ -128,17 +136,33 @@ fn build_stream( writer: SharedWriter, app: AppHandle, limit_reached: Arc, + recording_trigger: RecordingTrigger, ) -> Result { match config.sample_format() { - cpal::SampleFormat::F32 => { - build_stream_impl::(device, config, writer, app, limit_reached.clone()) - } - cpal::SampleFormat::I16 => { - build_stream_impl::(device, config, writer, app, limit_reached.clone()) - } - cpal::SampleFormat::I32 => { - build_stream_impl::(device, config, writer, app, limit_reached.clone()) - } + cpal::SampleFormat::F32 => build_stream_impl::( + device, + config, + writer, + app, + limit_reached.clone(), + recording_trigger, + ), + cpal::SampleFormat::I16 => build_stream_impl::( + device, + config, + writer, + app, + limit_reached.clone(), + recording_trigger, + ), + cpal::SampleFormat::I32 => build_stream_impl::( + device, + config, + writer, + app, + limit_reached.clone(), + recording_trigger, + ), f => Err(anyhow::anyhow!("Unsupported sample format: {:?}", f)), } } @@ -149,6 +173,7 @@ fn build_stream_impl( writer: SharedWriter, app: AppHandle, limit_reached_flag: Arc, + recording_trigger: RecordingTrigger, ) -> Result where T: cpal::Sample + cpal::SizedSample + Send + 'static, @@ -166,6 +191,11 @@ where let start_time = std::time::Instant::now(); let mut local_limit_triggered = false; + let is_wake_word = recording_trigger == RecordingTrigger::WakeWord; + let mut silence_start: Option = None; + let mut silence_auto_stop_triggered = false; + let mut has_speech_started = false; + let app_handle = app.clone(); let writer_clone = writer.clone(); @@ -178,9 +208,7 @@ where >= std::time::Duration::from_secs(MAX_RECORDING_DURATION_SECS) { local_limit_triggered = true; - // Set the shared atomic flag - this is the reliable cross-thread communication limit_reached_flag.store(true, Ordering::SeqCst); - // Also emit event for UI updates let _ = app_handle.emit("recording-limit-reached", ()); } @@ -218,11 +246,43 @@ where // EMA smoothing ema_level = alpha * level + (1.0 - alpha) * ema_level; let _ = app_handle.emit("mic-level", ema_level); - // also forward to overlay window if present if let Some(overlay_window) = app_handle.get_webview_window("recording_overlay") { let _ = overlay_window.emit("mic-level", ema_level); } + + if is_wake_word && !silence_auto_stop_triggered { + if rms >= SILENCE_AUTO_STOP_SPEECH_THRESHOLD { + if !has_speech_started { + info!("Wake word auto-stop: speech detected (rms={:.4})", rms); + } + has_speech_started = true; + } + + if has_speech_started { + if rms < SILENCE_AUTO_STOP_THRESHOLD { + if silence_start.is_none() { + silence_start = Some(std::time::Instant::now()); + debug!("Wake word auto-stop: silence started (rms={:.4})", rms); + } + if let Some(start) = silence_start { + if start.elapsed() + >= std::time::Duration::from_millis(SILENCE_AUTO_STOP_MS) + { + silence_auto_stop_triggered = true; + info!("Wake word auto-stop: stopping after {}ms silence", SILENCE_AUTO_STOP_MS); + let app = app_handle.clone(); + std::thread::spawn(move || { + crate::shortcuts::force_stop_recording(&app); + }); + } + } + } else { + silence_start = None; + } + } + } + acc_sum_squares = 0.0; acc_count = 0; } else { diff --git a/src-tauri/src/audio/types.rs b/src-tauri/src/audio/types.rs index 859a76c6..8f1dc3c0 100644 --- a/src-tauri/src/audio/types.rs +++ b/src-tauri/src/audio/types.rs @@ -2,13 +2,14 @@ use crate::audio::recorder::AudioRecorder; use crate::engine::ParakeetEngine; use cpal::Device; use parking_lot::Mutex; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; pub struct AudioState { pub recorder: Mutex>, pub engine: Mutex>, pub current_file_name: Mutex>, - recording_mode: std::sync::atomic::AtomicU8, + recording_mode: AtomicU8, + recording_trigger: AtomicU8, /// Flag indicating recording duration limit has been reached pub limit_reached: std::sync::Arc, /// Cached audio input device to avoid re-enumerating devices on each recording @@ -34,13 +35,30 @@ impl From for RecordingMode { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum RecordingTrigger { + Keyboard = 0, + WakeWord = 1, +} + +impl From for RecordingTrigger { + fn from(val: u8) -> Self { + match val { + 1 => RecordingTrigger::WakeWord, + _ => RecordingTrigger::Keyboard, + } + } +} + impl AudioState { pub fn new() -> Self { Self { recorder: Mutex::new(None), engine: Mutex::new(None), current_file_name: Mutex::new(None), - recording_mode: std::sync::atomic::AtomicU8::new(RecordingMode::Standard as u8), + recording_mode: AtomicU8::new(RecordingMode::Standard as u8), + recording_trigger: AtomicU8::new(RecordingTrigger::Keyboard as u8), limit_reached: std::sync::Arc::new(AtomicBool::new(false)), cached_device: Mutex::new(None), } @@ -54,6 +72,15 @@ impl AudioState { self.recording_mode.load(Ordering::SeqCst).into() } + pub fn set_recording_trigger(&self, trigger: RecordingTrigger) { + self.recording_trigger + .store(trigger as u8, Ordering::SeqCst); + } + + pub fn get_recording_trigger(&self) -> RecordingTrigger { + self.recording_trigger.load(Ordering::SeqCst).into() + } + pub fn is_limit_reached(&self) -> bool { self.limit_reached.load(Ordering::SeqCst) } diff --git a/src-tauri/src/commands/mod.rs b/src-tauri/src/commands/mod.rs index eae7cf45..6fc9e20f 100644 --- a/src-tauri/src/commands/mod.rs +++ b/src-tauri/src/commands/mod.rs @@ -11,6 +11,7 @@ pub mod record_mode; pub mod settings; pub mod shortcuts; pub mod stats; +pub mod wake_word; pub use clipboard::*; pub use dictionary::*; @@ -25,3 +26,4 @@ pub use record_mode::*; pub use settings::*; pub use shortcuts::*; pub use stats::*; +pub use wake_word::*; diff --git a/src-tauri/src/commands/wake_word.rs b/src-tauri/src/commands/wake_word.rs new file mode 100644 index 00000000..0391c1bf --- /dev/null +++ b/src-tauri/src/commands/wake_word.rs @@ -0,0 +1,56 @@ +use tauri::{command, AppHandle, Manager}; + +#[command] +pub fn get_wake_word_enabled(app: AppHandle) -> Result { + let s = crate::settings::load_settings(&app); + Ok(s.wake_word_enabled) +} + +#[command] +pub fn set_wake_word_enabled(app: AppHandle, enabled: bool) -> Result<(), String> { + let mut s = crate::settings::load_settings(&app); + + if enabled && s.wake_word.trim().is_empty() { + return Err("Wake word cannot be empty".to_string()); + } + + s.wake_word_enabled = enabled; + crate::settings::save_settings(&app, &s)?; + + if enabled { + crate::wake_word::start_listener(&app); + } else { + crate::wake_word::stop_listener(&app); + } + + Ok(()) +} + +#[command] +pub fn get_wake_word(app: AppHandle) -> Result { + let s = crate::settings::load_settings(&app); + Ok(s.wake_word) +} + +#[command] +pub fn set_wake_word(app: AppHandle, word: String) -> Result<(), String> { + let trimmed = word.trim().to_string(); + if trimmed.is_empty() { + return Err("Wake word cannot be empty".to_string()); + } + if trimmed.len() > 50 { + return Err("Wake word is too long (max 50 characters)".to_string()); + } + + let mut s = crate::settings::load_settings(&app); + s.wake_word = trimmed; + crate::settings::save_settings(&app, &s)?; + + let state = app.state::(); + if state.is_active() || s.wake_word_enabled { + crate::wake_word::stop_listener(&app); + crate::wake_word::start_listener(&app); + } + + Ok(()) +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 6fbf453a..ff78c7ca 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -16,11 +16,13 @@ mod settings; mod shortcuts; mod stats; mod utils; +mod wake_word; use crate::shortcuts::init_shortcuts; use audio::preload_engine; use audio::types::AudioState; use commands::*; +use wake_word::types::WakeWordState; use dictionary::Dictionary; use http_api::HttpApiState; use llm::llm::pull_ollama_model; @@ -87,6 +89,7 @@ pub fn run() { Arc::new(Model::new(app.handle().clone()).expect("Failed to initialize model")); app.manage(model); app.manage(AudioState::new()); + app.manage(WakeWordState::new()); let mut s = settings::load_settings(app.handle()); @@ -134,6 +137,14 @@ pub fn run() { crate::shortcuts::force_stop_recording(&app_handle); }); + if s.wake_word_enabled { + let app_handle = app.handle().clone(); + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_secs(2)); + wake_word::start_listener(&app_handle); + }); + } + Ok(()) }) .on_window_event(|window, event| { @@ -214,7 +225,11 @@ pub fn run() { get_log_level, set_log_level, open_accessibility_settings, - check_accessibility_permission + check_accessibility_permission, + get_wake_word_enabled, + set_wake_word_enabled, + get_wake_word, + set_wake_word ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/src-tauri/src/overlay/overlay.rs b/src-tauri/src/overlay/overlay.rs index 8e7b00c0..50329f03 100644 --- a/src-tauri/src/overlay/overlay.rs +++ b/src-tauri/src/overlay/overlay.rs @@ -123,8 +123,6 @@ pub fn show_recording_overlay(app_handle: &AppHandle) { ensure_overlay(app_handle); if let Some(window) = app_handle.get_webview_window("recording_overlay") { update_overlay_position(app_handle); - // Temporarily remove always_on_top before show to prevent focus steal - // then re-set it to keep the overlay above all windows. let _ = window.set_always_on_top(false); let _ = window.show(); let _ = window.set_always_on_top(true); diff --git a/src-tauri/src/settings/types.rs b/src-tauri/src/settings/types.rs index f3403d62..2cb59c13 100644 --- a/src-tauri/src/settings/types.rs +++ b/src-tauri/src/settings/types.rs @@ -4,6 +4,10 @@ fn default_cancel_shortcut() -> String { "escape".to_string() } +fn default_wake_word() -> String { + "murmure".to_string() +} + #[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq)] pub enum PasteMethod { #[default] @@ -57,6 +61,10 @@ pub struct AppSettings { pub cancel_shortcut: String, // Shortcut to cancel active recording pub mic_id: Option, // Optional microphone device ID pub log_level: String, // "info" | "debug" | "trace" | "warn" | "error" + #[serde(default)] + pub wake_word_enabled: bool, + #[serde(default = "default_wake_word")] + pub wake_word: String, } impl Default for AppSettings { @@ -85,6 +93,8 @@ impl Default for AppSettings { cancel_shortcut: "escape".to_string(), mic_id: None, log_level: "info".to_string(), + wake_word_enabled: false, + wake_word: default_wake_word(), } } } diff --git a/src-tauri/src/wake_word/mod.rs b/src-tauri/src/wake_word/mod.rs new file mode 100644 index 00000000..1b567376 --- /dev/null +++ b/src-tauri/src/wake_word/mod.rs @@ -0,0 +1,4 @@ +pub mod types; +pub mod wake_word; + +pub use wake_word::*; diff --git a/src-tauri/src/wake_word/types.rs b/src-tauri/src/wake_word/types.rs new file mode 100644 index 00000000..896beaea --- /dev/null +++ b/src-tauri/src/wake_word/types.rs @@ -0,0 +1,26 @@ +use parking_lot::Mutex; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +pub struct WakeWordState { + /// Whether the wake word listener is currently running + pub active: Arc, + /// Signal to stop the listener thread + pub stop_signal: Arc, + /// Handle to the listener thread (for cleanup) + pub thread_handle: Mutex>>, +} + +impl WakeWordState { + pub fn new() -> Self { + Self { + active: Arc::new(AtomicBool::new(false)), + stop_signal: Arc::new(AtomicBool::new(false)), + thread_handle: Mutex::new(None), + } + } + + pub fn is_active(&self) -> bool { + self.active.load(Ordering::SeqCst) + } +} diff --git a/src-tauri/src/wake_word/wake_word.rs b/src-tauri/src/wake_word/wake_word.rs new file mode 100644 index 00000000..7fd433f1 --- /dev/null +++ b/src-tauri/src/wake_word/wake_word.rs @@ -0,0 +1,408 @@ +use crate::audio::helpers::resample_linear; +use crate::audio::types::{AudioState, RecordingMode, RecordingTrigger}; +use crate::engine::transcription_engine::TranscriptionEngine; +use crate::engine::ParakeetModelParams; +use crate::shortcuts::types::{recording_state, RecordingSource}; +use crate::wake_word::types::WakeWordState; +use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; +use log::{debug, error, info, warn}; +use std::collections::VecDeque; +use std::sync::atomic::Ordering; +use std::sync::mpsc; +use std::sync::Arc; +use tauri::{AppHandle, Emitter, Manager}; + +const SPEECH_THRESHOLD: f32 = 0.015; +const SILENCE_THRESHOLD: f32 = 0.01; +const SPEECH_START_DELAY_MS: u64 = 200; +const SPEECH_END_DELAY_MS: u64 = 500; +const MAX_SEGMENT_DURATION_S: f32 = 5.0; +/// Must be > SPEECH_START_DELAY_MS to avoid clipping the onset of speech. +const PRE_BUFFER_DURATION_MS: f32 = 400.0; + +pub fn start_listener(app: &AppHandle) { + let state = app.state::(); + + if state.is_active() { + debug!("Wake word listener already active"); + return; + } + + let settings = crate::settings::load_settings(app); + if settings.wake_word.trim().is_empty() { + warn!("Wake word is empty, cannot start listener"); + return; + } + + let wake_word = settings.wake_word.to_lowercase(); + let stop_signal = state.stop_signal.clone(); + let active = state.active.clone(); + + stop_signal.store(false, Ordering::SeqCst); + + let app_handle = app.clone(); + + let handle = std::thread::spawn(move || { + if let Err(e) = listener_loop(&app_handle, &wake_word, &stop_signal, &active) { + error!("Wake word listener error: {}", e); + } + active.store(false, Ordering::SeqCst); + info!("Wake word listener thread exited"); + }); + + *state.thread_handle.lock() = Some(handle); + + let _ = app.emit("wake-word-listening", true); + info!("Wake word listener started"); +} + +pub fn stop_listener(app: &AppHandle) { + let state = app.state::(); + + if !state.is_active() { + debug!("Wake word listener already inactive"); + state.stop_signal.store(true, Ordering::SeqCst); + return; + } + + state.stop_signal.store(true, Ordering::SeqCst); + + let handle = state.thread_handle.lock().take(); + if let Some(h) = handle { + let _ = h.join(); + } + + let _ = app.emit("wake-word-listening", false); + info!("Wake word listener stopped"); +} + +pub fn pause_listener(app: &AppHandle) { + let state = app.state::(); + if state.is_active() { + debug!("Pausing wake word listener (non-blocking)"); + state.stop_signal.store(true, Ordering::SeqCst); + state.active.store(false, Ordering::SeqCst); + let _ = state.thread_handle.lock().take(); + let _ = app.emit("wake-word-listening", false); + } +} + +pub fn resume_listener(app: &AppHandle) { + let settings = crate::settings::load_settings(app); + if settings.wake_word_enabled { + debug!("Resuming wake word listener"); + start_listener(app); + } +} + +fn listener_loop( + app: &AppHandle, + wake_word: &str, + stop_signal: &Arc, + active: &Arc, +) -> anyhow::Result<()> { + let device = get_device(app)?; + let config = device + .default_input_config() + .map_err(|e| anyhow::anyhow!("No input config: {}", e))?; + + let sample_rate = config.sample_rate() as usize; + let channels = config.channels() as usize; + + let (tx, rx) = mpsc::channel::>(); + + let stop = stop_signal.clone(); + + let max_samples = (MAX_SEGMENT_DURATION_S * sample_rate as f32) as usize; + let pre_buffer_capacity = (PRE_BUFFER_DURATION_MS / 1000.0 * sample_rate as f32) as usize; + + let mut vad_state = VadState::new(max_samples, pre_buffer_capacity); + + let tx_clone = tx.clone(); + let stop_clone = stop.clone(); + + let stream = match config.sample_format() { + cpal::SampleFormat::F32 => device.build_input_stream( + &config.clone().into(), + move |data: &[f32], _: &cpal::InputCallbackInfo| { + if stop_clone.load(Ordering::SeqCst) { + return; + } + process_audio_callback(data, channels, &mut vad_state, &tx_clone); + }, + |err| error!("Wake word stream error: {}", err), + None, + )?, + cpal::SampleFormat::I16 => { + let mut vad_state_i16 = VadState::new(max_samples, pre_buffer_capacity); + let tx_i16 = tx.clone(); + let stop_i16 = stop.clone(); + + device.build_input_stream( + &config.clone().into(), + move |data: &[i16], _: &cpal::InputCallbackInfo| { + if stop_i16.load(Ordering::SeqCst) { + return; + } + let f32_data: Vec = + data.iter().map(|&s| s as f32 / i16::MAX as f32).collect(); + process_audio_callback(&f32_data, channels, &mut vad_state_i16, &tx_i16); + }, + |err| error!("Wake word stream error: {}", err), + None, + )? + } + f => return Err(anyhow::anyhow!("Unsupported sample format: {:?}", f)), + }; + + stream + .play() + .map_err(|e| anyhow::anyhow!("Failed to start wake word stream: {}", e))?; + + active.store(true, Ordering::SeqCst); + info!("Wake word listener loop running (sample_rate={})", sample_rate); + + loop { + if stop_signal.load(Ordering::SeqCst) { + break; + } + + match rx.recv_timeout(std::time::Duration::from_millis(200)) { + Ok(segment) => { + if stop_signal.load(Ordering::SeqCst) { + break; + } + + let samples_16k = if sample_rate != 16000 { + resample_linear(&segment, sample_rate, 16000) + } else { + segment + }; + + if samples_16k.len() < 1600 { + continue; + } + + match transcribe_segment(app, samples_16k) { + Ok(text) => { + let text_lower = text.to_lowercase(); + debug!("Wake word segment transcription: \"{}\"", text_lower); + + if text_lower.contains(wake_word) { + info!("Wake word detected: \"{}\"", text); + let _ = app.emit("wake-word-detected", ()); + + drop(stream); + active.store(false, Ordering::SeqCst); + + trigger_recording(app); + return Ok(()); + } + } + Err(e) => { + warn!("Wake word transcription failed: {}", e); + } + } + } + Err(mpsc::RecvTimeoutError::Timeout) => {} + Err(mpsc::RecvTimeoutError::Disconnected) => { + break; + } + } + } + + drop(stream); + Ok(()) +} + +struct VadState { + buffer: Vec, + max_samples: usize, + pre_buffer: VecDeque, + pre_buffer_capacity: usize, + speech_active: bool, + speech_start_time: Option, + silence_start_time: Option, + acc_sum_squares: f32, + acc_count: usize, + last_check: std::time::Instant, +} + +impl VadState { + fn new(max_samples: usize, pre_buffer_capacity: usize) -> Self { + Self { + buffer: Vec::with_capacity(max_samples), + max_samples, + pre_buffer: VecDeque::with_capacity(pre_buffer_capacity), + pre_buffer_capacity, + speech_active: false, + speech_start_time: None, + silence_start_time: None, + acc_sum_squares: 0.0, + acc_count: 0, + last_check: std::time::Instant::now(), + } + } +} + +fn process_audio_callback( + data: &[f32], + channels: usize, + state: &mut VadState, + tx: &mpsc::Sender>, +) { + for frame in data.chunks_exact(channels) { + let sample = if channels == 1 { + frame[0] + } else { + frame.iter().sum::() / channels as f32 + }; + + state.acc_sum_squares += sample * sample; + state.acc_count += 1; + + if state.speech_active { + if state.buffer.len() < state.max_samples { + state.buffer.push(sample); + } + } else { + if state.pre_buffer.len() >= state.pre_buffer_capacity { + state.pre_buffer.pop_front(); + } + state.pre_buffer.push_back(sample); + } + } + + if state.last_check.elapsed() < std::time::Duration::from_millis(33) { + return; + } + state.last_check = std::time::Instant::now(); + + if state.acc_count == 0 { + return; + } + + let rms = (state.acc_sum_squares / state.acc_count as f32).sqrt(); + state.acc_sum_squares = 0.0; + state.acc_count = 0; + + if !state.speech_active { + if rms > SPEECH_THRESHOLD { + match state.speech_start_time { + Some(start) => { + if start.elapsed() + >= std::time::Duration::from_millis(SPEECH_START_DELAY_MS) + { + state.speech_active = true; + state.silence_start_time = None; + + state.buffer.clear(); + state.buffer.extend(state.pre_buffer.drain(..)); + debug!( + "Wake word VAD: speech started (pre-buffer: {} samples)", + state.buffer.len() + ); + } + } + None => { + state.speech_start_time = Some(std::time::Instant::now()); + } + } + } else { + state.speech_start_time = None; + } + } else { + if rms < SILENCE_THRESHOLD { + match state.silence_start_time { + Some(start) => { + if start.elapsed() + >= std::time::Duration::from_millis(SPEECH_END_DELAY_MS) + { + let segment = std::mem::take(&mut state.buffer); + state.speech_active = false; + state.silence_start_time = None; + state.speech_start_time = None; + + if !segment.is_empty() { + let _ = tx.send(segment); + } + } + } + None => { + state.silence_start_time = Some(std::time::Instant::now()); + } + } + } else { + state.silence_start_time = None; + } + + if state.buffer.len() >= state.max_samples { + let segment = std::mem::take(&mut state.buffer); + state.speech_active = false; + state.silence_start_time = None; + state.speech_start_time = None; + + if !segment.is_empty() { + let _ = tx.send(segment); + } + } + } +} + +fn transcribe_segment(app: &AppHandle, samples: Vec) -> anyhow::Result { + let audio_state = app.state::(); + + { + let mut engine_guard = audio_state.engine.lock(); + if engine_guard.is_none() { + let model = app.state::>(); + let model_path = model + .get_model_path() + .map_err(|e| anyhow::anyhow!("Failed to get model path: {}", e))?; + + let mut new_engine = crate::engine::ParakeetEngine::new(); + new_engine + .load_model_with_params(&model_path, ParakeetModelParams::int8()) + .map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?; + + *engine_guard = Some(new_engine); + info!("Model loaded for wake word detection"); + } + } + + let mut engine_guard = audio_state.engine.lock(); + let engine = engine_guard + .as_mut() + .ok_or_else(|| anyhow::anyhow!("Engine not loaded"))?; + + let result = engine + .transcribe_samples(samples, None) + .map_err(|e| anyhow::anyhow!("Transcription failed: {}", e))?; + + Ok(result.text) +} + +fn trigger_recording(app: &AppHandle) { + let audio_state = app.state::(); + audio_state.set_recording_trigger(RecordingTrigger::WakeWord); + + crate::onboarding::onboarding::capture_focus_at_record_start(app); + crate::audio::record_audio(app, RecordingMode::Standard); + + let mut source = recording_state().source.lock(); + *source = RecordingSource::Standard; + + info!("Recording triggered by wake word"); +} + +fn get_device(app: &AppHandle) -> anyhow::Result { + let audio_state = app.state::(); + + if let Some(device) = audio_state.get_cached_device() { + return Ok(device); + } + + let host = cpal::default_host(); + host.default_input_device() + .ok_or_else(|| anyhow::anyhow!("No default input device available")) +} diff --git a/src/features/settings/system/system.tsx b/src/features/settings/system/system.tsx index 3b8df724..c959b5c6 100644 --- a/src/features/settings/system/system.tsx +++ b/src/features/settings/system/system.tsx @@ -12,6 +12,7 @@ import { SoundSettings } from './sound-settings/sound-settings'; import { MicSettings } from './mic-settings/mic-settings'; import { useTranslation } from '@/i18n'; import { RecordModeSettings } from '@/features/settings/system/record-mode-settings/record-mode-settings.tsx'; +import { WakeWordSettings } from '@/features/settings/system/wake-word-settings/wake-word-settings'; import { LogLevelSettings } from './log-level-settings/log-level-settings'; @@ -39,6 +40,8 @@ export const System = () => { + + diff --git a/src/features/settings/system/wake-word-settings/wake-word-settings.tsx b/src/features/settings/system/wake-word-settings/wake-word-settings.tsx new file mode 100644 index 00000000..40515037 --- /dev/null +++ b/src/features/settings/system/wake-word-settings/wake-word-settings.tsx @@ -0,0 +1,89 @@ +import { invoke } from '@tauri-apps/api/core'; +import { useEffect, useRef, useState } from 'react'; +import { SettingsUI } from '@/components/settings-ui'; +import { Switch } from '@/components/switch'; +import { Typography } from '@/components/typography'; +import { Input } from '@/components/input'; +import { Mic } from 'lucide-react'; +import { useTranslation } from '@/i18n'; + +export function WakeWordSettings() { + const [enabled, setEnabled] = useState(false); + const [wakeWord, setWakeWord] = useState('murmure'); + const [previousWord, setPreviousWord] = useState('murmure'); + const { t } = useTranslation(); + const debounceRef = useRef | null>(null); + + useEffect(() => { + invoke('get_wake_word_enabled').then(setEnabled); + invoke('get_wake_word').then((word) => { + setWakeWord(word); + setPreviousWord(word); + }); + }, []); + + const handleToggle = (checked: boolean) => { + if (checked && wakeWord.trim().length === 0) { + return; + } + setEnabled(checked); + invoke('set_wake_word_enabled', { enabled: checked }); + }; + + const handleWordChange = (value: string) => { + setWakeWord(value); + + if (debounceRef.current != null) { + clearTimeout(debounceRef.current); + } + + debounceRef.current = setTimeout(() => { + const trimmed = value.trim(); + if (trimmed.length > 0) { + invoke('set_wake_word', { word: trimmed }); + setPreviousWord(trimmed); + } + }, 500); + }; + + const handleBlur = () => { + if (wakeWord.trim().length === 0) { + setWakeWord(previousWord); + } + }; + + return ( + +
+ + + + {t('Wake Word')} + + + {t( + 'Trigger recording by saying a wake word. Recording stops automatically after silence.' + )} + + + +
+ {enabled && ( +
+ handleWordChange(e.target.value)} + onBlur={handleBlur} + placeholder="murmure" + maxLength={50} + /> + + {t( + 'Experimental — Keeps your microphone listening continuously when enabled.' + )} + +
+ )} +
+ ); +} diff --git a/src/i18n/locales/fr.json b/src/i18n/locales/fr.json index c72b5494..bf12f67a 100644 --- a/src/i18n/locales/fr.json +++ b/src/i18n/locales/fr.json @@ -303,5 +303,8 @@ "You are all set!": "Vous êtes prêt !", "You can customize the prompt or create new modes on the next screen.": "Vous pouvez personnaliser le prompt ou créer de nouveaux modes sur l'écran suivant.", "Your average words per minute with Murmure this month.": "Votre moyenne de mots par minute avec Murmure ce mois-ci.", - "Zero tracking, zero analytics. Your data stays yours, always.": "Zéro suivi, zéro analyse. Vos données restent les vôtres, toujours." + "Zero tracking, zero analytics. Your data stays yours, always.": "Zéro suivi, zéro analyse. Vos données restent les vôtres, toujours.", + "Wake Word": "Mot d'activation", + "Trigger recording by saying a wake word. Recording stops automatically after silence.": "Déclenchez l'enregistrement en disant un mot. L'enregistrement s'arrête automatiquement après un silence.", + "Experimental — Keeps your microphone listening continuously when enabled.": "Expérimental — Le microphone reste en écoute continue quand activé." }