Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src-tauri/src/audio/audio.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::audio::helpers::{cleanup_recordings, ensure_recordings_dir, generate_unique_wav_name};
use crate::audio::pipeline::process_recording;
use crate::audio::recorder::AudioRecorder;
use crate::audio::types::{AudioState, RecordingMode};
use crate::audio::types::{AudioState, RecordingMode, RecordingTrigger};
use crate::clipboard;
use crate::engine::transcription_engine::TranscriptionEngine;
use crate::engine::{ParakeetEngine, ParakeetModelParams};
Expand All @@ -15,6 +15,11 @@ use tauri::{AppHandle, Emitter, Manager};
pub fn record_audio(app: &AppHandle, mode: RecordingMode) {
let state = app.state::<AudioState>();
state.set_recording_mode(mode);
if state.get_recording_trigger() != RecordingTrigger::WakeWord {
state.set_recording_trigger(RecordingTrigger::Keyboard);
}

crate::wake_word::pause_listener(app);

if matches!(mode, RecordingMode::Llm | RecordingMode::Command) {
crate::llm::warmup_ollama_model_background(app);
Expand Down Expand Up @@ -127,10 +132,18 @@ pub fn stop_recording(app: &AppHandle) -> Option<std::path::PathBuf> {
overlay::hide_recording_overlay(app);
}

// Reset recording trigger and resume wake word listener
state.set_recording_trigger(RecordingTrigger::Keyboard);
crate::wake_word::resume_listener(app);

return path;
} else {
debug!("Recording stopped (no active file)");
}

state.set_recording_trigger(RecordingTrigger::Keyboard);
crate::wake_word::resume_listener(app);

None
}

Expand Down Expand Up @@ -168,6 +181,9 @@ pub fn cancel_recording(app: &AppHandle) {
overlay::hide_recording_overlay(app);
}

state.set_recording_trigger(RecordingTrigger::Keyboard);
crate::wake_word::resume_listener(app);

info!("Recording cancelled by user");
}

Expand Down
86 changes: 73 additions & 13 deletions src-tauri/src/audio/recorder.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use crate::audio::helpers::create_wav_writer;
use crate::audio::sound;
use crate::audio::types::RecordingTrigger;
use anyhow::{Context, Error, Result};
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::Device;
use hound::WavWriter;
use log::{debug, error};
use log::{debug, error, info};
use parking_lot::Mutex;
use std::fs::File;
use std::io::BufWriter;
Expand All @@ -14,6 +15,9 @@ use std::sync::Arc;
use tauri::{AppHandle, Emitter, Manager};

/// Elapsed recording time (in seconds) after which the stream callback sets the
/// shared limit flag and emits the "recording-limit-reached" event.
const MAX_RECORDING_DURATION_SECS: u64 = 300; // 5 min
/// Wake-word recordings only: how long (in milliseconds) the RMS level must stay
/// below `SILENCE_AUTO_STOP_THRESHOLD` before the recording is force-stopped.
const SILENCE_AUTO_STOP_MS: u64 = 1500;
/// RMS level below which audio counts as silence for the auto-stop timer.
const SILENCE_AUTO_STOP_THRESHOLD: f32 = 0.03;
/// RMS level at or above which speech is considered to have started; the silence
/// timer is only evaluated once this has been reached at least once.
const SILENCE_AUTO_STOP_SPEECH_THRESHOLD: f32 = 0.03;

type WavWriterType = WavWriter<BufWriter<File>>;
type SharedWriter = Arc<Mutex<Option<WavWriterType>>>;
Expand All @@ -35,6 +39,9 @@ impl AudioRecorder {
// Reset the limit flag at the start of each recording
limit_reached.store(false, Ordering::SeqCst);

let audio_state = app.state::<crate::audio::types::AudioState>();
let recording_trigger = audio_state.get_recording_trigger();

let device = Self::get_device(app.clone())?;
let config = device
.default_input_config()
Expand All @@ -49,6 +56,7 @@ impl AudioRecorder {
writer_arc.clone(),
app.clone(),
limit_reached,
recording_trigger,
)?;

Ok(Self {
Expand Down Expand Up @@ -128,17 +136,33 @@ fn build_stream(
writer: SharedWriter,
app: AppHandle,
limit_reached: Arc<AtomicBool>,
recording_trigger: RecordingTrigger,
) -> Result<cpal::Stream> {
match config.sample_format() {
cpal::SampleFormat::F32 => {
build_stream_impl::<f32>(device, config, writer, app, limit_reached.clone())
}
cpal::SampleFormat::I16 => {
build_stream_impl::<i16>(device, config, writer, app, limit_reached.clone())
}
cpal::SampleFormat::I32 => {
build_stream_impl::<i32>(device, config, writer, app, limit_reached.clone())
}
cpal::SampleFormat::F32 => build_stream_impl::<f32>(
device,
config,
writer,
app,
limit_reached.clone(),
recording_trigger,
),
cpal::SampleFormat::I16 => build_stream_impl::<i16>(
device,
config,
writer,
app,
limit_reached.clone(),
recording_trigger,
),
cpal::SampleFormat::I32 => build_stream_impl::<i32>(
device,
config,
writer,
app,
limit_reached.clone(),
recording_trigger,
),
f => Err(anyhow::anyhow!("Unsupported sample format: {:?}", f)),
}
}
Expand All @@ -149,6 +173,7 @@ fn build_stream_impl<T>(
writer: SharedWriter,
app: AppHandle,
limit_reached_flag: Arc<AtomicBool>,
recording_trigger: RecordingTrigger,
) -> Result<cpal::Stream>
where
T: cpal::Sample + cpal::SizedSample + Send + 'static,
Expand All @@ -166,6 +191,11 @@ where
let start_time = std::time::Instant::now();
let mut local_limit_triggered = false;

let is_wake_word = recording_trigger == RecordingTrigger::WakeWord;
let mut silence_start: Option<std::time::Instant> = None;
let mut silence_auto_stop_triggered = false;
let mut has_speech_started = false;

let app_handle = app.clone();
let writer_clone = writer.clone();

Expand All @@ -178,9 +208,7 @@ where
>= std::time::Duration::from_secs(MAX_RECORDING_DURATION_SECS)
{
local_limit_triggered = true;
// Set the shared atomic flag - this is the reliable cross-thread communication
limit_reached_flag.store(true, Ordering::SeqCst);
// Also emit event for UI updates
let _ = app_handle.emit("recording-limit-reached", ());
}

Expand Down Expand Up @@ -218,11 +246,43 @@ where
// EMA smoothing
ema_level = alpha * level + (1.0 - alpha) * ema_level;
let _ = app_handle.emit("mic-level", ema_level);
// also forward to overlay window if present
if let Some(overlay_window) = app_handle.get_webview_window("recording_overlay")
{
let _ = overlay_window.emit("mic-level", ema_level);
}

if is_wake_word && !silence_auto_stop_triggered {
if rms >= SILENCE_AUTO_STOP_SPEECH_THRESHOLD {
if !has_speech_started {
info!("Wake word auto-stop: speech detected (rms={:.4})", rms);
}
has_speech_started = true;
}

if has_speech_started {
if rms < SILENCE_AUTO_STOP_THRESHOLD {
if silence_start.is_none() {
silence_start = Some(std::time::Instant::now());
debug!("Wake word auto-stop: silence started (rms={:.4})", rms);
}
if let Some(start) = silence_start {
if start.elapsed()
>= std::time::Duration::from_millis(SILENCE_AUTO_STOP_MS)
{
silence_auto_stop_triggered = true;
info!("Wake word auto-stop: stopping after {}ms silence", SILENCE_AUTO_STOP_MS);
let app = app_handle.clone();
std::thread::spawn(move || {
crate::shortcuts::force_stop_recording(&app);
});
}
}
} else {
silence_start = None;
}
}
}

acc_sum_squares = 0.0;
acc_count = 0;
} else {
Expand Down
33 changes: 30 additions & 3 deletions src-tauri/src/audio/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ use crate::audio::recorder::AudioRecorder;
use crate::engine::ParakeetEngine;
use cpal::Device;
use parking_lot::Mutex;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};

pub struct AudioState {
pub recorder: Mutex<Option<AudioRecorder>>,
pub engine: Mutex<Option<ParakeetEngine>>,
pub current_file_name: Mutex<Option<String>>,
recording_mode: std::sync::atomic::AtomicU8,
recording_mode: AtomicU8,
recording_trigger: AtomicU8,
/// Flag indicating recording duration limit has been reached
pub limit_reached: std::sync::Arc<AtomicBool>,
/// Cached audio input device to avoid re-enumerating devices on each recording
Expand All @@ -34,13 +35,30 @@ impl From<u8> for RecordingMode {
}
}

/// What initiated a recording, stored as a `u8` discriminant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum RecordingTrigger {
    /// Discriminant 0; also the fallback for any unrecognised raw value.
    Keyboard = 0,
    /// Discriminant 1.
    WakeWord = 1,
}

impl From<u8> for RecordingTrigger {
    /// Decodes a raw discriminant: `1` maps to `WakeWord`, every other value
    /// maps to `Keyboard`.
    fn from(val: u8) -> Self {
        if val == RecordingTrigger::WakeWord as u8 {
            RecordingTrigger::WakeWord
        } else {
            RecordingTrigger::Keyboard
        }
    }
}

impl AudioState {
pub fn new() -> Self {
Self {
recorder: Mutex::new(None),
engine: Mutex::new(None),
current_file_name: Mutex::new(None),
recording_mode: std::sync::atomic::AtomicU8::new(RecordingMode::Standard as u8),
recording_mode: AtomicU8::new(RecordingMode::Standard as u8),
recording_trigger: AtomicU8::new(RecordingTrigger::Keyboard as u8),
limit_reached: std::sync::Arc::new(AtomicBool::new(false)),
cached_device: Mutex::new(None),
}
Expand All @@ -54,6 +72,15 @@ impl AudioState {
self.recording_mode.load(Ordering::SeqCst).into()
}

/// Stores `trigger` in the `recording_trigger` atomic as its `u8` discriminant,
/// using sequentially consistent ordering.
pub fn set_recording_trigger(&self, trigger: RecordingTrigger) {
    let raw = trigger as u8;
    self.recording_trigger.store(raw, Ordering::SeqCst);
}

pub fn get_recording_trigger(&self) -> RecordingTrigger {
self.recording_trigger.load(Ordering::SeqCst).into()
}

pub fn is_limit_reached(&self) -> bool {
self.limit_reached.load(Ordering::SeqCst)
}
Expand Down
2 changes: 2 additions & 0 deletions src-tauri/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub mod record_mode;
pub mod settings;
pub mod shortcuts;
pub mod stats;
pub mod wake_word;

pub use clipboard::*;
pub use dictionary::*;
Expand All @@ -25,3 +26,4 @@ pub use record_mode::*;
pub use settings::*;
pub use shortcuts::*;
pub use stats::*;
pub use wake_word::*;
56 changes: 56 additions & 0 deletions src-tauri/src/commands/wake_word.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use tauri::{command, AppHandle, Manager};

/// Tauri command: returns the `wake_word_enabled` flag from the settings
/// loaded via `crate::settings::load_settings`.
///
/// This implementation never produces an `Err`.
#[command]
pub fn get_wake_word_enabled(app: AppHandle) -> Result<bool, String> {
    let settings = crate::settings::load_settings(&app);
    let enabled = settings.wake_word_enabled;
    Ok(enabled)
}

/// Tauri command: enables or disables the wake word feature.
///
/// Loads the settings, updates `wake_word_enabled`, saves them, and then starts
/// or stops the wake word listener to match the new value.
///
/// # Errors
///
/// Returns `"Wake word cannot be empty"` when enabling while the configured
/// wake word is blank after trimming, and propagates any error from
/// `crate::settings::save_settings`.
#[command]
pub fn set_wake_word_enabled(app: AppHandle, enabled: bool) -> Result<(), String> {
    let mut settings = crate::settings::load_settings(&app);

    // Enabling requires a non-blank wake word; disabling is always allowed.
    let wake_word_is_blank = settings.wake_word.trim().is_empty();
    if enabled && wake_word_is_blank {
        return Err("Wake word cannot be empty".to_string());
    }

    settings.wake_word_enabled = enabled;
    crate::settings::save_settings(&app, &settings)?;

    // The listener is only touched after the settings were saved successfully.
    if !enabled {
        crate::wake_word::stop_listener(&app);
    } else {
        crate::wake_word::start_listener(&app);
    }

    Ok(())
}

/// Tauri command: returns the `wake_word` string from the settings loaded via
/// `crate::settings::load_settings`.
///
/// This implementation never produces an `Err`.
#[command]
pub fn get_wake_word(app: AppHandle) -> Result<String, String> {
    let settings = crate::settings::load_settings(&app);
    let wake_word = settings.wake_word;
    Ok(wake_word)
}

/// Tauri command: validates and stores a new wake word.
///
/// The input is trimmed, validated, written to the settings, and saved. If the
/// wake word listener is currently active, or the feature is enabled in the
/// settings, the listener is stopped and started again.
///
/// # Errors
///
/// - `"Wake word cannot be empty"` when `word` is blank after trimming.
/// - `"Wake word is too long (max 50 characters)"` when the trimmed word has
///   more than 50 characters.
/// - Any error propagated from `crate::settings::save_settings`.
#[command]
pub fn set_wake_word(app: AppHandle, word: String) -> Result<(), String> {
    /// Upper bound on the wake word length, counted in characters.
    const MAX_WAKE_WORD_CHARS: usize = 50;

    let trimmed = word.trim();
    if trimmed.is_empty() {
        return Err("Wake word cannot be empty".to_string());
    }
    // `str::len` counts UTF-8 bytes, which would reject non-ASCII wake words
    // well below the advertised limit; count characters so the check matches
    // the error message.
    if trimmed.chars().count() > MAX_WAKE_WORD_CHARS {
        return Err("Wake word is too long (max 50 characters)".to_string());
    }

    let mut s = crate::settings::load_settings(&app);
    s.wake_word = trimmed.to_string();
    crate::settings::save_settings(&app, &s)?;

    // Restart the listener after the new word has been saved.
    let state = app.state::<crate::wake_word::types::WakeWordState>();
    if state.is_active() || s.wake_word_enabled {
        crate::wake_word::stop_listener(&app);
        crate::wake_word::start_listener(&app);
    }

    Ok(())
}
17 changes: 16 additions & 1 deletion src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ mod settings;
mod shortcuts;
mod stats;
mod utils;
mod wake_word;

use crate::shortcuts::init_shortcuts;
use audio::preload_engine;
use audio::types::AudioState;
use commands::*;
use wake_word::types::WakeWordState;
use dictionary::Dictionary;
use http_api::HttpApiState;
use llm::llm::pull_ollama_model;
Expand Down Expand Up @@ -87,6 +89,7 @@ pub fn run() {
Arc::new(Model::new(app.handle().clone()).expect("Failed to initialize model"));
app.manage(model);
app.manage(AudioState::new());
app.manage(WakeWordState::new());

let mut s = settings::load_settings(app.handle());

Expand Down Expand Up @@ -134,6 +137,14 @@ pub fn run() {
crate::shortcuts::force_stop_recording(&app_handle);
});

if s.wake_word_enabled {
let app_handle = app.handle().clone();
std::thread::spawn(move || {
std::thread::sleep(std::time::Duration::from_secs(2));
wake_word::start_listener(&app_handle);
});
}

Ok(())
})
.on_window_event(|window, event| {
Expand Down Expand Up @@ -214,7 +225,11 @@ pub fn run() {
get_log_level,
set_log_level,
open_accessibility_settings,
check_accessibility_permission
check_accessibility_permission,
get_wake_word_enabled,
set_wake_word_enabled,
get_wake_word,
set_wake_word
])
.run(tauri::generate_context!())
.expect("error while running tauri application");
Expand Down
Loading