Skip to content

Commit 79616ae

Browse files
committed
feat: adds cancel recording function
1 parent b19cff0 commit 79616ae

File tree

5 files changed

+138
-5
lines changed

5 files changed

+138
-5
lines changed

lib/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def main():
146146
record_start_parser.add_argument('--lang', dest='language', metavar='CODE',
147147
help='Language code for transcription (e.g., en, it, de)')
148148
record_subparsers.add_parser('stop', help='Stop recording')
149+
record_subparsers.add_parser('cancel', help='Cancel recording and discard audio')
149150
record_toggle_parser = record_subparsers.add_parser('toggle', help='Toggle recording on/off')
150151
record_toggle_parser.add_argument('--lang', dest='language', metavar='CODE',
151152
help='Language code for transcription (e.g., en, it, de)')

lib/main.py

Lines changed: 121 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def __init__(self):
8686
self.text_injector = TextInjector(self.config)
8787
self.global_shortcuts = None
8888
self.secondary_shortcuts = None
89+
self._cancel_shortcuts = None
8990

9091
# Application state
9192
self.is_recording = False
@@ -310,6 +311,27 @@ def _setup_global_shortcuts(self):
310311
print(f"[ERROR] Failed to initialize secondary shortcuts: {e}", flush=True)
311312
self.secondary_shortcuts = None
312313

314+
# Set up cancel shortcut if configured
315+
try:
316+
cancel_shortcut_key = self.config.get_setting("cancel_shortcut", None)
317+
if cancel_shortcut_key:
318+
self._cancel_shortcuts = GlobalShortcuts(
319+
cancel_shortcut_key,
320+
self._on_cancel_shortcut_triggered,
321+
None, # No release callback
322+
device_path=selected_device_path,
323+
device_name=selected_device_name,
324+
grab_keys=grab_keys,
325+
)
326+
if self._cancel_shortcuts.start():
327+
print(f"[INFO] Cancel shortcut registered: {cancel_shortcut_key}", flush=True)
328+
else:
329+
print(f"[WARNING] Failed to start cancel shortcut: {cancel_shortcut_key}", flush=True)
330+
self._cancel_shortcuts = None
331+
except Exception as e:
332+
print(f"[ERROR] Failed to initialize cancel shortcut: {e}", flush=True)
333+
self._cancel_shortcuts = None
334+
313335
# Set up submit shortcut for long-form mode
314336
if recording_mode == 'long_form':
315337
try:
@@ -627,6 +649,16 @@ def _on_secondary_shortcut_triggered(self):
627649
# Secondary release is identical to primary release - reuse the same handler
628650
_on_secondary_shortcut_released = _on_shortcut_released
629651

652+
def _on_cancel_shortcut_triggered(self):
    """Handle the cancel shortcut: discard the recording without transcribing.

    Reads ``recording_mode`` from the config (defaulting to ``"toggle"``).
    In ``"long_form"`` mode the long-form machinery is lazily initialized
    and the session is cancelled while holding ``self._longform_lock``.
    In every other mode the regular recording is cancelled through
    ``self._cancel_recording()``.
    """
    recording_mode = self.config.get_setting("recording_mode", "toggle")

    # Every mode except long-form shares the plain cancel path.
    if recording_mode != "long_form":
        self._cancel_recording()
        return

    # Long-form state is created lazily, so make sure it exists before the
    # lock and the state machine are touched.
    self._ensure_longform_initialized()
    with self._longform_lock:
        self._cancel_longform_recording()
661+
630662
# Long-form recording mode handlers
631663
def _ensure_longform_initialized(self):
632664
"""Ensure long-form segment manager is initialized (lazy initialization)"""
@@ -755,6 +787,34 @@ def _longform_resume_recording(self):
755787
# Play start sound
756788
self.audio_manager.play_start_sound()
757789

790+
def _cancel_longform_recording(self):
    """Cancel the long-form recording session and discard all segments.

    Does nothing unless ``self._longform_state`` is ``'RECORDING'`` or
    ``'PAUSED'``. Otherwise it stops the auto-save timer, stops audio
    capture, clears the segment manager's session, resets the in-memory
    long-form state to ``'IDLE'``, writes the ``'IDLE'`` state and a
    "not recording" status, hides the mic OSD and plays the error sound.

    Every step is attempted independently: a failure in one step is logged
    and the remaining steps still run, so a single failing call cannot leave
    capture running or the state stuck in ``'RECORDING'``/``'PAUSED'``.
    No exception is propagated to the caller.
    """
    if self._longform_state not in ('RECORDING', 'PAUSED'):
        return

    print("[LONGFORM] Recording cancelled (discarded)", flush=True)

    def attempt(label, step):
        # Run one cleanup step; log and continue on failure so later steps
        # are never skipped because an earlier one raised.
        try:
            step()
        except Exception as e:
            print(f"[ERROR] Error cancelling long-form recording ({label}): {e}", flush=True)

    # Lambdas defer attribute lookups so that they, too, happen inside the
    # try block of attempt().
    attempt("stop auto-save timer", lambda: self._stop_longform_auto_save_timer())
    attempt("stop audio capture", lambda: self.audio_capture.stop_recording())
    attempt("clear session", lambda: self._longform_segment_manager.clear_session())

    # Reset in-memory state unconditionally, whatever happened above.
    self._longform_error_audio = None
    self._longform_language_override = None
    self._longform_state = 'IDLE'

    attempt("write long-form state", lambda: self._write_longform_state('IDLE'))
    attempt("hide mic OSD", lambda: self._hide_mic_osd())
    attempt("write recording status", lambda: self._write_recording_status(False))
    attempt("play cancel sound", lambda: self.audio_manager.play_error_sound())
817+
758818
def _longform_submit(self, retry=False):
759819
"""Submit all accumulated segments for transcription"""
760820
print("[LONGFORM] Submitting for transcription")
@@ -1134,6 +1194,47 @@ def _cancel_recording_muted(self):
11341194
except Exception:
11351195
pass # Best effort cleanup
11361196

1197+
def _cancel_recording(self):
    """Cancel recording and discard audio without transcribing or injecting text.

    Under ``self._recording_lock`` the method returns immediately when no
    recording is active; otherwise it clears ``self.is_recording`` and the
    current language override. After releasing the lock it hides the mic
    OSD, stops audio-level monitoring, writes a "not recording" status,
    restores ducked audio, stops capture (whatever ``stop_recording()``
    returns is ignored), closes the realtime WebSocket client when the
    ``realtime-ws`` backend has one open, and plays the error sound.

    Every step is attempted independently and exactly once: a failure is
    logged and the remaining steps still run, so one failing call cannot
    leave audio ducked or capture running. No exception is propagated.
    """
    with self._recording_lock:
        if not self.is_recording:
            return
        self.is_recording = False
        self._current_language_override = None

    print("Recording cancelled (discarded)", flush=True)

    def attempt(label, step):
        # Run one cleanup step; log and continue on failure.
        try:
            step()
        except Exception as e:
            print(f"[ERROR] Error cancelling recording ({label}): {e}", flush=True)

    def restore_ducked_audio():
        # Restore audio only if it was ducked.
        if self.audio_ducker.is_ducked:
            self.audio_ducker.restore()

    def close_realtime_client():
        # No transcription will happen, so a live realtime-ws client is closed.
        backend = normalize_backend(self.config.get_setting('transcription_backend', 'pywhispercpp'))
        if backend == 'realtime-ws' and self.whisper_manager._realtime_client:
            self.whisper_manager._cleanup_realtime_client()

    # Lambdas defer attribute lookups so that they, too, happen inside the
    # try block of attempt().
    attempt("hide mic OSD", lambda: self._hide_mic_osd())
    attempt("stop audio level monitoring", lambda: self._stop_audio_level_monitoring())
    attempt("write recording status", lambda: self._write_recording_status(False))
    attempt("restore ducked audio", restore_ducked_audio)
    attempt("stop audio capture", lambda: self.audio_capture.stop_recording())
    attempt("close realtime client", close_realtime_client)
    attempt("play cancel sound", lambda: self.audio_manager.play_error_sound())
1237+
11371238
def _stop_recording(self):
11381239
"""Stop voice recording and process audio"""
11391240
if not self.is_recording:
@@ -1605,8 +1706,8 @@ def _recording_control_listener(self):
16051706
# Handle multiple commands written to FIFO before read
16061707
# (e.g., user clicks rapidly during timeout - "start\nstart")
16071708
# Take only the last valid command (most recent intent)
1608-
# Commands can be: 'start', 'start:lang', 'stop', 'submit'
1609-
valid_base_commands = {'start', 'stop', 'submit'}
1709+
# Commands can be: 'start', 'start:lang', 'stop', 'cancel', 'submit'
1710+
valid_base_commands = {'start', 'stop', 'cancel', 'submit'}
16101711
lines = [line.strip() for line in raw_data.splitlines() if line.strip()]
16111712

16121713
# Parse commands - extract base command and optional language
@@ -1673,6 +1774,20 @@ def _recording_control_listener(self):
16731774
self._stop_recording()
16741775
else:
16751776
print("[CONTROL] Not currently recording, ignoring stop request", flush=True)
1777+
elif action == "cancel":
1778+
if recording_mode == "long_form":
1779+
self._ensure_longform_initialized()
1780+
with self._longform_lock:
1781+
if self._longform_state in ('RECORDING', 'PAUSED'):
1782+
print("[CONTROL] Long-form cancel requested (immediate)", flush=True)
1783+
self._cancel_longform_recording()
1784+
else:
1785+
print(f"[CONTROL] Long-form in {self._longform_state} state, ignoring cancel request", flush=True)
1786+
elif self.is_recording:
1787+
print("[CONTROL] Recording cancel requested (immediate)", flush=True)
1788+
self._cancel_recording()
1789+
else:
1790+
print("[CONTROL] Not currently recording, ignoring cancel request", flush=True)
16761791
elif action == "submit":
16771792
# Submit command for long-form mode submit shortcut
16781793
if recording_mode == "long_form":
@@ -2091,6 +2206,10 @@ def _cleanup(self):
20912206
if self.secondary_shortcuts:
20922207
self.secondary_shortcuts.stop()
20932208

2209+
# Stop cancel shortcut
2210+
if self._cancel_shortcuts:
2211+
self._cancel_shortcuts.stop()
2212+
20942213
# Stop audio capture
20952214
if self.is_recording:
20962215
self.audio_capture.stop_recording()

lib/src/cli_commands.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4383,14 +4383,14 @@ def record_command(action: str, language: str = None):
43834383
"""
43844384
Control recording via CLI - useful when keyboard grab is not possible.
43854385
4386-
This writes to the recording control FIFO to trigger start/stop/toggle
4386+
This writes to the recording control FIFO to trigger start/stop/cancel/toggle
43874387
without requiring keyboard grab. Useful for users with:
43884388
- External hotkey systems (KDE, GNOME, sxhkd, etc.)
43894389
- Keyboard remappers that grab devices (Espanso, keyd, kmonad)
43904390
- Multiple keyboard tools that conflict with grab_keys
43914391
43924392
Args:
4393-
action: The action to perform (start, stop, toggle, status)
4393+
action: The action to perform (start, stop, cancel, toggle, status)
43944394
language: Optional language code for transcription (e.g., 'en', 'it', 'de')
43954395
"""
43964396
import stat
@@ -4473,6 +4473,13 @@ def send_control(command: str) -> bool:
44734473
if send_control('stop'):
44744474
log_success("Recording stopped")
44754475

4476+
elif action == 'cancel':
4477+
if not is_recording():
4478+
log_warning("Not currently recording")
4479+
return
4480+
if send_control('cancel'):
4481+
log_success("Recording cancelled (audio discarded)")
4482+
44764483
elif action == 'toggle':
44774484
if is_recording():
44784485
if send_control('stop'):
@@ -4490,5 +4497,5 @@ def send_control(command: str) -> bool:
44904497

44914498
else:
44924499
log_error(f"Unknown action: {action}")
4493-
log_info("Available actions: start, stop, toggle, status")
4500+
log_info("Available actions: start, stop, cancel, toggle, status")
44944501

lib/src/config_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def __init__(self):
2525
'primary_shortcut': 'SUPER+ALT+D',
2626
'secondary_shortcut': None, # Optional secondary hotkey for language-specific recording (e.g., "SUPER+ALT+I")
2727
'secondary_language': None, # Language code for secondary shortcut (e.g., "it", "en", "fr", etc.)
28+
'cancel_shortcut': None, # Optional shortcut to cancel recording and discard audio (e.g., "SUPER+ESCAPE")
2829
'recording_mode': 'toggle', # 'toggle' | 'push_to_talk' | 'auto' (hybrid tap/hold)
2930
'grab_keys': False, # Exclusive keyboard grab (false = safer, true = suppress shortcut from other apps)
3031
'use_hypr_bindings': False, # Use Hyprland compositor bindings instead of evdev (disables GlobalShortcuts)

share/config.schema.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
"default": null,
2626
"description": "Language code for secondary shortcut (e.g., 'it', 'en', 'fr')"
2727
},
28+
"cancel_shortcut": {
29+
"type": ["string", "null"],
30+
"default": null,
31+
"description": "Optional shortcut to cancel recording and discard audio without transcribing (e.g., 'SUPER+ESCAPE')"
32+
},
2833
"recording_mode": {
2934
"type": "string",
3035
"enum": ["toggle", "push_to_talk", "auto"],

0 commit comments

Comments
 (0)