22#
33# SPDX-License-Identifier: MIT
44
5- from queue import Queue
65import time
76
87import speech_recognition as sr
98
10-
119class Listener :
1210 def __init__ (
13- self , api_key , energy_threshold = 300 , phrase_timeout = 3.0 , record_timeout = 30
11+ self , api_key , energy_threshold = 300 , record_timeout = 30
1412 ):
1513 self .listener_handle = None
1614 self .microphone = sr .Microphone ()
1715 self .recognizer = sr .Recognizer ()
1816 self .recognizer .energy_threshold = energy_threshold
1917 self .recognizer .dynamic_energy_threshold = False
2018 self .recognizer .pause_threshold = 1
21- self .last_sample = bytes ()
2219 self .phrase_time = time .monotonic ()
23- self .phrase_timeout = phrase_timeout
2420 with self .microphone as source :
2521 self .recognizer .adjust_for_ambient_noise (
2622 source
2723 ) # we only need to calibrate once, before we start listening
2824 self .record_timeout = record_timeout
29- self .phrase_complete = False
30- self .data_queue = Queue ()
25+ self ._audio = None
3126 self .listener_handle = None
3227 self .api_key = api_key
3328
3429 def listen (self , ready_callback = None ):
3530 print ("Start listening..." )
36- self .phrase_complete = False
37- start = time .monotonic ()
3831 self ._start_listening ()
3932 if ready_callback :
4033 ready_callback ()
34+
4135 while (
4236 self .listener_handle and not self .speech_waiting ()
43- ) or not self .phrase_complete :
44- if self .phrase_time and time .monotonic () > start + self .phrase_timeout :
45- self .last_sample = bytes ()
46- self .phrase_complete = True
47- self .phrase_time = time .monotonic () - start
37+ ):
38+ time .sleep (0.1 )
4839 self .stop_listening ()
4940
5041 def _save_audio_callback (self , _ , audio ):
5142 print ("Saving audio" )
52- data = audio .get_raw_data ()
53- self .data_queue .put (data )
54-
55- def _get_audio (self ):
56- """Concatenate and convert the queued raw data back to audio and return it"""
57- start = time .monotonic ()
58- if self .speech_waiting ():
59- self .phrase_complete = False
60- if self .phrase_time and time .monotonic () > start + self .phrase_timeout :
61- self .last_sample = bytes ()
62- self .phrase_complete = True
63- self .phrase_time = time .monotonic () - start
64-
65- # Concatenate our current audio data with the latest audio data.
66- while self .speech_waiting ():
67- data = self .data_queue .get ()
68- self .last_sample += data
69-
70- # Use AudioData to convert the raw data to wav data.
71- return sr .AudioData (
72- self .last_sample ,
73- self .microphone .SAMPLE_RATE ,
74- self .microphone .SAMPLE_WIDTH ,
75- )
76- return None
43+ self ._audio = audio
7744
7845 def _start_listening (self ):
7946 if not self .listener_handle :
@@ -93,20 +60,19 @@ def is_listening(self):
9360 return self .listener_handle is not None
9461
9562 def speech_waiting (self ):
96- return not self .data_queue . empty ()
63+ return self ._audio is not None
9764
9865 def recognize (self ):
99- audio = self ._get_audio ()
100- if audio :
66+ if self ._audio :
10167 # Transcribe the audio data to text using Whisper
10268 print ("Recognizing..." )
10369 attempts = 0
10470 while attempts < 3 :
10571 try :
10672 result = self .recognizer .recognize_whisper_api (
107- audio , api_key = self .api_key
73+ self . _audio , api_key = self .api_key
10874 )
109-
75+ self . _audio = None
11076 return result .strip ()
11177 except sr .RequestError as e :
11278 print (f"Error: { e } " )
0 commit comments