55import threading
66import os
77import sys
8-
9- from datetime import datetime , timedelta
10- from queue import Queue
118import time
129import random
10+ import configparser
1311from tempfile import NamedTemporaryFile
1412
1513import azure .cognitiveservices .speech as speechsdk
16- import speech_recognition as sr
17- import openai
14+ from openai import OpenAI
1815
1916import board
2017import digitalio
2118from adafruit_motorkit import MotorKit
2219
20+ from listener import Listener
21+
22+ API_KEYS_FILE = "~/keys.txt"
23+
2324# ChatGPT Parameters
2425SYSTEM_ROLE = (
2526 "You are a helpful voice assistant in the form of a talking teddy bear"
3435
3536# Speech Recognition Parameters
3637ENERGY_THRESHOLD = 1000 # Energy level for mic to detect
PHRASE_TIMEOUT = 3.0  # Space between recordings for separating phrases
3838RECORD_TIMEOUT = 30
3939
4040# Motor Parameters
4444 # It pauses for BASE_MOUTH_DURATION ± SPEECH_VARIANCE
4545MOTOR_DUTY_CYCLE = 1.0 # Lower provides less power to the motors
4646
47- # Import keys from environment variables
48- openai .api_key = os .environ .get ("OPENAI_API_KEY" )
49- speech_key = os .environ .get ("SPEECH_KEY" )
50- service_region = os .environ .get ("SPEECH_REGION" )
# Parse API keys from API_KEYS_FILE, with sanity checks done in
# get_config_value() below.
config = configparser.ConfigParser()

# Resolve "~" against the invoking user's home directory. Prefer $USER so the
# correct home is found even when the script is run via sudo, but fall back to
# the current process's home when $USER is unset (e.g. cron/systemd).
username = os.environ.get("USER", "")
user_homedir = os.path.expanduser(f"~{username}")
# Only expand a *leading* tilde; a tilde elsewhere in the path is literal.
if API_KEYS_FILE.startswith("~"):
    API_KEYS_FILE = user_homedir + API_KEYS_FILE[1:]
def get_config_value(section, key, min_length=None):
    """Fetch ``key`` from ``section`` of the parsed API-keys file.

    Prints a diagnostic and exits the program if the section or key is
    missing, or if the value is shorter than ``min_length`` characters
    (a cheap sanity check that a real key, not a placeholder, was pasted
    into the file).

    :param section: INI section name, e.g. ``"openai"`` or ``"azure"``.
    :param key: Option name inside that section.
    :param min_length: Optional minimum length for the value; ``None``
        (the default) disables the length check.
    :return: The configuration value as a string.
    """
    if not config.has_section(section):
        print(f"Please make sure API_KEYS_FILE points to a valid file and has an [{section}] section.")
        sys.exit(1)
    if key not in config[section]:
        print(
            f"Please make sure your API keys file contains an {key} under the {section} section."
        )
        sys.exit(1)
    value = config[section][key]
    if min_length is not None and len(value) < min_length:
        print(f"Please set {key} in your API keys file with a valid key.")
        sys.exit(1)
    # Return the already-fetched value instead of looking it up a second time.
    return value
68+
# Read the key file once; expanduser handles any remaining "~" form.
_keys_path = os.path.expanduser(API_KEYS_FILE)
config.read(_keys_path)

# Create the OpenAI client with the key from the [openai] section
# (min length 10 is a quick sanity check against placeholder values).
openai = OpenAI(api_key=get_config_value("openai", "OPENAI_API_KEY", 10))

# Azure Speech credentials from the [azure] section.
speech_key = get_config_value("azure", "SPEECH_KEY", 15)
service_region = get_config_value("azure", "SPEECH_REGION")
5778
# Configure Azure TTS with the key/region loaded above and select the voice
# used for all of the bear's spoken responses.
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_synthesis_voice_name = AZURE_SPEECH_VOICE
6081
6182
def sendchat(prompt):
    """Send ``prompt`` to ChatGPT and return the complete reply text.

    The request is streamed so tokens arrive incrementally; the chunks are
    collected and joined into a single string before returning.

    :param prompt: The user's transcribed speech.
    :return: The assistant's full response as one string.
    """
    stream = openai.chat.completions.create(
        model=CHATGPT_MODEL,
        messages=[
            {"role": "system", "content": SYSTEM_ROLE},
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    # Accumulate the streamed deltas; the final chunk's delta content is
    # None, so skip those explicitly.
    parts = []
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            parts.append(delta)
    return "".join(parts)
73100
74101def transcribe (wav_data ):
75102 # Read the transcription.
@@ -88,69 +115,6 @@ def transcribe(wav_data):
88115 return "I wasn't able to understand you. Please repeat that."
89116
90117
class Listener:
    """Captures microphone audio via the speech_recognition library and
    buffers raw audio bytes in a thread-safe queue until the caller is
    ready to transcribe them."""

    def __init__(self):
        # Handle returned by a background-listen call, if one is started.
        self.listener_handle = None
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = ENERGY_THRESHOLD
        # Fixed threshold — don't let the library auto-adjust it downward.
        self.recognizer.dynamic_energy_threshold = False
        self.recognizer.pause_threshold = 1
        # Raw bytes accumulated for the phrase currently being assembled.
        self.last_sample = bytes()
        # Timestamp of the most recent audio, used to detect phrase gaps.
        self.phrase_time = datetime.utcnow()
        self.phrase_timeout = PHRASE_TIMEOUT
        self.phrase_complete = False
        # Thread safe Queue for passing data from the threaded recording callback.
        self.data_queue = Queue()
        self.mic_dev_index = None

    def listen(self):
        """Block on the microphone for one utterance and queue its raw bytes."""
        if not self.listener_handle:
            with sr.Microphone() as source:
                print(source.stream)
                # Calibrate the energy threshold against current room noise.
                self.recognizer.adjust_for_ambient_noise(source)
                audio = self.recognizer.listen(source, timeout=RECORD_TIMEOUT)
                data = audio.get_raw_data()
                self.data_queue.put(data)

    def record_callback(self, _, audio: sr.AudioData) -> None:
        """Background-listener callback: queue the captured audio."""
        # Grab the raw bytes and push it into the thread safe queue.
        data = audio.get_raw_data()
        self.data_queue.put(data)

    def speech_waiting(self):
        """Return True if recorded audio is waiting in the queue."""
        return not self.data_queue.empty()

    def get_speech(self):
        """Pop one chunk of raw audio bytes, or None if the queue is empty."""
        if self.speech_waiting():
            return self.data_queue.get()
        return None

    def get_audio_data(self):
        """Drain queued audio into a single AudioData, or return None.

        Starts a fresh phrase (and sets ``phrase_complete``) when more than
        ``phrase_timeout`` seconds have elapsed since the previous audio;
        otherwise new bytes are appended to the in-progress phrase.
        """
        now = datetime.utcnow()
        if self.speech_waiting():
            self.phrase_complete = False
            if self.phrase_time and now - self.phrase_time > timedelta(
                seconds=self.phrase_timeout
            ):
                # Gap exceeded the timeout: discard the old sample and mark
                # the previous phrase as complete.
                self.last_sample = bytes()
                self.phrase_complete = True
            self.phrase_time = now

            # Concatenate our current audio data with the latest audio data.
            while self.speech_waiting():
                data = self.get_speech()
                self.last_sample += data

            # Use AudioData to convert the raw data to wav data.
            # NOTE(review): opening the Microphone here only to read its
            # SAMPLE_RATE / SAMPLE_WIDTH — presumably matches the capture
            # device's format; verify.
            with sr.Microphone() as source:
                audio_data = sr.AudioData(
                    self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
                )
                return audio_data

        return None
152-
153-
154118class Bear :
155119 def __init__ (self , azure_speech_config ):
156120 kit = MotorKit (i2c = board .I2C ())
@@ -234,41 +198,35 @@ def speak(self, text):
234198 if cancellation_details .reason == speechsdk .CancellationReason .Error :
235199 print ("Error details: {}" .format (cancellation_details .error_details ))
236200
237-
238201def main ():
239- listener = Listener ()
202+ listener = Listener (openai . api_key , ENERGY_THRESHOLD , RECORD_TIMEOUT )
240203 bear = Bear (speech_config )
241204
242205 transcription = ["" ]
243206 bear .speak (
244207 "Hello there! Just give my left foot a squeeze if you would like to get my attention."
245208 )
209+
246210 while True :
247211 try :
248212 # If button is pressed, start listening
249213 if bear .foot_pressed ():
250214 bear .speak ("How may I help you?" )
251215 listener .listen ()
252216
253- # Pull raw recorded audio from the queue.
254217 if listener .speech_waiting ():
255- audio_data = listener .get_audio_data ()
256218 bear .speak ("Let me think about that" )
257219 bear .move_arms (hide = True )
258- text = transcribe ( audio_data . get_wav_data () )
220+ text = listener . recognize ( )
259221
260222 if text :
261- if listener .phrase_complete :
262- transcription .append (text )
263- print (f"Phrase Complete. Sent '{ text } ' to ChatGPT." )
264- chat_response = sendchat (text )
265- transcription .append (f"> { chat_response } " )
266- print ("Got response from ChatGPT. Beginning speech synthesis." )
267- bear .move_arms (hide = False )
268- bear .speak (chat_response )
269- else :
270- print ("Partial Phrase..." )
271- transcription [- 1 ] = text
223+ transcription .append (text )
224+ print (f"Phrase Complete. Sent '{ text } ' to ChatGPT." )
225+ chat_response = sendchat (text )
226+ transcription .append (f"> { chat_response } " )
227+ print ("Got response from ChatGPT. Beginning speech synthesis." )
228+ bear .move_arms (hide = False )
229+ bear .speak (chat_response )
272230
273231 os .system ("clear" )
274232 for line in transcription :
0 commit comments