Skip to content

Commit 738b887

Browse files
authored
Merge pull request #2781 from makermelissa/update-chatgptbear
Update Bear for Bookworm
2 parents 033c530 + 116a41d commit 738b887

File tree

3 files changed

+145
-96
lines changed

3 files changed

+145
-96
lines changed

ChatGPT_Bear/assistant.py

Lines changed: 55 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,22 @@
55
import threading
66
import os
77
import sys
8-
9-
from datetime import datetime, timedelta
10-
from queue import Queue
118
import time
129
import random
10+
import configparser
1311
from tempfile import NamedTemporaryFile
1412

1513
import azure.cognitiveservices.speech as speechsdk
16-
import speech_recognition as sr
17-
import openai
14+
from openai import OpenAI
1815

1916
import board
2017
import digitalio
2118
from adafruit_motorkit import MotorKit
2219

20+
from listener import Listener
21+
22+
API_KEYS_FILE = "~/keys.txt"
23+
2324
# ChatGPT Parameters
2425
SYSTEM_ROLE = (
2526
"You are a helpful voice assistant in the form of a talking teddy bear"
@@ -34,7 +35,6 @@
3435

3536
# Speech Recognition Parameters
3637
ENERGY_THRESHOLD = 1000 # Energy level for mic to detect
37-
PHRASE_TIMEOUT = 3.0 # Space between recordings for separating phrases
3838
RECORD_TIMEOUT = 30
3939

4040
# Motor Parameters
@@ -44,32 +44,60 @@
4444
# It pauses for BASE_MOUTH_DURATION ± SPEECH_VARIANCE
4545
MOTOR_DUTY_CYCLE = 1.0 # Lower provides less power to the motors
4646

47-
# Import keys from environment variables
48-
openai.api_key = os.environ.get("OPENAI_API_KEY")
49-
speech_key = os.environ.get("SPEECH_KEY")
50-
service_region = os.environ.get("SPEECH_REGION")
47+
# Do some checks and Import API keys from API_KEYS_FILE
48+
config = configparser.ConfigParser()
49+
50+
username = os.environ["USER"]
51+
user_homedir = os.path.expanduser(f"~{username}")
52+
API_KEYS_FILE = API_KEYS_FILE.replace("~", user_homedir)
53+
54+
def get_config_value(section, key, min_length=None):
55+
if not config.has_section(section):
56+
print("Please make sure API_KEYS_FILE points to "
57+
f"a valid file and has an [{section}] section.")
58+
sys.exit(1)
59+
if key not in config[section]:
60+
print(
61+
f"Please make sure your API keys file contains an {key} under the {section} section."
62+
)
63+
sys.exit(1)
64+
value = config[section][key]
65+
if min_length and len(value) < min_length:
66+
print(f"Please set {key} in your API keys file with a valid key.")
67+
sys.exit(1)
68+
return config[section][key]
69+
70+
print(os.path.expanduser(API_KEYS_FILE))
71+
config.read(os.path.expanduser(API_KEYS_FILE))
72+
openai = OpenAI(
73+
# This is the default and can be omitted
74+
api_key=get_config_value("openai", "OPENAI_API_KEY", 10)
75+
)
5176

52-
if openai.api_key is None or speech_key is None or service_region is None:
53-
print(
54-
"Please set the OPENAI_API_KEY, SPEECH_KEY, and SPEECH_REGION environment variables first."
55-
)
56-
sys.exit(1)
77+
speech_key = get_config_value("azure", "SPEECH_KEY", 15)
78+
service_region = get_config_value("azure", "SPEECH_REGION")
5779

5880
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
5981
speech_config.speech_synthesis_voice_name = AZURE_SPEECH_VOICE
6082

6183

6284
def sendchat(prompt):
63-
completion = openai.ChatCompletion.create(
85+
response = ""
86+
stream = openai.chat.completions.create(
6487
model=CHATGPT_MODEL,
6588
messages=[
6689
{"role": "system", "content": SYSTEM_ROLE},
6790
{"role": "user", "content": prompt},
6891
],
92+
stream=True,
6993
)
7094
# Send the heard text to ChatGPT and return the result
71-
return completion.choices[0].message.content
95+
for chunk in stream:
96+
if chunk.choices[0].delta.content is not None:
97+
response += chunk.choices[0].delta.content
7298

99+
# Send the heard text to ChatGPT and return the result
100+
return response
73101

74102
def transcribe(wav_data):
75103
# Read the transcription.
@@ -88,69 +116,6 @@ def transcribe(wav_data):
88116
return "I wasn't able to understand you. Please repeat that."
89117

90118

91-
class Listener:
92-
def __init__(self):
93-
self.listener_handle = None
94-
self.recognizer = sr.Recognizer()
95-
self.recognizer.energy_threshold = ENERGY_THRESHOLD
96-
self.recognizer.dynamic_energy_threshold = False
97-
self.recognizer.pause_threshold = 1
98-
self.last_sample = bytes()
99-
self.phrase_time = datetime.utcnow()
100-
self.phrase_timeout = PHRASE_TIMEOUT
101-
self.phrase_complete = False
102-
# Thread safe Queue for passing data from the threaded recording callback.
103-
self.data_queue = Queue()
104-
self.mic_dev_index = None
105-
106-
def listen(self):
107-
if not self.listener_handle:
108-
with sr.Microphone() as source:
109-
print(source.stream)
110-
self.recognizer.adjust_for_ambient_noise(source)
111-
audio = self.recognizer.listen(source, timeout=RECORD_TIMEOUT)
112-
data = audio.get_raw_data()
113-
self.data_queue.put(data)
114-
115-
def record_callback(self, _, audio: sr.AudioData) -> None:
116-
# Grab the raw bytes and push it into the thread safe queue.
117-
data = audio.get_raw_data()
118-
self.data_queue.put(data)
119-
120-
def speech_waiting(self):
121-
return not self.data_queue.empty()
122-
123-
def get_speech(self):
124-
if self.speech_waiting():
125-
return self.data_queue.get()
126-
return None
127-
128-
def get_audio_data(self):
129-
now = datetime.utcnow()
130-
if self.speech_waiting():
131-
self.phrase_complete = False
132-
if self.phrase_time and now - self.phrase_time > timedelta(
133-
seconds=self.phrase_timeout
134-
):
135-
self.last_sample = bytes()
136-
self.phrase_complete = True
137-
self.phrase_time = now
138-
139-
# Concatenate our current audio data with the latest audio data.
140-
while self.speech_waiting():
141-
data = self.get_speech()
142-
self.last_sample += data
143-
144-
# Use AudioData to convert the raw data to wav data.
145-
with sr.Microphone() as source:
146-
audio_data = sr.AudioData(
147-
self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
148-
)
149-
return audio_data
150-
151-
return None
152-
153-
154119
class Bear:
155120
def __init__(self, azure_speech_config):
156121
kit = MotorKit(i2c=board.I2C())
@@ -234,41 +199,35 @@ def speak(self, text):
234199
if cancellation_details.reason == speechsdk.CancellationReason.Error:
235200
print("Error details: {}".format(cancellation_details.error_details))
236201

237-
238202
def main():
239-
listener = Listener()
203+
listener = Listener(openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT)
240204
bear = Bear(speech_config)
241205

242206
transcription = [""]
243207
bear.speak(
244208
"Hello there! Just give my left foot a squeeze if you would like to get my attention."
245209
)
210+
246211
while True:
247212
try:
248213
# If button is pressed, start listening
249214
if bear.foot_pressed():
250215
bear.speak("How may I help you?")
251216
listener.listen()
252217

253-
# Pull raw recorded audio from the queue.
254218
if listener.speech_waiting():
255-
audio_data = listener.get_audio_data()
256219
bear.speak("Let me think about that")
257220
bear.move_arms(hide=True)
258-
text = transcribe(audio_data.get_wav_data())
221+
text = listener.recognize()
259222

260223
if text:
261-
if listener.phrase_complete:
262-
transcription.append(text)
263-
print(f"Phrase Complete. Sent '{text}' to ChatGPT.")
264-
chat_response = sendchat(text)
265-
transcription.append(f"> {chat_response}")
266-
print("Got response from ChatGPT. Beginning speech synthesis.")
267-
bear.move_arms(hide=False)
268-
bear.speak(chat_response)
269-
else:
270-
print("Partial Phrase...")
271-
transcription[-1] = text
224+
transcription.append(text)
225+
print(f"Phrase Complete. Sent '{text}' to ChatGPT.")
226+
chat_response = sendchat(text)
227+
transcription.append(f"> {chat_response}")
228+
print("Got response from ChatGPT. Beginning speech synthesis.")
229+
bear.move_arms(hide=False)
230+
bear.speak(chat_response)
272231

273232
os.system("clear")
274233
for line in transcription:

ChatGPT_Bear/keys.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[openai]
2+
OPENAI_API_KEY = sk-...
3+
4+
[azure]
5+
SPEECH_KEY = 4f1d...02a9
6+
SPEECH_REGION = eastus

ChatGPT_Bear/listener.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# SPDX-FileCopyrightText: 2023 Melissa LeBlanc-Williams for Adafruit Industries
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
import time
6+
7+
import speech_recognition as sr
8+
9+
class Listener:
10+
def __init__(
11+
self, api_key, energy_threshold=300, record_timeout=30
12+
):
13+
self.listener_handle = None
14+
self.microphone = sr.Microphone()
15+
self.recognizer = sr.Recognizer()
16+
self.recognizer.energy_threshold = energy_threshold
17+
self.recognizer.dynamic_energy_threshold = False
18+
self.recognizer.pause_threshold = 1
19+
self.phrase_time = time.monotonic()
20+
with self.microphone as source:
21+
self.recognizer.adjust_for_ambient_noise(
22+
source
23+
) # we only need to calibrate once, before we start listening
24+
self.record_timeout = record_timeout
25+
self._audio = None
26+
self.listener_handle = None
27+
self.api_key = api_key
28+
29+
def listen(self, ready_callback=None):
30+
print("Start listening...")
31+
self._start_listening()
32+
if ready_callback:
33+
ready_callback()
34+
35+
while (
36+
self.listener_handle and not self.speech_waiting()
37+
):
38+
time.sleep(0.1)
39+
self.stop_listening()
40+
41+
def _save_audio_callback(self, _, audio):
42+
print("Saving audio")
43+
self._audio = audio
44+
45+
def _start_listening(self):
46+
if not self.listener_handle:
47+
self.listener_handle = self.recognizer.listen_in_background(
48+
self.microphone,
49+
self._save_audio_callback,
50+
phrase_time_limit=self.record_timeout,
51+
)
52+
53+
def stop_listening(self, wait_for_stop=False):
54+
if self.listener_handle:
55+
self.listener_handle(wait_for_stop=wait_for_stop)
56+
self.listener_handle = None
57+
print("Stop listening...")
58+
59+
def is_listening(self):
60+
return self.listener_handle is not None
61+
62+
def speech_waiting(self):
63+
return self._audio is not None
64+
65+
def recognize(self):
66+
if self._audio:
67+
# Transcribe the audio data to text using Whisper
68+
print("Recognizing...")
69+
attempts = 0
70+
while attempts < 3:
71+
try:
72+
result = self.recognizer.recognize_whisper_api(
73+
self._audio, api_key=self.api_key
74+
)
75+
self._audio = None
76+
return result.strip()
77+
except sr.RequestError as e:
78+
print(f"Error: {e}")
79+
time.sleep(3)
80+
attempts += 1
81+
print("Retry attempt: ", attempts)
82+
print("Failed to recognize")
83+
return None
84+
return None

0 commit comments

Comments
 (0)