Update Magic AI Storybook for Bookworm and new OpenAI API

makermelissa · makermelissa · commit c354179e0c7e · 2024-03-29T11:19:20.000-07:00
diff --git a/Magic_AI_Storybook/listener.py b/Magic_AI_Storybook/listener.py
@@ -2,78 +2,45 @@
 #
 # SPDX-License-Identifier: MIT
 
-from queue import Queue
 import time
 
 import speech_recognition as sr
 
-
 class Listener:
     def __init__(
-        self, api_key, energy_threshold=300, phrase_timeout=3.0, record_timeout=30
+        self, api_key, energy_threshold=300, record_timeout=30
     ):
         self.listener_handle = None
         self.microphone = sr.Microphone()
         self.recognizer = sr.Recognizer()
         self.recognizer.energy_threshold = energy_threshold
         self.recognizer.dynamic_energy_threshold = False
         self.recognizer.pause_threshold = 1
-        self.last_sample = bytes()
         self.phrase_time = time.monotonic()
-        self.phrase_timeout = phrase_timeout
         with self.microphone as source:
             self.recognizer.adjust_for_ambient_noise(
                 source
             )  # we only need to calibrate once, before we start listening
         self.record_timeout = record_timeout
-        self.phrase_complete = False
-        self.data_queue = Queue()
+        self._audio = None
         self.listener_handle = None
         self.api_key = api_key
 
     def listen(self, ready_callback=None):
         print("Start listening...")
-        self.phrase_complete = False
-        start = time.monotonic()
         self._start_listening()
         if ready_callback:
             ready_callback()
+
         while (
             self.listener_handle and not self.speech_waiting()
-        ) or not self.phrase_complete:
-            if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
-                self.last_sample = bytes()
-                self.phrase_complete = True
-            self.phrase_time = time.monotonic() - start
+        ):
+            time.sleep(0.1)
         self.stop_listening()
 
     def _save_audio_callback(self, _, audio):
         print("Saving audio")
-        data = audio.get_raw_data()
-        self.data_queue.put(data)
-
-    def _get_audio(self):
-        """Concatenate and convert the queued raw data back to audio and return it"""
-        start = time.monotonic()
-        if self.speech_waiting():
-            self.phrase_complete = False
-            if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
-                self.last_sample = bytes()
-                self.phrase_complete = True
-            self.phrase_time = time.monotonic() - start
-
-            # Concatenate our current audio data with the latest audio data.
-            while self.speech_waiting():
-                data = self.data_queue.get()
-                self.last_sample += data
-
-            # Use AudioData to convert the raw data to wav data.
-            return sr.AudioData(
-                self.last_sample,
-                self.microphone.SAMPLE_RATE,
-                self.microphone.SAMPLE_WIDTH,
-            )
-        return None
+        self._audio = audio
 
     def _start_listening(self):
         if not self.listener_handle:
@@ -93,20 +60,19 @@ def is_listening(self):
         return self.listener_handle is not None
 
     def speech_waiting(self):
-        return not self.data_queue.empty()
+        return self._audio is not None
 
     def recognize(self):
-        audio = self._get_audio()
-        if audio:
+        if self._audio:
             # Transcribe the audio data to text using Whisper
             print("Recognizing...")
             attempts = 0
             while attempts < 3:
                 try:
                     result = self.recognizer.recognize_whisper_api(
-                        audio, api_key=self.api_key
+                        self._audio, api_key=self.api_key
                     )
-
+                    self._audio = None
                     return result.strip()
                 except sr.RequestError as e:
                     print(f"Error: {e}")
diff --git a/Magic_AI_Storybook/make_shortcut.py b/Magic_AI_Storybook/make_shortcut.py
@@ -28,6 +28,7 @@ def main():
     APP_PATH = "~/Magic_AI_Storybook/story.py"
     APP_ICON = "~/Magic_AI_Storybook/images/magic_book_icon.png"
     FILENAME = "storybook.desktop"
+    ENV_PATH = "~/story"
     AUTO_START = True
 
     if os.geteuid() == 0:
@@ -41,12 +42,16 @@ def main():
 
     APP_PATH = APP_PATH.replace("~", user_homedir)
     APP_ICON = APP_ICON.replace("~", user_homedir)
+    PYTHON_PATH = "python"
+    if ENV_PATH is not None:
+        ENV_PATH = ENV_PATH.replace("~", user_homedir)
+        PYTHON_PATH = ENV_PATH + "/bin/" + PYTHON_PATH
 
     shortcut_template = f"""[Desktop Entry]
 Comment=Run {APP_TITLE}
 Terminal={"true" if RUN_IN_TERMINAL else "false"}
 Name={APP_TITLE}
-Exec=sudo python {APP_PATH}
+Exec=sudo -E env PATH=$PATH {PYTHON_PATH} {APP_PATH}
 Type=Application
 Icon={APP_ICON}
 """
diff --git a/Magic_AI_Storybook/story.py b/Magic_AI_Storybook/story.py
@@ -16,7 +16,7 @@
 import board
 import digitalio
 import neopixel
-import openai
+from openai import OpenAI
 import pygame
 from rpi_backlight import Backlight
 from adafruit_led_animation.animation.pulse import Pulse
@@ -87,12 +87,11 @@
 
 # ChatGPT Parameters
 SYSTEM_ROLE = "You are a master AI Storyteller that can tell a story of any length."
-CHATGPT_MODEL = "gpt-3.5-turbo"
+CHATGPT_MODEL = "gpt-3.5-turbo"  # You can also use "gpt-4", which is slower, but more accurate
 WHISPER_MODEL = "whisper-1"
 
 # Speech Recognition Parameters
 ENERGY_THRESHOLD = 300  # Energy level for mic to detect
-PHRASE_TIMEOUT = 1.0  # Space between recordings for separating phrases
 RECORD_TIMEOUT = 30  # Maximum time in seconds to wait for speech
 
 # Do some checks and Import API keys from API_KEYS_FILE
@@ -118,7 +117,10 @@
 if len(config["openai"]["OPENAI_API_KEY"]) < 10:
     print("Please set OPENAI_API_KEY in your API keys file with a valid key.")
     sys.exit(1)
-openai.api_key = config["openai"]["OPENAI_API_KEY"]
+openai = OpenAI(
+    # Key is read from the API keys file (not the OPENAI_API_KEY env var), so pass it explicitly
+    api_key=config["openai"]["OPENAI_API_KEY"],
+)
 
 # Check that the prompt file exists and load it
 if not os.path.isfile(PROMPT_FILE):
@@ -250,7 +252,7 @@ def start(self):
 
         # Initialize the Listener
         self.listener = Listener(
-            openai.api_key, ENERGY_THRESHOLD, PHRASE_TIMEOUT, RECORD_TIMEOUT
+            openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT
         )
 
         # Preload remaining images
@@ -728,8 +730,9 @@ def _make_story_prompt(self, request):
     def _sendchat(self, prompt):
         response = ""
         print("Sending to chatGPT")
+        print("Prompt: ", prompt)
         # Package up the text to send to ChatGPT
-        completion = openai.ChatCompletion.create(
+        stream = openai.chat.completions.create(
             model=CHATGPT_MODEL,
             messages=[
                 {"role": "system", "content": SYSTEM_ROLE},
@@ -738,9 +741,9 @@ def _sendchat(self, prompt):
             stream=True,
         )
 
-        for chunk in completion:
-            if "delta" in chunk.choices[0] and "content" in chunk.choices[0]["delta"]:
-                response += chunk.choices[0]["delta"]["content"]
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                response += chunk.choices[0].delta.content
             if self._sleep_request:
                 return None