vannu07 · zhomander · Oct 11, 2025 · Jan 1, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/backend/command.py b/backend/command.py
@@ -1,98 +1,170 @@
-import eel
+import threading
 import pyttsx3
 import speech_recognition as sr
-
-from backend.config import (SPEECH_LANGUAGE, SPEECH_PAUSE_THRESHOLD,
-                            SPEECH_PHRASE_TIMEOUT, SPEECH_TIMEOUT, TTS_ENGINE,
-                            TTS_RATE, TTS_VOICE_ID)
-
-
-def speak(text):
-    text = str(text)
-    engine = pyttsx3.init(TTS_ENGINE)
-    voices = engine.getProperty("voices")
-    # print(voices)
-    engine.setProperty("voice", voices[TTS_VOICE_ID].id)
-    eel.DisplayMessage(text)
-    engine.say(text)
-    engine.runAndWait()
-    engine.setProperty("rate", TTS_RATE)
-    eel.receiverText(text)
+import eel
+from typing import Optional
+
+from backend.feature import openCommand, findContact, whatsApp, PlayYoutube, chatBot
+
+from backend.config import (
+    TTS_VOICE_ID, TTS_RATE, TTS_VOLUME, TTS_ENGINE,
+    SPEECH_LANGUAGE, SPEECH_TIMEOUT, SPEECH_PHRASE_TIMEOUT, SPEECH_PAUSE_THRESHOLD
+)
+
+_engine: Optional[pyttsx3.Engine] = None  # shared TTS engine, built on the first _get_engine() call
+_engine_lock = threading.Lock()  # held while _engine is created and configured
+
+_recognizer: Optional[sr.Recognizer] = None  # shared recognizer, built on the first _get_recognizer() call
+_rec_lock = threading.Lock()  # held while _recognizer is created and configured
+
+_microphone: Optional[sr.Microphone] = None  # shared input device, built on the first _get_microphone() call
+_calibrated = False  # set to True by _ensure_calibrated() after its first noise sample
+_cached_energy_threshold: Optional[float] = None  # threshold stored by that first calibration
+
+_CALL_KEYS = ("send message", "video call", "call")  # phrases that send _handle_comm to its WhatsApp branch
+_YT_KEY = "on youtube"  # phrase that sends _handle_comm to PlayYoutube
+_OPEN_KEY = "open"  # phrase that sends _handle_comm to openCommand
+
+
+def _get_engine() -> pyttsx3.Engine:
+    """Return the shared TTS engine, building and configuring it on first use.
+
+    The voice is only overridden when ``TTS_VOICE_ID`` is a valid index into
+    the driver's voice list; rate and volume are always applied.
+    """
+    global _engine
+    with _engine_lock:
+        if _engine is not None:
+            return _engine
+
+        tts = pyttsx3.init(TTS_ENGINE)
+        available = tts.getProperty("voices")
+        if 0 <= TTS_VOICE_ID < len(available):
+            tts.setProperty("voice", available[TTS_VOICE_ID].id)
+        for prop, value in (("rate", TTS_RATE), ("volume", TTS_VOLUME)):
+            tts.setProperty(prop, value)
+
+        # Publish only after configuration finished, so a failed setup is retried.
+        _engine = tts
+        return tts
+
+
+
+def _get_recognizer() -> sr.Recognizer:
+    """Return the shared recognizer, creating and configuring it on first use.
+
+    The pause threshold comes from ``SPEECH_PAUSE_THRESHOLD`` and dynamic
+    energy adjustment is switched off on the new instance.
+    """
+    global _recognizer
+    with _rec_lock:
+        if _recognizer is None:
+            r = sr.Recognizer()
+            r.pause_threshold = SPEECH_PAUSE_THRESHOLD
+            r.dynamic_energy_threshold = False
+            _recognizer = r
+        return _recognizer
+
+
+def _get_microphone() -> sr.Microphone:
+    """Return the shared microphone, constructing it on first use."""
+    global _microphone
+    if _microphone is None:
+        _microphone = sr.Microphone()
+    return _microphone
+
+
+def _ensure_calibrated(r: sr.Recognizer, source: sr.AudioSource) -> None:
+    """Sample ambient noise on the first call; afterwards reapply the saved threshold.
+
+    Args:
+        r: Recognizer whose ``energy_threshold`` is measured or restored.
+        source: Open audio source handed to ``adjust_for_ambient_noise``.
+    """
+    global _calibrated, _cached_energy_threshold
+    if _calibrated:
+        # Already measured: restore the stored value when there is one.
+        if _cached_energy_threshold is not None:
+            r.energy_threshold = _cached_energy_threshold
+        return
+
+    r.adjust_for_ambient_noise(source, duration=0.4)
+    _cached_energy_threshold = r.energy_threshold
+    _calibrated = True
+
+
+def speak(text) -> None:  # NOTE(review): several alternative bodies are stacked below - confirm which one is intended
+    """TTS + Eel UI; reuses engine; thread-safe."""
+    s = str(text)  # accept any object; everything below works on its string form
+    eel.DisplayMessage(s)  # both UI calls run before any audio is attempted
+    eel.receiverText(s)
+    try:
+        eng = _get_engine()  # first variant: shared module-level engine
+        with _engine_lock:  # first variant: say/runAndWait run while holding the engine lock
+            eng.say(s)
+            eng.runAndWait()
-        with _engine_lock:
-            eng.say(s)
-            eng.runAndWait()
+        eng.say(s)
+        eng.runAndWait()
-        with _engine_lock:
-            eng.say(s)
-            eng.runAndWait()
+        eng.say(s)
+        eng.runAndWait()
+    except Exception as e:
+        print(f"TTS error: {e}")  # TTS failures are printed, not raised
-    """TTS + Eel UI; reuses engine; thread-safe."""
-    s = str(text)
-    eel.DisplayMessage(s)
-    eel.receiverText(s)
-    try:
-        eng = _get_engine()
-        with _engine_lock:
-            eng.say(s)
-            eng.runAndWait()
-    except Exception as e:
-        print(f"TTS error: {e}")
+    """TTS + Eel UI; use a fresh TTS engine per call to avoid cross-thread state issues."""
+    s = str(text)
+    eel.DisplayMessage(s)
+    eel.receiverText(s)
+    eng: Optional[pyttsx3.Engine] = None  # stays None if init itself fails, so cleanup is skipped
+    try:
+        eng = pyttsx3.init(TTS_ENGINE)  # this variant calls init on every speak() instead of _get_engine()
+        voices = eng.getProperty("voices")
+        if 0 <= TTS_VOICE_ID < len(voices):  # voice is changed only for an in-range index
+            eng.setProperty("voice", voices[TTS_VOICE_ID].id)
+        eng.setProperty("rate", TTS_RATE)
+        eng.setProperty("volume", TTS_VOLUME)
+        eng.say(s)
+        eng.runAndWait()
+    except Exception as e:
+        print(f"TTS error: {e}")
+    finally:
+        if eng is not None:
+            try:
+                eng.stop()  # best-effort cleanup; any error here is ignored
+            except Exception:
+                pass
-    """TTS + Eel UI; reuses engine; thread-safe."""
-    s = str(text)
-    eel.DisplayMessage(s)
-    eel.receiverText(s)
-    try:
-        eng = _get_engine()
-        with _engine_lock:
-            eng.say(s)
-            eng.runAndWait()
-    except Exception as e:
-        print(f"TTS error: {e}")
+    """TTS + Eel UI; use a fresh TTS engine per call to avoid cross-thread state issues."""
+    s = str(text)
+    eel.DisplayMessage(s)
+    eel.receiverText(s)
+    eng: Optional[pyttsx3.Engine] = None
+    try:
+        eng = pyttsx3.init(TTS_ENGINE)
+        voices = eng.getProperty("voices")
+        if 0 <= TTS_VOICE_ID < len(voices):
+            eng.setProperty("voice", voices[TTS_VOICE_ID].id)
+        eng.setProperty("rate", TTS_RATE)
+        eng.setProperty("volume", TTS_VOLUME)
+        eng.say(s)
+        eng.runAndWait()
+    except Exception as e:
+        print(f"TTS error: {e}")
+    finally:
+        if eng is not None:
+            try:
+                eng.stop()
+            except Exception:
+                pass
 
 
-# Expose the Python function to JavaScript
+def takecommand() -> Optional[str]:
+    """Capture voice -> lowercase string, or None on failure. Reuses mic/recognizer."""
+    r = _get_recognizer()  # shared instances; nothing is constructed per call after the first
+    mic = _get_microphone()
 
+    eel.DisplayMessage("I'm listening...")
+    print("I'm listening...")
 
-def takecommand():
-    r = sr.Recognizer()
-    with sr.Microphone() as source:
-        print("I'm listening...")
-        eel.DisplayMessage("I'm listening...")
-        r.pause_threshold = SPEECH_PAUSE_THRESHOLD
-        r.adjust_for_ambient_noise(source)
-        audio = r.listen(source, SPEECH_TIMEOUT, SPEECH_PHRASE_TIMEOUT)
+    try:
+        with mic as source:
+            _ensure_calibrated(r, source)  # samples noise on the first call only, then reuses the stored threshold
+            audio = r.listen(
+                source,
+                timeout=SPEECH_TIMEOUT,
+                phrase_time_limit=SPEECH_PHRASE_TIMEOUT
+            )
+    except Exception as e:
+        print(f"Listen error: {e}")
+        return None  # any capture failure is reported to the caller as None
 
     try:
-        print("Recognizing...")
         eel.DisplayMessage("Recognizing...")
+        print("Recognizing...")
         query = r.recognize_google(audio, language=SPEECH_LANGUAGE)
-        print(f"User said: {query}\n")
         eel.DisplayMessage(query)
-
+        print(f"User said: {query}\n")
         speak(query)
+        normalized = query.strip().lower()  # callers match keywords against this lowercase form
+        return normalized or None  # NOTE(review): the variant below returns `normalized` as-is ("" instead of None) - confirm which is intended
-        return normalized or None
+        return normalized
-        return normalized or None
+        return normalized
     except Exception as e:
-        print(f"Error: {str(e)}\n")
+        print(f"Recognition error: {e}")
         return None
 
-    return query.lower()
 
+def _handle_comm(query: str) -> None:  # NOTE(review): the WhatsApp branch is stacked in several variants below - confirm which one is intended
+    """Route the normalized query to the correct feature handler."""
+    if _OPEN_KEY in query:  # tested first, so "open" takes precedence over every other keyword
+        openCommand(query)
+        return
+
+    if any(k in query for k in _CALL_KEYS):  # substring match on any of the call/message phrases
+        Phone, name = findContact(query)
+        if Phone != 0:  # a phone value of 0 skips every whatsApp call below
+            if "send message" in query:
+                speak("What message to send?")
+                msg = takecommand()  # second listen: the message body itself
+                if msg:
+                    whatsApp(Phone, msg, "message", name)
+                else:
+                    speak("I didn't catch the message.")
-                speak("What message to send?")
-                msg = takecommand()
-                if msg:
-                    whatsApp(Phone, msg, "message", name)
-                else:
-                    speak("I didn't catch the message.")
+                max_retries = 3
+                for attempt in range(max_retries):
+                    speak("What message to send?")
+                    msg = takecommand()
+                    if msg:
+                        whatsApp(Phone, msg, "message", name)
+                        break
+                    else:
+                        speak("I didn't catch the message.")
+                        if attempt < max_retries - 1:
+                            speak("Please say the message again.")
+                else:  # for-else: reached only when no attempt hit `break`
+                    speak("I couldn't get the message. Let's try again later.")
-                    speak("I didn't catch the message.")
+                    speak("I didn't catch the message.")
+                    # Maintain original behavior: still pass the (possibly None) msg to whatsApp
+                    whatsApp(Phone, msg, "message", name)
-                speak("What message to send?")
-                msg = takecommand()
-                if msg:
-                    whatsApp(Phone, msg, "message", name)
-                else:
-                    speak("I didn't catch the message.")
+                max_retries = 3
+                for attempt in range(max_retries):
+                    speak("What message to send?")
+                    msg = takecommand()
+                    if msg:
+                        whatsApp(Phone, msg, "message", name)
+                        break
+                    else:
+                        speak("I didn't catch the message.")
+                        if attempt < max_retries - 1:
+                            speak("Please say the message again.")
+                else:
+                    speak("I couldn't get the message. Let's try again later.")
-                    speak("I didn't catch the message.")
+                    speak("I didn't catch the message.")
+                    # Maintain original behavior: still pass the (possibly None) msg to whatsApp
+                    whatsApp(Phone, msg, "message", name)
+            elif "call" in query and "video call" not in query:  # plain call; "video call" falls through to the else
+                whatsApp(Phone, query, "call", name)
+            else:
+                whatsApp(Phone, query, "video call", name)
-        Phone, name = findContact(query)
-        if Phone != 0:
-            if "send message" in query:
-                speak("What message to send?")
-                msg = takecommand()
-                if msg:
-                    whatsApp(Phone, msg, "message", name)
-                else:
-                    speak("I didn't catch the message.")
-            elif "call" in query and "video call" not in query:
-                whatsApp(Phone, query, "call", name)
-            else:
-                whatsApp(Phone, query, "video call", name)
+        phone, name = findContact(query)
+        if phone != 0:
+            if "send message" in query:
+                speak("What message to send?")
+                msg = takecommand()
+                if msg:
+                    whatsApp(phone, msg, "message", name)
+                else:
+                    speak("I didn't catch the message.")
+            elif "call" in query and "video call" not in query:
+                whatsApp(phone, query, "call", name)
+            else:
+                whatsApp(phone, query, "video call", name)
-        Phone, name = findContact(query)
-        if Phone != 0:
-            if "send message" in query:
-                speak("What message to send?")
-                msg = takecommand()
-                if msg:
-                    whatsApp(Phone, msg, "message", name)
-                else:
-                    speak("I didn't catch the message.")
-            elif "call" in query and "video call" not in query:
-                whatsApp(Phone, query, "call", name)
-            else:
-                whatsApp(Phone, query, "video call", name)
+        phone, name = findContact(query)
+        if phone != 0:
+            if "send message" in query:
+                speak("What message to send?")
+                msg = takecommand()
+                if msg:
+                    whatsApp(phone, msg, "message", name)
+                else:
+                    speak("I didn't catch the message.")
+            elif "call" in query and "video call" not in query:
+                whatsApp(phone, query, "call", name)
+            else:
+                whatsApp(phone, query, "video call", name)
+        return  # a call/message query never falls through to YouTube or the chatbot
+
+    if _YT_KEY in query:
+        PlayYoutube(query)
+        return
+    chatBot(query)  # fallback when none of the keywords matched
 
 @eel.expose
-def takeAllCommands(message=None):
+def takeAllCommands(message: Optional[str] = None) -> None:  # message=None selects the voice path
+    """Entry point for both text and voice; maintains original behavior."""
     if message is None:
-        query = takecommand()  # If no message is passed, listen for voice input
+        query = takecommand()  # voice path: lowercase text, or None when capture/recognition failed
         if not query:
-            return  # Exit if no query is received
-        print(query)
+            speak("No command was given.")
+            return  # returns before the try/finally below, so eel.ShowHood() is not called on this path
         eel.senderText(query)
     else:
-        query = message  # If there's a message, use it
-        print(f"Message received: {query}")
-        eel.senderText(query)
+        q = str(message).strip()  # text path: trimmed copy keeps its original casing for display
+        print(f"Message received: {q}")
+        eel.senderText(q)
-        eel.senderText(q)
+        eel.senderText(q)
+        speak(q)
-        eel.senderText(q)
+        eel.senderText(q)
+        speak(q)
+        query = q.lower()  # routing uses the same lowercase form the voice path produces
 
     try:
         if query:
-            if "open" in query:
-                from backend.feature import openCommand
-
-                openCommand(query)
-            elif "send message" in query or "call" in query or "video call" in query:
-                from backend.feature import findContact, whatsApp
-
-                flag = ""
-                Phone, name = findContact(query)
-                if Phone != 0:
-                    if "send message" in query:
-                        flag = "message"
-                        speak("What message to send?")
-                        query = takecommand()  # Ask for the message text
-                    elif "call" in query:
-                        flag = "call"
-                    else:
-                        flag = "video call"
-                    whatsApp(Phone, query, flag, name)
-            elif "on youtube" in query:
-                from backend.feature import PlayYoutube
-
-                PlayYoutube(query)
-            else:
-                from backend.feature import chatBot
-
-                chatBot(query)
+            _handle_comm(query)  # keyword routing now lives in one helper
         else:
             speak("No command was given.")
     except Exception as e:
         print(f"An error occurred: {e}")
         speak("Sorry, something went wrong.")
-
-    eel.ShowHood()
+    finally:
+        eel.ShowHood()  # runs once the try block is entered, whether routing succeeded or raised