BESSER-PEARL
diff --git a/‎besser/agent/core/processors/audio_language_detection_processor.py‎
Lines changed: 94 additions & 0 deletions b/‎besser/agent/core/processors/audio_language_detection_processor.py‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎besser/agent/core/processors/user_adaptation_processor.py‎
Lines changed: 1 addition & 1 deletion b/‎besser/agent/core/processors/user_adaptation_processor.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎besser/agent/core/session.py‎
Lines changed: 5 additions & 2 deletions b/‎besser/agent/core/session.py‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎besser/agent/db/monitoring_db.py‎
Lines changed: 4 additions & 2 deletions b/‎besser/agent/db/monitoring_db.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎besser/agent/exceptions/logger.py‎
Lines changed: 2 additions & 2 deletions b/‎besser/agent/exceptions/logger.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎besser/agent/nlp/__init__.py‎
Lines changed: 1 addition & 24 deletions b/‎besser/agent/nlp/__init__.py‎
Lines changed: 1 addition & 24 deletions
diff --git a/‎besser/agent/nlp/llm/llm.py‎
Lines changed: 3 additions & 2 deletions b/‎besser/agent/nlp/llm/llm.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎besser/agent/nlp/llm/llm_huggingface.py‎
Lines changed: 1 addition & 1 deletion b/‎besser/agent/nlp/llm/llm_huggingface.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎besser/agent/nlp/llm/llm_huggingface_api.py‎
Lines changed: 1 addition & 1 deletion b/‎besser/agent/nlp/llm/llm_huggingface_api.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎besser/agent/nlp/llm/llm_openai_api.py‎
Lines changed: 2 additions & 2 deletions b/‎besser/agent/nlp/llm/llm_openai_api.py‎
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,94 @@
+from typing import TYPE_CHECKING
+
+import io
+import soundfile as sf
+import numpy as np
+
+
+from besser.agent.core.processors.processor import Processor
+from besser.agent.core.session import Session
+from besser.agent.nlp.nlp_engine import NLPEngine
+from besser.agent.nlp.llm.llm import LLM
+
+from besser.agent.exceptions.logger import logger
+
+from besser.agent.nlp.speech2text.speech2text import Speech2Text
+
+
+if TYPE_CHECKING:
+    from besser.agent.core.agent import Agent
+
+
+class AudioLanguageDetectionProcessor(Processor):
+    """The AudioLanguageDetectionProcessor detects the spoken language in a given audio message.
+
+    This processor uses a speech-to-text model to transcribe audio and then leverages an LLM to predict the language.
+    Ideally, use an LLM that handles many languages well (for example OpenAI's GPT-4o-mini), since the
+    language is predicted from the transcribed text.
+
+    Args:
+        agent (Agent): The agent the processor belongs to.
+        transcription_model (Speech2Text): The speech-to-text model to use for transcription.
+        llm_name (str): The name of the LLM to use for language detection.
+
+    Attributes:
+        agent (Agent): The agent the processor belongs to.
+        _transcription_model (Speech2Text): The speech-to-text model to use for transcription.
+        _llm_name (str): The name of the LLM used for language detection.
+        _nlp_engine (NLPEngine): The NLP Engine the Agent uses.
+    """
+
+    def __init__(self, agent: "Agent", transcription_model: Speech2Text, llm_name: str):
+        super().__init__(agent=agent, user_messages=True, agent_messages=False)
+        self._llm_name: str = llm_name
+        self._transcription_model: Speech2Text = transcription_model
+        self._nlp_engine: NLPEngine = agent.nlp_engine
+
+    def process(self, session: Session, message: bytes) -> bytes:
+        """Method to process a message and predict the message's language.
+
+        The detected language will be stored as a session parameter. The key is "user_language".
+
+        Args:
+            session (Session): the current session
+            message (bytes): the raw audio bytes to be processed
+
+        Returns:
+            bytes: the original message
+        """
+        # look up the LLM registered under the configured name
+        llm: LLM = self._nlp_engine._llms.get(self._llm_name)
+
+        try:
+            raw_audio = io.BytesIO(message)
+
+            # Convert raw bytes to NumPy array
+            audio_data = np.frombuffer(raw_audio.read(), dtype=np.int16)
+
+            # Save as WAV in memory
+            wav_file = io.BytesIO()
+            sf.write(
+                wav_file, audio_data, samplerate=44100, format="WAV", subtype="PCM_16"
+            )
+            wav_file.name = "audio.wav"
+            wav_file.seek(0)
+
+            # Use the transcription model to transcribe the audio
+            transcription = self._transcription_model.speech2text(wav_file.getvalue())
+
+            prompt = (
+                f"Identify the language based on the following message: {transcription}. "
+                f"Only return the ISO 639-1 standard language code of the "
+                f"language you recognized."
+            )
+            # Use the LLM to detect the language based on the transcription
+            # this might not work with all LLMs
+            detected_lang = llm.predict(prompt, session=session)
+            
+            logger.info(f"Detected language (ISO 639-1): {detected_lang}")
+            
+            session.set("user_language", detected_lang)
+        except Exception as e:
+            logger.error(f"Error during language detection: {e}")
+
+        return message
@@ -56,7 +56,7 @@ def process(self, session: 'Session', message: str) -> str:
                 You are free to adapt the messages in any way you like.\
                 The user should relate more. This is the user's profile\n \
                 {str(self._user_model[session.id])}"
-        prompt = f"You need to adapt this message: {message}\n Only respond with the adapated message!"
+        prompt = f"You need to adapt this message: {message}\n Only respond with the adapted message!"
         llm_response: str = llm.predict(prompt, session=session, system_message=user_context)
         return llm_response
 
 
@@ -179,16 +179,19 @@ def set(self, key: str, value: Any) -> None:
         """
         self._dictionary[key] = value
 
-    def get(self, key: str) -> Any:
+    def get(self, key: str, default: Any = None) -> Any:
         """Get an entry of the session private data storage.
 
         Args:
             key (str): the entry key
+            default (Any): the value to return if the dictionary does not contain the key (None if omitted)
 
         Returns:
-            Any: the entry value, or None if the key does not exist
+            Any: the entry value, or ``default`` if the key does not exist
         """
         if key not in self._dictionary:
+            if default is not None:  # identity check, so falsy defaults (0, "", False) are honoured
+                return default
             return None
         return self._dictionary[key]
 
 
@@ -1,9 +1,11 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, Any
 
+import json
 import pandas as pd
 from sqlalchemy import Connection, create_engine, Column, String, Integer, UniqueConstraint, ForeignKey, DateTime, \
     Float, MetaData, insert, Table, select, Executable, CursorResult, desc, Boolean
+from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.orm import declarative_base
 
 from besser.agent.core.message import Message
@@ -127,7 +129,7 @@ class TableChat(Base):
             id = Column(Integer, primary_key=True, autoincrement=True)
             session_id = Column(Integer, ForeignKey(f'{TABLE_SESSION}.id'), nullable=False)
             type = Column(String, nullable=False)
-            content = Column(String, nullable=False)
+            content = Column(JSONB, nullable=False)  # JSONB allows to handle the dictionary (TTS messages)
             is_user = Column(Boolean, nullable=False)
             timestamp = Column(DateTime, nullable=False)
 
@@ -243,7 +245,7 @@ def insert_chat(self, session: Session, message: Message) -> None:
         stmt = insert(table).values(
             session_id=int(session_entry['id'][0]),
             type=message.type.value,
-            content=message.content,
+            content=str(message.content),
             is_user=message.is_user,
             timestamp=message.timestamp,
         )
 
@@ -6,14 +6,14 @@
 
 # Create handlers
 console_handler = logging.StreamHandler()
-file_handler = logging.FileHandler("application.log")
+file_handler = logging.FileHandler("application.log", encoding='utf-8')
 
 # Set logging levels for handlers
 console_handler.setLevel(logging.INFO)
 file_handler.setLevel(logging.DEBUG)
 
 # Create formatters
-formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 console_handler.setFormatter(formatter)
 file_handler.setFormatter(formatter)
 
 
@@ -6,7 +6,7 @@
 
 NLP_LANGUAGE = Property(SECTION_NLP, 'nlp.language', str, 'en')
 """
-The agenr language. This is the expected language the users will talk to the agent. Using another language may 
+The agent language. This is the expected language the users will talk to the agent. Using another language may 
 affect the quality of some NLP processes.
 
 The list of available languages can be found at `snowballstemmer <https://pypi.org/project/snowballstemmer/>`_.
@@ -79,29 +79,6 @@
 default value: ``0.4``
 """
 
-NLP_STT_HF_MODEL = Property(SECTION_NLP, 'nlp.speech2text.hf.model', str, None)
-"""
-The name of the Hugging Face model for the HFSpeech2Text agent component. If none is provided, the component will not be 
-activated.
-
-name: ``nlp.speech2text.hf.model``
-
-type: ``str``
-
-default value: ``None``
-"""
-
-NLP_STT_SR_ENGINE = Property(SECTION_NLP, 'nlp.speech2text.sr.engine', str, None)
-"""
-The name of the transcription engine for the Speech Recognition agent component. If none is provided, the component will
-not be activated.
-
-name: ``nlp.speech2text.sr.engine``
-
-type: ``str``
-
-default value: ``None``
-"""
 
 OPENAI_API_KEY = Property(SECTION_NLP, 'nlp.openai.api_key', str, None)
 """
 
@@ -5,6 +5,7 @@
 from besser.agent.nlp.intent_classifier.intent_classifier_prediction import IntentClassifierPrediction
 
 if TYPE_CHECKING:
+    from besser.agent.core.agent import Agent
     from besser.agent.core.session import Session
     from besser.agent.nlp.intent_classifier.llm_intent_classifier import LLMIntentClassifier
     from besser.agent.nlp.nlp_engine import NLPEngine
@@ -34,8 +35,8 @@ class LLM(ABC):
             user specific context to be provided to the LLM for each request
     """
 
-    def __init__(self, nlp_engine: 'NLPEngine', name: str, parameters: dict, global_context: str = None):
-        self._nlp_engine: 'NLPEngine' = nlp_engine
+    def __init__(self, agent: 'Agent', name: str, parameters: dict, global_context: str = None):
+        self._nlp_engine: 'NLPEngine' = agent.nlp_engine
         self.name: str = name
         self.parameters: dict = parameters
         self._nlp_engine._llms[name] = self
 
@@ -50,7 +50,7 @@ class LLMHuggingFace(LLM):
 
     def __init__(self, agent: 'Agent', name: str, parameters: dict, num_previous_messages: int = 1,
                  global_context: str = None):
-        super().__init__(agent.nlp_engine, name, parameters, global_context)
+        super().__init__(agent, name, parameters, global_context)
         self.pipe = None
         self.num_previous_messages: int = num_previous_messages
 
 
@@ -48,7 +48,7 @@ class LLMHuggingFaceAPI(LLM):
 
     def __init__(self, agent: 'Agent', name: str, parameters: dict, num_previous_messages: int = 1,
                  global_context: str = None):
-        super().__init__(agent.nlp_engine, name, parameters, global_context=global_context)
+        super().__init__(agent, name, parameters, global_context=global_context)
         self.num_previous_messages: int = num_previous_messages
 
     def set_model(self, name: str) -> None:
 
@@ -46,7 +46,7 @@ class LLMOpenAI(LLM):
 
     def __init__(self, agent: 'Agent', name: str, parameters: dict, num_previous_messages: int = 1,
                  global_context: str = None):
-        super().__init__(agent.nlp_engine, name, parameters, global_context=global_context)
+        super().__init__(agent, name, parameters, global_context=global_context)
         self.client: OpenAI = None
         self.num_previous_messages: int = num_previous_messages
 
@@ -96,7 +96,7 @@ def chat(self, session: 'Session', parameters: dict = None, system_message: str
         messages = [
             {'role': 'user' if message.is_user else 'assistant', 'content': message.content}
             for message in chat_history
-            if message.type in [MessageType.STR, MessageType.LOCATION]
+            if message.type in [MessageType.STR, MessageType.LOCATION, MessageType.JSON]
         ]
         context_messages = []
         if self._global_context: