Skip to content

Commit 47cb38b

Browse files
authored
Merge pull request #136 from BESSER-PEARL/dev
PR v4.0.0
2 parents 1dd2066 + f76f124 commit 47cb38b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1732
-118
lines changed
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from typing import TYPE_CHECKING
2+
3+
import io
4+
import soundfile as sf
5+
import numpy as np
6+
7+
8+
from besser.agent.core.processors.processor import Processor
9+
from besser.agent.core.session import Session
10+
from besser.agent.nlp.nlp_engine import NLPEngine
11+
from besser.agent.nlp.llm.llm import LLM
12+
13+
from besser.agent.exceptions.logger import logger
14+
15+
from besser.agent.nlp.speech2text.speech2text import Speech2Text
16+
17+
18+
if TYPE_CHECKING:
19+
from besser.agent.core.agent import Agent
20+
21+
22+
class AudioLanguageDetectionProcessor(Processor):
    """The AudioLanguageDetectionProcessor detects the spoken language in a given audio message.

    This processor uses a speech-to-text model to transcribe audio and then leverages an LLM to predict the language.
    Ideally, you use a model that is trained for language detection, such as OpenAI's GPT-4o-mini
    or anything that works well on a plethora of languages.

    Args:
        agent (Agent): The agent the processor belongs to.
        transcription_model (Speech2Text): The speech-to-text model to use for transcription.
        llm_name (str): The name of the LLM to use for language detection.
        sample_rate (int): The sample rate (in Hz) written into the WAV header when the raw audio bytes are
            wrapped for transcription. Defaults to 44100.

    Attributes:
        agent (Agent): The agent the processor belongs to.
        _transcription_model (Speech2Text): The speech-to-text model to use for transcription.
        _llm_name (str): The name of the LLM used for language detection.
        _nlp_engine (NLPEngine): The NLP Engine the Agent uses.
        _sample_rate (int): The sample rate (in Hz) used when building the in-memory WAV file.
    """

    def __init__(self, agent: "Agent", transcription_model: Speech2Text, llm_name: str, sample_rate: int = 44100):
        super().__init__(agent=agent, user_messages=True, agent_messages=False)
        self._llm_name: str = llm_name
        self._transcription_model: Speech2Text = transcription_model
        self._nlp_engine: NLPEngine = agent.nlp_engine
        self._sample_rate: int = sample_rate

    def process(self, session: Session, message: bytes) -> bytes:
        """Method to process an audio message and predict the message's language.

        The detected language will be stored as a session parameter. The key is "user_language".
        Any failure is logged and the message is passed on unchanged, so language detection never blocks
        the rest of the processing pipeline.

        Args:
            session (Session): the current session
            message (bytes): the raw audio to be processed, interpreted as 16-bit PCM samples

        Returns:
            bytes: the original message
        """
        # Look the LLM up first: without it there is no point in paying for a transcription.
        llm: LLM = self._nlp_engine._llms.get(self._llm_name)
        if llm is None:
            logger.error(f"Error during language detection: LLM '{self._llm_name}' not found")
            return message

        # Nothing to transcribe, so keep whatever language is already stored in the session.
        if not message:
            logger.info("Skipping language detection: empty audio message")
            return message

        try:
            # Convert raw bytes to NumPy array (raises if the length is not a multiple of 2 bytes)
            audio_data = np.frombuffer(message, dtype=np.int16)

            # Save as WAV in memory
            wav_file = io.BytesIO()
            sf.write(
                wav_file, audio_data, samplerate=self._sample_rate, format="WAV", subtype="PCM_16"
            )

            # Use the transcription model to transcribe the audio
            transcription = self._transcription_model.speech2text(wav_file.getvalue())
            if not transcription:
                # An empty transcription would only make the LLM guess a language.
                logger.info("Skipping language detection: empty transcription")
                return message

            prompt = (
                f"Identify the language based on the following message: {transcription}. "
                f"Only return the ISO 639-1 standard language code of the "
                f"language you recognized."
            )
            # Use the LLM to detect the language based on the transcription
            # this might not work with all LLMs
            detected_lang = llm.predict(prompt, session=session)
            if isinstance(detected_lang, str):
                # LLM answers frequently carry surrounding whitespace/newlines
                detected_lang = detected_lang.strip()

            logger.info(f"Detected language (ISO 639-1): {detected_lang}")

            session.set("user_language", detected_lang)
        except Exception as e:
            # Deliberately best-effort: log and fall through to return the untouched message.
            logger.error(f"Error during language detection: {e}")

        return message

besser/agent/core/processors/user_adaptation_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def process(self, session: 'Session', message: str) -> str:
5656
You are free to adapt the messages in any way you like.\
5757
The user should relate more. This is the user's profile\n \
5858
{str(self._user_model[session.id])}"
59-
prompt = f"You need to adapt this message: {message}\n Only respond with the adapated message!"
59+
prompt = f"You need to adapt this message: {message}\n Only respond with the adapted message!"
6060
llm_response: str = llm.predict(prompt, session=session, system_message=user_context)
6161
return llm_response
6262

besser/agent/core/session.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,16 +179,19 @@ def set(self, key: str, value: Any) -> None:
179179
"""
180180
self._dictionary[key] = value
181181

182-
def get(self, key: str, default: Any = None) -> Any:
    """Get an entry of the session private data storage.

    Args:
        key (str): the entry key
        default (Any): the value to be returned if the dictionary does not contain the key. Falsy values
            (e.g. ``0``, ``''``, ``False``) are returned as given.

    Returns:
        Any: the entry value, or ``default`` (``None`` unless specified) if the key does not exist
    """
    if key not in self._dictionary:
        # Return the default as-is: a truthiness check here would turn falsy defaults into None.
        return default
    return self._dictionary[key]
194197

besser/agent/db/monitoring_db.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from datetime import datetime
22
from typing import TYPE_CHECKING, Any
33

4+
import json
45
import pandas as pd
56
from sqlalchemy import Connection, create_engine, Column, String, Integer, UniqueConstraint, ForeignKey, DateTime, \
67
Float, MetaData, insert, Table, select, Executable, CursorResult, desc, Boolean
8+
from sqlalchemy.dialects.postgresql import JSONB
79
from sqlalchemy.orm import declarative_base
810

911
from besser.agent.core.message import Message
@@ -127,7 +129,7 @@ class TableChat(Base):
127129
id = Column(Integer, primary_key=True, autoincrement=True)
128130
session_id = Column(Integer, ForeignKey(f'{TABLE_SESSION}.id'), nullable=False)
129131
type = Column(String, nullable=False)
130-
content = Column(String, nullable=False)
132+
content = Column(JSONB, nullable=False) # JSONB allows to handle the dictionary (TTS messages)
131133
is_user = Column(Boolean, nullable=False)
132134
timestamp = Column(DateTime, nullable=False)
133135

@@ -243,7 +245,7 @@ def insert_chat(self, session: Session, message: Message) -> None:
243245
stmt = insert(table).values(
244246
session_id=int(session_entry['id'][0]),
245247
type=message.type.value,
246-
content=message.content,
248+
content=str(message.content),
247249
is_user=message.is_user,
248250
timestamp=message.timestamp,
249251
)

besser/agent/exceptions/logger.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66

77
# Create handlers
88
console_handler = logging.StreamHandler()
9-
file_handler = logging.FileHandler("application.log")
9+
file_handler = logging.FileHandler("application.log", encoding='utf-8')
1010

1111
# Set logging levels for handlers
1212
console_handler.setLevel(logging.INFO)
1313
file_handler.setLevel(logging.DEBUG)
1414

1515
# Create formatters
16-
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
16+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
1717
console_handler.setFormatter(formatter)
1818
file_handler.setFormatter(formatter)
1919

besser/agent/nlp/__init__.py

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
NLP_LANGUAGE = Property(SECTION_NLP, 'nlp.language', str, 'en')
88
"""
9-
The agenr language. This is the expected language the users will talk to the agent. Using another language may
9+
The agent language. This is the expected language the users will talk to the agent. Using another language may
1010
affect the quality of some NLP processes.
1111
1212
The list of available languages can be found at `snowballstemmer <https://pypi.org/project/snowballstemmer/>`_.
@@ -79,29 +79,6 @@
7979
default value: ``0.4``
8080
"""
8181

82-
NLP_STT_HF_MODEL = Property(SECTION_NLP, 'nlp.speech2text.hf.model', str, None)
83-
"""
84-
The name of the Hugging Face model for the HFSpeech2Text agent component. If none is provided, the component will not be
85-
activated.
86-
87-
name: ``nlp.speech2text.hf.model``
88-
89-
type: ``str``
90-
91-
default value: ``None``
92-
"""
93-
94-
NLP_STT_SR_ENGINE = Property(SECTION_NLP, 'nlp.speech2text.sr.engine', str, None)
95-
"""
96-
The name of the transcription engine for the Speech Recognition agent component. If none is provided, the component will
97-
not be activated.
98-
99-
name: ``nlp.speech2text.sr.engine``
100-
101-
type: ``str``
102-
103-
default value: ``None``
104-
"""
10582

10683
OPENAI_API_KEY = Property(SECTION_NLP, 'nlp.openai.api_key', str, None)
10784
"""

besser/agent/nlp/llm/llm.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from besser.agent.nlp.intent_classifier.intent_classifier_prediction import IntentClassifierPrediction
66

77
if TYPE_CHECKING:
8+
from besser.agent.core.agent import Agent
89
from besser.agent.core.session import Session
910
from besser.agent.nlp.intent_classifier.llm_intent_classifier import LLMIntentClassifier
1011
from besser.agent.nlp.nlp_engine import NLPEngine
@@ -34,8 +35,8 @@ class LLM(ABC):
3435
user specific context to be provided to the LLM for each request
3536
"""
3637

37-
def __init__(self, nlp_engine: 'NLPEngine', name: str, parameters: dict, global_context: str = None):
38-
self._nlp_engine: 'NLPEngine' = nlp_engine
38+
def __init__(self, agent: 'Agent', name: str, parameters: dict, global_context: str = None):
39+
self._nlp_engine: 'NLPEngine' = agent.nlp_engine
3940
self.name: str = name
4041
self.parameters: dict = parameters
4142
self._nlp_engine._llms[name] = self

besser/agent/nlp/llm/llm_huggingface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class LLMHuggingFace(LLM):
5050

5151
def __init__(self, agent: 'Agent', name: str, parameters: dict, num_previous_messages: int = 1,
5252
global_context: str = None):
53-
super().__init__(agent.nlp_engine, name, parameters, global_context)
53+
super().__init__(agent, name, parameters, global_context)
5454
self.pipe = None
5555
self.num_previous_messages: int = num_previous_messages
5656

besser/agent/nlp/llm/llm_huggingface_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class LLMHuggingFaceAPI(LLM):
4848

4949
def __init__(self, agent: 'Agent', name: str, parameters: dict, num_previous_messages: int = 1,
5050
global_context: str = None):
51-
super().__init__(agent.nlp_engine, name, parameters, global_context=global_context)
51+
super().__init__(agent, name, parameters, global_context=global_context)
5252
self.num_previous_messages: int = num_previous_messages
5353

5454
def set_model(self, name: str) -> None:

besser/agent/nlp/llm/llm_openai_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class LLMOpenAI(LLM):
4646

4747
def __init__(self, agent: 'Agent', name: str, parameters: dict, num_previous_messages: int = 1,
4848
global_context: str = None):
49-
super().__init__(agent.nlp_engine, name, parameters, global_context=global_context)
49+
super().__init__(agent, name, parameters, global_context=global_context)
5050
self.client: OpenAI = None
5151
self.num_previous_messages: int = num_previous_messages
5252

@@ -96,7 +96,7 @@ def chat(self, session: 'Session', parameters: dict = None, system_message: str
9696
messages = [
9797
{'role': 'user' if message.is_user else 'assistant', 'content': message.content}
9898
for message in chat_history
99-
if message.type in [MessageType.STR, MessageType.LOCATION]
99+
if message.type in [MessageType.STR, MessageType.LOCATION, MessageType.JSON]
100100
]
101101
context_messages = []
102102
if self._global_context:

0 commit comments

Comments
 (0)