diff --git a/ovos_solver_openai_persona/engines.py b/ovos_solver_openai_persona/engines.py
index 9c8877f..8b4b83d 100644
--- a/ovos_solver_openai_persona/engines.py
+++ b/ovos_solver_openai_persona/engines.py
@@ -101,11 +101,11 @@ def __init__(self, config=None,
                  enable_cache: bool = False,
                  internal_lang: Optional[str] = None):
         """
-        Initializes the OpenAIChatCompletionsSolver with API configuration, memory settings, and system prompt.
-
-        Raises:
-            ValueError: If the API key is not provided in the configuration.
-        """
+        Initialize an OpenAIChatCompletionsSolver instance with API configuration, conversation memory settings, and system prompt.
+
+        Raises:
+            ValueError: If the API key is missing from the configuration.
+        """
         super().__init__(config=config, translator=translator,
                          detector=detector, priority=priority,
                          enable_tx=enable_tx, enable_cache=enable_cache,
@@ -131,16 +131,16 @@ def __init__(self, config=None,
     # OpenAI API integration
     def _do_api_request(self, messages):
         """
-        Sends a chat completion request to the OpenAI API and returns the assistant's reply.
+        Send a chat completion request to the OpenAI API using the provided conversation history and return the assistant's reply.

-        Args:
-            messages: A list of message dictionaries representing the conversation history.
+        Parameters:
+            messages (list): Conversation history as a list of message dictionaries.

         Returns:
-            The content of the assistant's reply as a string.
+            str: The assistant's reply content.

         Raises:
-            RequestException: If the OpenAI API returns an error in the response.
+            RequestException: If the OpenAI API response contains an error.
         """
         s = requests.Session()
         headers = {
@@ -243,14 +243,14 @@ def get_chat_history(self, system_prompt=None):

     def get_messages(self, utt, system_prompt=None) -> MessageList:
         """
-        Builds a list of chat messages including the system prompt, recent conversation history, and the current user utterance.
+        Constructs a list of chat messages for the API, including the system prompt, recent conversation history, and the current user utterance.

-        Args:
-            utt: The current user input to be appended as the latest message.
+        Parameters:
+            utt: The current user input to be added as the latest message.
             system_prompt: Optional system prompt to use as the initial message.

         Returns:
-            A list of message dictionaries representing the chat context for the API.
+            A list of message dictionaries representing the chat context.
         """
         messages = self.get_chat_history(system_prompt)
         messages.append({"role": "user", "content": utt})
@@ -261,18 +261,18 @@ def continue_chat(self, messages: MessageList,
                       lang: Optional[str],
                       units: Optional[str] = None) -> Optional[str]:
         """
-        Generates a chat response using the provided message history and updates memory if enabled.
-
-        If the first message is not a system prompt, prepends the system prompt. Processes the API response and returns a cleaned answer, or None if the answer is empty or only punctuation/underscores. Updates internal memory with the latest question and answer if memory is enabled.
-
-        Args:
-            messages: List of chat messages with 'role' and 'content' keys.
-            lang: Optional language code for the response.
-            units: Optional unit system for numerical values.
-
-        Returns:
-            The generated response as a string, or None if no valid response is produced.
-        """
+        Generate a chat response based on the provided message history and update conversation memory if enabled.
+
+        If the first message is not a system prompt, the configured system prompt is prepended. Returns a cleaned response string, or None if the response is empty or contains only punctuation or underscores. Updates internal memory with the latest user message and answer when memory is enabled.
+
+        Parameters:
+            messages (MessageList): List of chat messages, each with 'role' and 'content' keys.
+            lang (Optional[str]): Language code for the response.
+            units (Optional[str]): Unit system for numerical values.
+
+        Returns:
+            Optional[str]: The generated response string, or None if no valid response is produced.
+        """
         if messages[0]["role"] != "system":
             messages = [{"role": "system", "content": self.system_prompt }] + messages
         response = self._do_api_request(messages)
@@ -288,16 +288,10 @@ def stream_chat_utterances(self, messages: MessageList,
                                lang: Optional[str] = None,
                                units: Optional[str] = None) -> Iterable[str]:
         """
-        Stream utterances for the given chat history as they become available.
-
-        Args:
-            messages: The chat messages.
-            lang (Optional[str]): Optional language code. Defaults to None.
-            units (Optional[str]): Optional units for the query. Defaults to None.
-
-        Returns:
-            Iterable[str]: An iterable of utterances.
-        """
+        Streams partial assistant responses for a chat conversation as they are generated.
+
+        Yields post-processed segments of the assistant's reply, emitting each chunk when a sentence or phrase boundary is detected. If conversation memory is enabled, updates the internal memory with the accumulating answer.
+        """
         if messages[0]["role"] != "system":
             messages = [{"role": "system", "content": self.system_prompt }] + messages
         answer = ""
@@ -322,16 +316,16 @@ def stream_utterances(self, query: str,
                           lang: Optional[str] = None,
                           units: Optional[str] = None) -> Iterable[str]:
         """
-        Stream utterances for the given query as they become available.
-
-        Args:
-            query (str): The query text.
-            lang (Optional[str]): Optional language code. Defaults to None.
-            units (Optional[str]): Optional units for the query. Defaults to None.
-
-        Returns:
-            Iterable[str]: An iterable of utterances.
-        """
+        Yields partial responses for a query as they are generated by the chat completions API.
+
+        Parameters:
+            query (str): The user query to send to the chat model.
+            lang (Optional[str]): Language code for the response, if applicable.
+            units (Optional[str]): Units relevant to the query, if applicable.
+
+        Returns:
+            Iterable[str]: An iterator yielding segments of the model's response as they become available.
+        """
         messages = self.get_messages(query)
         yield from self.stream_chat_utterances(messages, lang, units)

diff --git a/ovos_solver_openai_persona/rag.py b/ovos_solver_openai_persona/rag.py
index ae7861b..626cc60 100644
--- a/ovos_solver_openai_persona/rag.py
+++ b/ovos_solver_openai_persona/rag.py
@@ -27,31 +27,13 @@ def __init__(self, config: Optional[Dict[str, Any]] = None,
                  enable_cache: bool = False,
                  internal_lang: Optional[str] = None):
         """
-        Initializes the PersonaServerRAGSolver.
-
-        Args:
-            config (dict): Configuration dictionary for the solver. Expected keys:
-                - "api_url" (str): Base URL of the ovos-persona-server (e.g., "http://localhost:8337/v1").
-                - "vector_store_id" (str): The ID of the vector store to query for RAG.
-                - "max_num_results" (int, optional): Max number of chunks to retrieve from search. Defaults to 5.
-                - "max_context_tokens" (int, optional): Max tokens for retrieved context in the LLM prompt. Defaults to 2000.
-                - "system_prompt_template" (str, optional): Template for the RAG system prompt.
-                  Must contain "{context}" and "{question}" placeholders.
- - "llm_model" (str, optional): The model name to use for chat completions on the Persona Server. - - "key" (str, optional): API key for the Persona Server's chat completions endpoint. - - "llm_temperature" (float, optional): Sampling temperature for LLM. Defaults to 0.7. - - "llm_top_p" (float, optional): Top-p sampling for LLM. Defaults to 1.0. - - "llm_max_tokens" (int, optional): Max tokens for LLM generation. Defaults to 500. - translator (LanguageTranslator, optional): Language translator instance. - detector (LanguageDetector, optional): Language detector instance. - priority (int): Solver priority. - enable_tx (bool): Enable translation. - enable_cache (bool): Enable caching. - internal_lang (str, optional): Internal language code. - - Raises: - ValueError: If required configuration parameters are missing or invalid. - """ + Initialize the OpenAIRAGSolver with configuration for connecting to the ovos-persona-server and vector store. + + Validates required configuration parameters, sets up prompt templates, LLM parameters, and memory settings for multi-turn conversation support. + + Raises: + ValueError: If required configuration keys ('api_url', 'vector_store_id') are missing or if the system prompt template lacks required placeholders. + """ super().__init__(config=config, translator=translator, detector=detector, priority=priority, enable_tx=enable_tx, enable_cache=enable_cache, @@ -100,16 +82,16 @@ def __init__(self, config: Optional[Dict[str, Any]] = None, def _search_vector_store(self, query: str) -> List[str]: """ - Performs a search against the ovos-persona-server's vector store. - - Args: - query (str): The user's query string. - + Searches the configured vector store for relevant text chunks matching the user query. + + Parameters: + query (str): The user's query string to search for relevant context. + Returns: - List[str]: A list of relevant text chunks (content) retrieved from the vector store. - + List[str]: A list of text chunks retrieved from the vector store that are relevant to the query. + Raises: - RequestException: If the search API call fails or returns an error. + RequestException: If the search request fails or the response format is invalid. """ search_url = f"{self.api_url}/vector_stores/{self.vector_store_id}/search" headers = {"Content-Type": "application/json"} @@ -140,15 +122,17 @@ def _search_vector_store(self, query: str) -> List[str]: def _build_llm_messages(self, user_query: str, retrieved_context_chunks: List[str], chat_history: List[Dict[str, str]]) -> List[Dict[str, str]]: """ - Constructs the complete message list for the LLM, including RAG context and chat history. - - Args: - user_query (str): The current user's utterance. - retrieved_context_chunks (List[str]): List of text chunks retrieved from the vector store. - chat_history (List[Dict[str, str]]): The conversation history from `self.qa_pairs`. - + Constructs the message list for the LLM by combining retrieved context, recent chat history, and the current user query. + + The method concatenates relevant context chunks (up to a token limit), formats the system prompt with this context and the user's question, appends recent Q&A pairs from memory, and adds the current user query as the final message. + + Parameters: + user_query (str): The user's current question or utterance. + retrieved_context_chunks (List[str]): Relevant text segments retrieved from the vector store. + chat_history (List[Dict[str, str]]): Previous conversation history. 
+
         Returns:
-            List[Dict[str, str]]: A new list of messages, augmented with the RAG context and history.
+            List[Dict[str, str]]: The complete list of messages to send to the LLM, including system prompt, chat history, and user query.
         """
         context_str = ""
         current_context_tokens = 0

@@ -186,8 +170,10 @@ def _build_llm_messages(self, user_query: str, retrieved_context_chunks: List[st

     def get_chat_history(self) -> List[Dict[str, str]]:
         """
-        Returns the chat history managed by this RAG solver.
-        This method is called by the base ChatMessageSolver.
+        Return the recent chat history as a list of user and assistant messages.
+
+        Returns:
+            List of message dictionaries representing the most recent question-answer pairs, formatted with roles 'user' and 'assistant'.
         """
         # The base class expects a list of messages (role, content).
         # We store (query, answer) tuples.
@@ -202,18 +188,19 @@ def continue_chat(self, messages: List[Dict[str, str]],
                       lang: Optional[str],
                       units: Optional[str] = None) -> Optional[str]:
         """
-        Generates a chat response using RAG by directly calling the Persona Server's
-        chat completions endpoint.
-
-        Args:
-            messages: List of chat messages with 'role' and 'content' keys.
-                The last user message is used for RAG retrieval and as the current query.
-            lang: Optional language code for the response.
-            units: Optional unit system for numerical values.
-
-        Returns:
-            The generated response as a string, or None if no valid response is produced.
-        """
+        Generate a chat response by augmenting the user query with retrieved context from a vector store and sending the constructed prompt to the Persona Server's chat completions endpoint.
+
+        Parameters:
+            messages (List[Dict[str, str]]): List of chat messages, where the last message is treated as the current user query.
+            lang (Optional[str]): Optional language code for the response.
+            units (Optional[str]): Optional unit system for numerical values.
+
+        Returns:
+            Optional[str]: The generated response as a string, or None if no valid response is produced.
+
+        Raises:
+            RequestException: If the Persona Server's chat completions endpoint returns an error or an invalid response.
+        """
         user_query = messages[-1]["content"]  # Get the current user query

         # 1. Search vector store for context
@@ -265,17 +252,18 @@ def stream_chat_utterances(self, messages: List[Dict[str, str]],
                                lang: Optional[str] = None,
                                units: Optional[str] = None) -> Iterable[str]:  # Yields raw data: lines
         """
-        Stream utterances for the given chat history using RAG by directly calling the Persona Server's
-        chat completions endpoint in streaming mode.
-
-        Args:
-            messages: The chat messages. The last user message is used for RAG retrieval and as the current query.
-            lang (Optional[str]): Optional language code. Defaults to None.
-            units (Optional[str]): Optional units for the query. Defaults to None.
-
-        Returns:
-            Iterable[str]: An iterable of raw data: [JSON] strings from the streaming API.
-        """
+        Streams chat completion responses from the Persona Server using Retrieval Augmented Generation (RAG), yielding each line of streamed data as it arrives.
+
+        The method retrieves relevant context from the vector store based on the latest user query, augments the chat history, and streams the LLM's response line by line. If enabled, it stores the full answer in memory for multi-turn conversations.
+
+        Parameters:
+            messages (List[Dict[str, str]]): The chat history, with the last message as the current user query.
+            lang (Optional[str]): Optional language code for the query.
+            units (Optional[str]): Optional units for the query.
+
+        Returns:
+            Iterable[str]: Yields each raw data line (as a string) from the streaming API response.
+        """
         user_query = messages[-1]["content"]  # Get the current user query

         # 1. Search vector store for context
@@ -339,16 +327,16 @@ def stream_utterances(self, query: str,
                           lang: Optional[str] = None,
                           units: Optional[str] = None) -> Iterable[str]:
         """
-        Stream utterances for the given query using RAG.
-
-        Args:
-            query (str): The query text.
-            lang (Optional[str]): Optional language code. Defaults to None.
-            units (Optional[str]): Optional units for the query. Defaults to None.
-
-        Returns:
-            Iterable[str]: An iterable of raw data: [JSON] strings from the streaming API.
-        """
+        Streams the assistant's response for a given user query, incorporating current chat history and Retrieval Augmented Generation context.
+
+        Parameters:
+            query (str): The user's input query.
+            lang (Optional[str]): Language code for the response, if applicable.
+            units (Optional[str]): Units relevant to the query, if applicable.
+
+        Returns:
+            Iterable[str]: Yields raw data chunks from the streaming chat completions API.
+        """
         # For stream_utterances, we directly build a single-turn message list
         # We need to include existing chat history here as well for proper context
         messages: List[Dict[str, str]] = self.get_chat_history()
@@ -359,16 +347,16 @@ def get_spoken_answer(self, query: str,
                           lang: Optional[str] = None,
                           units: Optional[str] = None) -> Optional[str]:
         """
-        Obtain the spoken answer for a given query using RAG.
-
-        Args:
-            query (str): The query text.
-            lang (Optional[str]): Optional language code. Defaults to None.
-            units (Optional[str]): Optional units for the query. Defaults to None.
-
-        Returns:
-            str: The spoken answer as a text response.
-        """
+        Return the assistant's spoken answer to a user query, incorporating recent chat history for context.
+
+        Parameters:
+            query (str): The user's input question.
+            lang (Optional[str]): Language code for the response, if specified.
+            units (Optional[str]): Units relevant to the query, if specified.
+
+        Returns:
+            Optional[str]: The assistant's text response, or None if no answer is generated.
+        """
         # For get_spoken_answer, we need to include existing chat history
         messages: List[Dict[str, str]] = self.get_chat_history()
         messages.append({"role": "user", "content": query})
@@ -376,8 +364,13 @@ def get_spoken_answer(self, query: str,

     def get_messages(self, utt: str, system_prompt: Optional[str] = None) -> List[Dict[str, str]]:
         """
-        Builds a message list including the RAG solver's chat history and the current user utterance.
-        The system prompt for the LLM is constructed dynamically in _build_llm_messages.
+        Return the current chat history messages with the latest user utterance appended.
+
+        Parameters:
+            utt (str): The current user utterance to add to the message list.
+
+        Returns:
+            List of message dictionaries representing the conversation history plus the new user message.
         """
         messages = self.get_chat_history()
         messages.append({"role": "user", "content": utt})
diff --git a/setup.py b/setup.py
old mode 100755
new mode 100644
index c2bbfd9..21ac549
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,12 @@ def required(requirements_file):


 def get_version():
-    """ Find the version of the package"""
+    """
+    Extract and return the package version string from the version.py file.
+
+    Returns:
+        str: The version string in the format 'major.minor.build' with an optional 'a{alpha}' suffix if an alpha version is specified.
+ """ version_file = os.path.join(BASEDIR, 'ovos_solver_openai_persona', 'version.py') major, minor, build, alpha = (None, None, None, None) with open(version_file) as f: