
Commit 534843e

[DERCBOT-1609] reviews
1 parent edb1b1c commit 534843e

File tree

3 files changed (+152, -84 lines)

bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts

Lines changed: 80 additions & 25 deletions
@@ -43,31 +43,86 @@ Return only the reformulated question.`;
 
 export const QuestionAnsweringDefaultPrompt: string = `# TOCK (The Open Conversation Kit) chatbot
 
-## General context
-
-You are a chatbot designed to provide short conversational messages in response to user queries.
-
-## Guidelines
-
-Incorporate any relevant details from the provided context into your answers, ensuring they are directly related to the user's query.
-
-## Style and format
-
-Your tone is empathetic, informative and polite.
-
-## Additional instructions
-
-Use the following pieces of retrieved context to answer the question.
-If you dont know the answer, answer (exactly) with "{{no_answer}}".
-Answer in {{locale}}.
-
-## Context
-
-{{context}}
-
-## Question
-
-{{question}}
+## Instructions:
+You must answer STRICTLY in valid JSON format (no extra text, no explanations).
+Use only the following context and the rules below to answer the question.
+
+### Rules for JSON output:
+
+- If the answer is found in the context:
+  - "status": "found_in_context"
+
+- If the answer is NOT found in the context:
+  - "status": "not_found_in_context"
+  - "answer":
+    - The "answer" must not be a generic refusal. Instead, generate a helpful and intelligent response:
+      - If a similar or related element exists in the context (e.g., another product, service, or regulation with a close name, date, or wording), suggest it naturally in the answer.
+      - If no similar element exists, politely acknowledge the lack of information while encouraging clarification or rephrasing.
+    - Always ensure the response is phrased in a natural and user-friendly way, rather than a dry "not found in context".
+
+- If the question matches a special case defined below:
+  - "status": "<the corresponding case code>"
+
+And for all cases (MANDATORY):
+- "answer": "<the best possible answer in {{ locale }}>"
+- "topic": "<exactly ONE topic chosen STRICTLY from the predefined list below. If no exact match is possible, set 'unknown'>"
+- "suggested_topics": ["<zero or more free-form suggestions if topic is unknown>"]
+
+Exception: If the question is small talk (limited to conversational rituals such as greetings (e.g., “hello”, “hi”) and farewells or leave-takings (e.g., “goodbye”, “see you”)), you may ignore the context and generate a natural small-talk response in the "answer". In this case:
+- "status": "small_talk"
+- "topic": "<e.g., greetings>"
+- "suggested_topics": []
+- "context": []
+
+### Context tracing requirements (MANDATORY):
+- You MUST include **every** chunk from the input context in the "context" array, in the same order they appear. **No chunk may be omitted**.
+- If explicit chunk identifiers are present in the context, use them; otherwise assign sequential numbers starting at 1.
+- For each chunk object:
+  - "chunk": "<chunk_identifier_or_sequential_number>"
+  - "sentences": ["<verbatim sentence(s) from this chunk used to answer the question>"]; leave empty `[]` if none.
+  - "reason": null if the chunk contributed; otherwise a concise explanation of why this chunk is not relevant to the question (e.g., "general background only", "different product", "no data for the asked period", etc.).
+- If there are zero chunks in the context, return `"context": []`.
+
+### Predefined list of topics (use EXACT spelling, no variations):
+
+## Context:
+{{ context }}
+
+## Conversation history
+{{ chat_history }}
+
+## User question
+{{ question }}
+
+## Output format (JSON only):
+Return your response in the following format:
+
+{
+  "status": "found_in_context" | "not_found_in_context" | "small_talk",
+  "answer": "TEXTUAL_ANSWER",
+  "topic": "EXACT_TOPIC_FROM_LIST_OR_UNKNOWN",
+  "suggested_topics": [
+    "SUGGESTED_TOPIC_1",
+    "SUGGESTED_TOPIC_2"
+  ],
+  "context": [
+    {
+      "chunk": "1",
+      "sentences": ["SENTENCE_1", "SENTENCE_2"],
+      "reason": null
+    },
+    {
+      "chunk": "2",
+      "sentences": [],
+      "reason": "General description; no details related to the question."
+    },
+    {
+      "chunk": "3",
+      "sentences": ["SENTENCE_X"],
+      "reason": null
+    }
+  ]
+}
 `;
 
 export const QuestionCondensing_prompt: ProvidersConfigurationParam[] = [
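As a minimal sketch of the new contract (not part of this commit; the payload and its values are invented for illustration), a reply that follows the prompt's JSON format can be loaded directly into the Pydantic models introduced in rag_models.py, mirroring the `LLMAnswer(**response['answer'])` call in rag_chain.py:

import json

from gen_ai_orchestrator.models.rag.rag_models import ChunkInfos, LLMAnswer

# Illustrative model output following the prompt's JSON contract.
raw = '''
{
  "status": "found_in_context",
  "answer": "Branches are open from 9am to 5pm.",
  "topic": "opening_hours",
  "suggested_topics": [],
  "context": [
    {"chunk": "1", "sentences": ["Branches are open from 9am to 5pm."], "reason": null},
    {"chunk": "2", "sentences": [], "reason": "General background only."}
  ]
}
'''

# Every field is Optional with default=None, so partial payloads also validate.
llm_answer = LLMAnswer(**json.loads(raw))
assert llm_answer.status == 'found_in_context'
assert isinstance(llm_answer.context[0], ChunkInfos)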

gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py

Lines changed: 47 additions & 9 deletions
@@ -52,17 +52,55 @@ class Footnote(Source):
 
     identifier: str = Field(description='Footnote identifier', examples=['1'])
 
-class ChunkSentences(BaseModel):
-    chunk: Optional[str] = None
-    sentences: Optional[List[str]] = None
-    reason: Optional[str] = None
+class ChunkInfos(BaseModel):
+    """A model representing information about a chunk used in the RAG context."""
+
+    chunk: Optional[str] = Field(
+        description='Unique identifier of the chunk.',
+        examples=['cd6d8221-ba9f-44da-86ee-0e25a3c9a5c7'],
+        default=None
+    )
+    sentences: Optional[List[str]] = Field(
+        description='List of verbatim sentences from the chunk that were used by the LLM.',
+        default=None
+    )
+    reason: Optional[str] = Field(
+        description='Reason why the chunk was not used (e.g., irrelevant, general background).',
+        default=None
+    )
+
 
 class LLMAnswer(BaseModel):
-    status: Optional[str] = None
-    answer: Optional[str] = None
-    topic: Optional[str] = None
-    suggested_topics: Optional[List[str]] = None
-    context: Optional[List[ChunkSentences]] = None
+    """
+    A model representing the structured answer generated by the LLM
+    in response to a user query, based on the provided RAG context.
+    """
+
+    status: Optional[str] = Field(
+        description="The status of the answer generation. "
+                    "Possible values: 'found_in_context', 'not_found_in_context', 'small_talk', "
+                    "or other case-specific codes.",
+        default=None
+    )
+    answer: Optional[str] = Field(
+        description="The textual answer generated by the LLM, in the user's locale.",
+        default=None
+    )
+    topic: Optional[str] = Field(
+        description="The main topic assigned to the answer. Must be one of the predefined list "
+                    "of topics, or 'unknown' if no match is possible.",
+        default=None
+    )
+    suggested_topics: Optional[List[str]] = Field(
+        description="A list of suggested alternative or related topics, "
+                    "used when the main topic is 'unknown'.",
+        default=None
+    )
+    context: Optional[List[ChunkInfos]] = Field(
+        description="The list of chunks from the context that contributed to or were considered "
+                    "in the LLM's answer. Each entry contains identifiers, sentences, and reasons.",
+        default=None
+    )
 
 @unique
 class ChatMessageType(str, Enum):
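Since `reason` is defined to be null exactly when a chunk contributed sentences, a consumer of these models can separate used from ignored chunks. A hypothetical helper (the function name and module usage are mine, not from the commit):

from typing import List, Tuple

from gen_ai_orchestrator.models.rag.rag_models import ChunkInfos


def split_chunks(chunks: List[ChunkInfos]) -> Tuple[List[ChunkInfos], List[ChunkInfos]]:
    """Separate chunks that contributed sentences from those reported as unused."""
    used = [c for c in chunks if c.sentences]      # reason is expected to be None here
    unused = [c for c in chunks if not c.sentences]  # reason explains the exclusion
    return used, unused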

gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py

Lines changed: 25 additions & 50 deletions
@@ -23,11 +23,8 @@
 from functools import partial
 from logging import ERROR, WARNING
 from operator import itemgetter
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
-from langchain.chains.conversational_retrieval.base import (
-    ConversationalRetrievalChain,
-)
 from langchain.retrievers.contextual_compression import (
     ContextualCompressionRetriever,
 )
@@ -41,11 +38,10 @@
 from langchain_core.runnables import (
     RunnableParallel,
     RunnablePassthrough,
-    RunnableSerializable, RunnableConfig, RunnableBranch, RunnableLambda,
+    RunnableSerializable, RunnableConfig, RunnableLambda,
 )
 from langchain_core.vectorstores import VectorStoreRetriever
-from langfuse.callback import CallbackHandler as LangfuseCallbackHandler
-from typing_extensions import Any, deprecated
+from typing_extensions import Any
 
 from gen_ai_orchestrator.errors.exceptions.exceptions import (
     GenAIGuardCheckException,
@@ -75,7 +71,6 @@
 )
 from gen_ai_orchestrator.routers.requests.requests import RAGRequest
 from gen_ai_orchestrator.routers.responses.responses import (
-    ObservabilityInfo,
     RAGResponse,
 )
 from gen_ai_orchestrator.services.langchain.callbacks.rag_callback_handler import (
@@ -112,7 +107,7 @@ async def execute_rag_chain(
     Args:
         request: The RAG request
         debug: True if RAG data debug should be returned with the response.
-        custom_observability_handler: Custom observability handler
+        custom_observability_handler: Custom observability handler (used by the run_experiment.py tooling script)
     Returns:
         The RAG response (Answer and document sources)
     """
@@ -133,17 +128,13 @@
     logger.debug('RAG chain - Use chat history: %s', len(message_history.messages) > 0)
     logger.debug('RAG chain - Use RAGCallbackHandler for debugging : %s', debug)
 
-    callback_handlers = get_callback_handlers(request, custom_observability_handler, debug)
-    records_callback_handler = None
-    if debug:
-        records_callback_handler = next(
-            (x for x in callback_handlers if isinstance(x, RAGCallbackHandler)),
-            None
-        )
-    observability_handler = next(
-        (x for x in callback_handlers if isinstance(x, LangfuseCallbackHandler)),
-        None
-    )
+    records_handler, observability_handler = get_callback_handlers(request, debug)
+
+    callbacks = [
+        handler
+        for handler in (records_handler, observability_handler, custom_observability_handler)
+        if handler is not None
+    ]
 
     inputs = {
         **request.question_answering_prompt.inputs,
@@ -152,7 +143,7 @@
 
     response = await conversational_retrieval_chain.ainvoke(
         input=inputs,
-        config=RunnableConfig(callbacks=callback_handlers)
+        config=RunnableConfig(callbacks=callbacks)
     )
     llm_answer = LLMAnswer(**response['answer'])
 
@@ -193,19 +184,18 @@ async def execute_rag_chain(
             if doc.metadata['id'] in contexts_by_chunk
         },
         observability_info=get_observability_info(observability_handler),
-        debug=get_rag_debug_data(request, records_callback_handler, rag_duration)
+        debug=get_rag_debug_data(request, records_handler, rag_duration)
         if debug
         else None,
     )
 
-def get_callback_handlers(request, custom_observability_handler, debug):
-    callback_handlers = []
-    records_callback_handler = RAGCallbackHandler()
-    if debug:
-        # Debug callback handler
-        callback_handlers.append(records_callback_handler)
-    if custom_observability_handler is not None:
-        callback_handlers.append(custom_observability_handler)
+def get_callback_handlers(request, debug) -> Tuple[
+    Optional[RAGCallbackHandler],
+    Optional[object],
+]:
+    records_handler = RAGCallbackHandler() if debug else None
+    observability_handler = None
+
     if request.observability_setting is not None:
         if request.dialog:
             session_id = request.dialog.dialog_id
@@ -215,17 +205,18 @@ def get_callback_handlers(request, custom_observability_handler, debug):
         session_id = None
         user_id = None
         tags = None
-        # Langfuse callback handler
         observability_handler = create_observability_callback_handler(
             observability_setting=request.observability_setting,
             trace_name=ObservabilityTrace.RAG.value,
             session_id=session_id,
             user_id=user_id,
             tags=tags,
         )
-        callback_handlers.append(observability_handler)
 
-    return callback_handlers
+    return (
+        records_handler,
+        observability_handler,
+    )
 
 def get_source_content(doc: Document) -> str:
     """
@@ -296,6 +287,7 @@ def create_rag_chain(
     question_condensing_llm = question_condensing_llm_factory.get_language_model()
    question_answering_llm = question_answering_llm_factory.get_language_model()
 
+    # Fall back to the answering LLM when no condensing LLM setting is provided.
     if question_condensing_llm is not None:
         condensing_llm = question_condensing_llm
     else :
@@ -371,23 +363,6 @@ def format_chat_history(x):
             messages.append({"assistant": msg.content})
     return json.dumps(messages, ensure_ascii=False, indent=2)
 
-def construct_rag_chain(llm, rag_prompt):
-    return (
-        {
-            "context": lambda x: json.dumps([
-                {
-                    "chunk_id": doc.metadata['id'],
-                    "chunk_text": doc.page_content,
-                }
-                for doc in x["documents"]
-            ], ensure_ascii=False, indent=2),
-            "chat_history": format_chat_history,
-        }
-        | rag_prompt
-        | llm
-        | JsonOutputParser(pydantic_object=LLMAnswer, name="rag_chain_output")
-    )
-
 def build_question_condensation_chain(
     llm, prompt: Optional[PromptTemplate]
 ) -> ChatPromptTemplate:
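The refactor above replaces isinstance-based scanning of a flat handler list with an explicit tuple return, after which execute_rag_chain keeps only the handlers that are actually set. A standalone sketch of that pattern (stand-in class and names are mine, not the commit's code):

from typing import Optional, Tuple


class StubHandler:
    """Stand-in for RAGCallbackHandler or an observability handler."""


def get_handlers(debug: bool) -> Tuple[Optional[StubHandler], Optional[StubHandler]]:
    records = StubHandler() if debug else None  # debug-only records handler
    observability = None                        # created only when a setting exists
    return records, observability


records, observability = get_handlers(debug=True)
custom = None  # optional handler passed in by tooling such as run_experiment.py
# Same filtering as the new `callbacks` list comprehension in execute_rag_chain.
callbacks = [h for h in (records, observability, custom) if h is not None]
assert len(callbacks) == 1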
