
Commit 534843e

[DERCBOT-1609] reviews
1 parent edb1b1c commit 534843e

File tree

3 files changed (+152, -84 lines)

bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts

Lines changed: 80 additions & 25 deletions
@@ -43,31 +43,86 @@ Return only the reformulated question.`;
 
 export const QuestionAnsweringDefaultPrompt: string = `# TOCK (The Open Conversation Kit) chatbot
 
-## General context
-
-You are a chatbot designed to provide short conversational messages in response to user queries.
-
-## Guidelines
-
-Incorporate any relevant details from the provided context into your answers, ensuring they are directly related to the user's query.
-
-## Style and format
-
-Your tone is empathetic, informative and polite.
-
-## Additional instructions
-
-Use the following pieces of retrieved context to answer the question.
-If you dont know the answer, answer (exactly) with "{{no_answer}}".
-Answer in {{locale}}.
-
-## Context
-
-{{context}}
-
-## Question
-
-{{question}}
+## Instructions:
+You must answer STRICTLY in valid JSON format (no extra text, no explanations).
+Use only the following context and the rules below to answer the question.
+
+### Rules for JSON output:
+
+- If the answer is found in the context:
+  - "status": "found_in_context"
+
+- If the answer is NOT found in the context:
+  - "status": "not_found_in_context"
+  - "answer":
+    - The "answer" must not be a generic refusal. Instead, generate a helpful and intelligent response:
+      - If a similar or related element exists in the context (e.g., another product, service, or regulation with a close name, date, or wording), suggest it naturally in the answer.
+      - If no similar element exists, politely acknowledge the lack of information while encouraging clarification or rephrasing.
+    - Always ensure the response is phrased in a natural and user-friendly way, rather than a dry "not found in context".
+
+- If the question matches a special case defined below:
+  - "status": "<the corresponding case code>"
+
+And for all cases (MANDATORY):
+- "answer": "<the best possible answer in {{ locale }}>"
+- "topic": "<exactly ONE topic chosen STRICTLY from the predefined list below. If no exact match is possible, set 'unknown'>"
+- "suggested_topics": ["<zero or more free-form suggestions if topic is unknown>"]
+
+Exception: If the question is small talk (limited to conversational rituals such as greetings (e.g., “hello”, “hi”) and farewells or leave-takings (e.g., “goodbye”, “see you”)), you may ignore the context and generate a natural small-talk response in the "answer". In this case:
+- "status": "small_talk"
+- "topic": "<e.g., greetings>"
+- "suggested_topics": []
+- "context": []
+
+### Context tracing requirements (MANDATORY):
+- You MUST include **every** chunk from the input context in the "context" array, in the same order they appear. **No chunk may be omitted**.
+- If explicit chunk identifiers are present in the context, use them; otherwise assign sequential numbers starting at 1.
+- For each chunk object:
+  - "chunk": "<chunk_identifier_or_sequential_number>"
+  - "sentences": ["<verbatim sentence(s) from this chunk used to answer the question>"]; leave empty `[]` if none.
+  - "reason": null if the chunk contributed; otherwise a concise explanation of why this chunk is not relevant to the question (e.g., "general background only", "different product", "no data for the asked period", etc.).
+- If there are zero chunks in the context, return `"context": []`.
+
+### Predefined list of topics (use EXACT spelling, no variations):
+
+## Context:
+{{ context }}
+
+## Conversation history
+{{ chat_history }}
+
+## User question
+{{ question }}
+
+## Output format (JSON only):
+Return your response in the following format:
+
+{
+  "status": "found_in_context" | "not_found_in_context" | "small_talk",
+  "answer": "TEXTUAL_ANSWER",
+  "topic": "EXACT_TOPIC_FROM_LIST_OR_UNKNOWN",
+  "suggested_topics": [
+    "SUGGESTED_TOPIC_1",
+    "SUGGESTED_TOPIC_2"
+  ],
+  "context": [
+    {
+      "chunk": "1",
+      "sentences": ["SENTENCE_1", "SENTENCE_2"],
+      "reason": null
+    },
+    {
+      "chunk": "2",
+      "sentences": [],
+      "reason": "General description; no details related to the question."
+    },
+    {
+      "chunk": "3",
+      "sentences": ["SENTENCE_X"],
+      "reason": null
+    }
+  ]
+}
 `;
 
 export const QuestionCondensing_prompt: ProvidersConfigurationParam[] = [
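As a minimal sketch of the new contract (not part of this commit; the payload and its values are invented for illustration), a reply that follows the prompt's JSON format can be loaded directly into the Pydantic models introduced in rag_models.py, mirroring the `LLMAnswer(**response['answer'])` call in rag_chain.py:

import json

from gen_ai_orchestrator.models.rag.rag_models import ChunkInfos, LLMAnswer

# Illustrative model output following the prompt's JSON contract.
raw = '''
{
  "status": "found_in_context",
  "answer": "Branches are open from 9am to 5pm.",
  "topic": "opening_hours",
  "suggested_topics": [],
  "context": [
    {"chunk": "1", "sentences": ["Branches are open from 9am to 5pm."], "reason": null},
    {"chunk": "2", "sentences": [], "reason": "General background only."}
  ]
}
'''

# Every field is Optional with default=None, so partial payloads also validate.
llm_answer = LLMAnswer(**json.loads(raw))
assert llm_answer.status == 'found_in_context'
assert isinstance(llm_answer.context[0], ChunkInfos)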

gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py

Lines changed: 47 additions & 9 deletions
@@ -52,17 +52,55 @@ class Footnote(Source):
 
     identifier: str = Field(description='Footnote identifier', examples=['1'])
 
-class ChunkSentences(BaseModel):
-    chunk: Optional[str] = None
-    sentences: Optional[List[str]] = None
-    reason: Optional[str] = None
+class ChunkInfos(BaseModel):
+    """A model representing information about a chunk used in the RAG context."""
+
+    chunk: Optional[str] = Field(
+        description='Unique identifier of the chunk.',
+        examples=['cd6d8221-ba9f-44da-86ee-0e25a3c9a5c7'],
+        default=None
+    )
+    sentences: Optional[List[str]] = Field(
+        description='List of verbatim sentences from the chunk that were used by the LLM.',
+        default=None
+    )
+    reason: Optional[str] = Field(
+        description='Reason why the chunk was not used (e.g., irrelevant, general background).',
+        default=None
+    )
+
 
 class LLMAnswer(BaseModel):
-    status: Optional[str] = None
-    answer: Optional[str] = None
-    topic: Optional[str] = None
-    suggested_topics: Optional[List[str]] = None
-    context: Optional[List[ChunkSentences]] = None
+    """
+    A model representing the structured answer generated by the LLM
+    in response to a user query, based on the provided RAG context.
+    """
+
+    status: Optional[str] = Field(
+        description="The status of the answer generation. "
+                    "Possible values: 'found_in_context', 'not_found_in_context', 'small_talk', "
+                    "or other case-specific codes.",
+        default=None
+    )
+    answer: Optional[str] = Field(
+        description="The textual answer generated by the LLM, in the user's locale.",
+        default=None
+    )
+    topic: Optional[str] = Field(
+        description="The main topic assigned to the answer. Must be one of the predefined list "
+                    "of topics, or 'unknown' if no match is possible.",
+        default=None
+    )
+    suggested_topics: Optional[List[str]] = Field(
+        description="A list of suggested alternative or related topics, "
+                    "used when the main topic is 'unknown'.",
+        default=None
+    )
+    context: Optional[List[ChunkInfos]] = Field(
+        description="The list of chunks from the context that contributed to or were considered "
+                    "in the LLM's answer. Each entry contains identifiers, sentences, and reasons.",
+        default=None
+    )
 
 @unique
 class ChatMessageType(str, Enum):
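Since `reason` is defined to be null exactly when a chunk contributed sentences, a consumer of these models can separate used from ignored chunks. A hypothetical helper (the function name and module usage are mine, not from the commit):

from typing import List, Tuple

from gen_ai_orchestrator.models.rag.rag_models import ChunkInfos


def split_chunks(chunks: List[ChunkInfos]) -> Tuple[List[ChunkInfos], List[ChunkInfos]]:
    """Separate chunks that contributed sentences from those reported as unused."""
    used = [c for c in chunks if c.sentences]      # reason is expected to be None here
    unused = [c for c in chunks if not c.sentences]  # reason explains the exclusion
    return used, unused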

gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py

Lines changed: 25 additions & 50 deletions
@@ -23,11 +23,8 @@
 from functools import partial
 from logging import ERROR, WARNING
 from operator import itemgetter
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
-from langchain.chains.conversational_retrieval.base import (
-    ConversationalRetrievalChain,
-)
 from langchain.retrievers.contextual_compression import (
     ContextualCompressionRetriever,
 )
@@ -41,11 +38,10 @@
 from langchain_core.runnables import (
     RunnableParallel,
     RunnablePassthrough,
-    RunnableSerializable, RunnableConfig, RunnableBranch, RunnableLambda,
+    RunnableSerializable, RunnableConfig, RunnableLambda,
 )
 from langchain_core.vectorstores import VectorStoreRetriever
-from langfuse.callback import CallbackHandler as LangfuseCallbackHandler
-from typing_extensions import Any, deprecated
+from typing_extensions import Any
 
 from gen_ai_orchestrator.errors.exceptions.exceptions import (
     GenAIGuardCheckException,
@@ -75,7 +71,6 @@
 )
 from gen_ai_orchestrator.routers.requests.requests import RAGRequest
 from gen_ai_orchestrator.routers.responses.responses import (
-    ObservabilityInfo,
     RAGResponse,
 )
 from gen_ai_orchestrator.services.langchain.callbacks.rag_callback_handler import (
@@ -112,7 +107,7 @@ async def execute_rag_chain(
     Args:
         request: The RAG request
         debug: True if RAG data debug should be returned with the response.
-        custom_observability_handler: Custom observability handler
+        custom_observability_handler: Custom observability handler (used by the run_experiment.py tooling script)
     Returns:
         The RAG response (Answer and document sources)
     """
@@ -133,17 +128,13 @@
     logger.debug('RAG chain - Use chat history: %s', len(message_history.messages) > 0)
     logger.debug('RAG chain - Use RAGCallbackHandler for debugging : %s', debug)
 
-    callback_handlers = get_callback_handlers(request, custom_observability_handler, debug)
-    records_callback_handler = None
-    if debug:
-        records_callback_handler = next(
-            (x for x in callback_handlers if isinstance(x, RAGCallbackHandler)),
-            None
-        )
-    observability_handler = next(
-        (x for x in callback_handlers if isinstance(x, LangfuseCallbackHandler)),
-        None
-    )
+    records_handler, observability_handler = get_callback_handlers(request, debug)
+
+    callbacks = [
+        handler
+        for handler in (records_handler, observability_handler, custom_observability_handler)
+        if handler is not None
+    ]
 
     inputs = {
         **request.question_answering_prompt.inputs,
@@ -152,7 +143,7 @@
 
     response = await conversational_retrieval_chain.ainvoke(
         input=inputs,
-        config=RunnableConfig(callbacks=callback_handlers)
+        config=RunnableConfig(callbacks=callbacks)
     )
     llm_answer = LLMAnswer(**response['answer'])
 
@@ -193,19 +184,18 @@ async def execute_rag_chain(
             if doc.metadata['id'] in contexts_by_chunk
         },
         observability_info=get_observability_info(observability_handler),
-        debug=get_rag_debug_data(request, records_callback_handler, rag_duration)
+        debug=get_rag_debug_data(request, records_handler, rag_duration)
         if debug
         else None,
     )
 
-def get_callback_handlers(request, custom_observability_handler, debug):
-    callback_handlers = []
-    records_callback_handler = RAGCallbackHandler()
-    if debug:
-        # Debug callback handler
-        callback_handlers.append(records_callback_handler)
-    if custom_observability_handler is not None:
-        callback_handlers.append(custom_observability_handler)
+def get_callback_handlers(request, debug) -> Tuple[
+    Optional[RAGCallbackHandler],
+    Optional[object],
+]:
+    records_handler = RAGCallbackHandler() if debug else None
+    observability_handler = None
+
     if request.observability_setting is not None:
         if request.dialog:
             session_id = request.dialog.dialog_id
@@ -215,17 +205,18 @@ def get_callback_handlers(request, custom_observability_handler, debug):
         session_id = None
         user_id = None
         tags = None
-        # Langfuse callback handler
         observability_handler = create_observability_callback_handler(
             observability_setting=request.observability_setting,
             trace_name=ObservabilityTrace.RAG.value,
             session_id=session_id,
             user_id=user_id,
             tags=tags,
         )
-        callback_handlers.append(observability_handler)
 
-    return callback_handlers
+    return (
+        records_handler,
+        observability_handler,
+    )
 
 def get_source_content(doc: Document) -> str:
     """
@@ -296,6 +287,7 @@ def create_rag_chain(
     question_condensing_llm = question_condensing_llm_factory.get_language_model()
    question_answering_llm = question_answering_llm_factory.get_language_model()
 
+    # Fall back to the answering LLM when no condensing LLM setting is provided.
     if question_condensing_llm is not None:
         condensing_llm = question_condensing_llm
     else :
@@ -371,23 +363,6 @@ def format_chat_history(x):
             messages.append({"assistant": msg.content})
     return json.dumps(messages, ensure_ascii=False, indent=2)
 
-def construct_rag_chain(llm, rag_prompt):
-    return (
-        {
-            "context": lambda x: json.dumps([
-                {
-                    "chunk_id": doc.metadata['id'],
-                    "chunk_text": doc.page_content,
-                }
-                for doc in x["documents"]
-            ], ensure_ascii=False, indent=2),
-            "chat_history": format_chat_history,
-        }
-        | rag_prompt
-        | llm
-        | JsonOutputParser(pydantic_object=LLMAnswer, name="rag_chain_output")
-    )
-
 def build_question_condensation_chain(
     llm, prompt: Optional[PromptTemplate]
 ) -> ChatPromptTemplate:
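The refactor above replaces isinstance-based scanning of a flat handler list with an explicit tuple return, after which execute_rag_chain keeps only the handlers that are actually set. A standalone sketch of that pattern (stand-in class and names are mine, not the commit's code):

from typing import Optional, Tuple


class StubHandler:
    """Stand-in for RAGCallbackHandler or an observability handler."""


def get_handlers(debug: bool) -> Tuple[Optional[StubHandler], Optional[StubHandler]]:
    records = StubHandler() if debug else None  # debug-only records handler
    observability = None                        # created only when a setting exists
    return records, observability


records, observability = get_handlers(debug=True)
custom = None  # optional handler passed in by tooling such as run_experiment.py
# Same filtering as the new `callbacks` list comprehension in execute_rag_chain.
callbacks = [h for h in (records, observability, custom) if h is not None]
assert len(callbacks) == 1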
