
Commit 3629df8

Use prompty to store prompts (#2178)
* move-prompts-to-jinja-templates
* refactor: convert few_shots to JSON format and clean up comments
* Clean retrievethenread.py
* Port to prompty
* Configure Azure Developer Pipeline
* Refactor to use a PromptManager
* Inject followup at the end
* Make mypy so happy, remove conversation truncation for ask approaches
* Refactor text.py since it doesn't need to be its own very short file
* Fix Chat approach tests
* More prompty updates, test updates
* Fix type annotations
* Update more snapshots
* Add prompty metadata, revert some unneeded changes
* Fix thought process UI keys and data expectations
* Resolve issue with injected prompt, update test
* Pass in past messages to query rewrite prompt
* Update snapshots
* Updated prompty
* Removing prompty from types now that it has them
* Add samples to the non-vision approaches
* Rename the prompts to match the tabs
* Add back a comment about RAG flow
* Add prompty back to pyproject.toml
* Update the docs about customization
* Update pyproject now that prompty is typed
* Fix vision approach missing past messages

---------

Co-authored-by: Jeannot Damoiseaux <[email protected]>
1 parent: 5f898db

85 files changed (+643, −432 lines)


app/backend/app.py

Lines changed: 10 additions & 2 deletions
@@ -51,6 +51,7 @@
 from approaches.approach import Approach
 from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
 from approaches.chatreadretrievereadvision import ChatReadRetrieveReadVisionApproach
+from approaches.promptmanager import PromptyManager
 from approaches.retrievethenread import RetrieveThenReadApproach
 from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
 from chat_history.cosmosdb import chat_history_cosmosdb_bp
@@ -642,8 +643,10 @@ async def setup_clients():
     current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED] = USE_CHAT_HISTORY_BROWSER
     current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED] = USE_CHAT_HISTORY_COSMOS
 
-    # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
-    # or some derivative, here we include several for exploration purposes
+    prompt_manager = PromptyManager()
+
+    # Set up the two default RAG approaches for /ask and /chat
+    # RetrieveThenReadApproach is used by /ask for single-turn Q&A
     current_app.config[CONFIG_ASK_APPROACH] = RetrieveThenReadApproach(
         search_client=search_client,
         openai_client=openai_client,
@@ -657,8 +660,10 @@ async def setup_clients():
         content_field=KB_FIELDS_CONTENT,
         query_language=AZURE_SEARCH_QUERY_LANGUAGE,
         query_speller=AZURE_SEARCH_QUERY_SPELLER,
+        prompt_manager=prompt_manager,
     )
 
+    # ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
     current_app.config[CONFIG_CHAT_APPROACH] = ChatReadRetrieveReadApproach(
         search_client=search_client,
         openai_client=openai_client,
@@ -672,6 +677,7 @@
         content_field=KB_FIELDS_CONTENT,
         query_language=AZURE_SEARCH_QUERY_LANGUAGE,
         query_speller=AZURE_SEARCH_QUERY_SPELLER,
+        prompt_manager=prompt_manager,
     )
 
     if USE_GPT4V:
@@ -696,6 +702,7 @@
         content_field=KB_FIELDS_CONTENT,
         query_language=AZURE_SEARCH_QUERY_LANGUAGE,
         query_speller=AZURE_SEARCH_QUERY_SPELLER,
+        prompt_manager=prompt_manager,
     )
 
     current_app.config[CONFIG_CHAT_VISION_APPROACH] = ChatReadRetrieveReadVisionApproach(
@@ -716,6 +723,7 @@
         content_field=KB_FIELDS_CONTENT,
         query_language=AZURE_SEARCH_QUERY_LANGUAGE,
         query_speller=AZURE_SEARCH_QUERY_SPELLER,
+        prompt_manager=prompt_manager,
     )
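Note: this commit's diffs only show PromptyManager being consumed, not defined. As orientation, here is a minimal sketch of the contract the injected prompt_manager has to satisfy, inferred from the call sites in the diffs below (load_prompt, load_tools, render_prompt, and the fields read off the rendered result). The prompts directory, the RenderedPrompt dataclass, and the prompty.load / prompty.prepare calls are assumptions for illustration, not the repo's actual promptmanager.py:

import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import prompty  # the package this commit adds to pyproject.toml


@dataclass
class RenderedPrompt:
    # Field names taken from how chatreadretrieveread.py reads the result.
    system_content: str = ""
    few_shot_messages: list[Any] = field(default_factory=list)
    past_messages: list[Any] = field(default_factory=list)
    new_user_content: str = ""


class PromptyManager:
    # Assumed location of the .prompty templates and tools .json files.
    PROMPTS_DIRECTORY = Path(__file__).parent / "prompts"

    def load_prompt(self, name: str):
        return prompty.load(str(self.PROMPTS_DIRECTORY / name))

    def load_tools(self, name: str) -> list[Any]:
        return json.loads((self.PROMPTS_DIRECTORY / name).read_text())

    def render_prompt(self, prompt, data: dict[str, Any]) -> RenderedPrompt:
        # prompty.prepare renders the Jinja template into chat-format messages;
        # sorting them into the four buckets above is elided in this sketch.
        messages = prompty.prepare(prompt, data)
        system = next((m for m in messages if m["role"] == "system"), None)
        return RenderedPrompt(
            system_content=system["content"] if system else "",
            new_user_content=messages[-1]["content"] if messages else "",
        )

Whatever its internals, a single instance is constructed in setup_clients and injected into all four approaches, the same dependency-injection pattern already used for the search and OpenAI clients, so every approach shares one source of parsed prompts.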

app/backend/approaches/approach.py

Lines changed: 7 additions & 1 deletion
@@ -24,8 +24,8 @@
 from openai import AsyncOpenAI
 from openai.types.chat import ChatCompletionMessageParam
 
+from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
-from text import nonewlines
 
 
 @dataclass
@@ -109,6 +109,7 @@ def __init__(
         openai_host: str,
         vision_endpoint: str,
         vision_token_provider: Callable[[], Awaitable[str]],
+        prompt_manager: PromptManager,
     ):
         self.search_client = search_client
         self.openai_client = openai_client
@@ -121,6 +122,7 @@
         self.openai_host = openai_host
         self.vision_endpoint = vision_endpoint
         self.vision_token_provider = vision_token_provider
+        self.prompt_manager = prompt_manager
 
     def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
         include_category = overrides.get("include_category")
@@ -205,6 +207,10 @@ async def search(
     def get_sources_content(
         self, results: List[Document], use_semantic_captions: bool, use_image_citation: bool
     ) -> list[str]:
+
+        def nonewlines(s: str) -> str:
+            return s.replace("\n", " ").replace("\r", " ")
+
         if use_semantic_captions:
             return [
                 (self.get_citation((doc.sourcepage or ""), use_image_citation))
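With text.py deleted, the former helper survives as a nested function. Collapsing newlines keeps each retrieved source on a single line, so the "filename: content" entries the prompts rely on stay unambiguous. A quick standalone illustration (the sample string is made up):

def nonewlines(s: str) -> str:
    return s.replace("\n", " ").replace("\r", " ")

# Hypothetical multi-line source snippet:
print(nonewlines("Plan A covers:\r\n- dental\n- vision"))
# prints: "Plan A covers:  - dental - vision"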

app/backend/approaches/chatapproach.py

Lines changed: 6 additions & 38 deletions
@@ -9,53 +9,21 @@
 
 
 class ChatApproach(Approach, ABC):
-    query_prompt_few_shots: list[ChatCompletionMessageParam] = [
-        {"role": "user", "content": "How did crypto do last year?"},
-        {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"},
-        {"role": "user", "content": "What are my health plans?"},
-        {"role": "assistant", "content": "Show available health plans"},
-    ]
-    NO_RESPONSE = "0"
-
-    follow_up_questions_prompt_content = """Generate 3 very brief follow-up questions that the user would likely ask next.
-Enclose the follow-up questions in double angle brackets. Example:
-<<Are there exclusions for prescriptions?>>
-<<Which pharmacies can be ordered from?>>
-<<What is the limit for over-the-counter medication?>>
-Do no repeat questions that have already been asked.
-Make sure the last question ends with ">>".
-"""
-
-    query_prompt_template = """Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.
-You have access to Azure AI Search index with 100's of documents.
-Generate a search query based on the conversation and the new question.
-Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
-Do not include any text inside [] or <<>> in the search query terms.
-Do not include any special characters like '+'.
-If the question is not in English, translate the question to English before generating the search query.
-If you cannot generate a search query, return just the number 0.
-"""
 
-    @property
-    @abstractmethod
-    def system_message_chat_conversation(self) -> str:
-        pass
+    NO_RESPONSE = "0"
 
     @abstractmethod
     async def run_until_final_call(self, messages, overrides, auth_claims, should_stream) -> tuple:
         pass
 
-    def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str) -> str:
+    def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[str, str]:
+        # Allows client to replace the entire prompt, or to inject into the existing prompt using >>>
         if override_prompt is None:
-            return self.system_message_chat_conversation.format(
-                injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt
-            )
+            return {}
         elif override_prompt.startswith(">>>"):
-            return self.system_message_chat_conversation.format(
-                injected_prompt=override_prompt[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt
-            )
+            return {"injected_prompt": override_prompt[3:]}
         else:
-            return override_prompt.format(follow_up_questions_prompt=follow_up_questions_prompt)
+            return {"override_prompt": override_prompt}
 
     def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
         response_message = chat_completion.choices[0].message
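The rewritten helper no longer formats the final prompt itself; it only maps the client's prompt_template override onto variables for the .prompty template to consume. A standalone copy of the three cases, with illustrative inputs:

from typing import Optional


def get_system_prompt_variables(override_prompt: Optional[str]) -> dict[str, str]:
    # Same logic as the method above, extracted for demonstration.
    if override_prompt is None:
        return {}  # use the .prompty file's own system prompt unchanged
    elif override_prompt.startswith(">>>"):
        return {"injected_prompt": override_prompt[3:]}  # inject into the existing prompt
    else:
        return {"override_prompt": override_prompt}  # replace the prompt entirely


print(get_system_prompt_variables(None))                     # {}
print(get_system_prompt_variables(">>>Answer in Spanish."))  # {'injected_prompt': 'Answer in Spanish.'}
print(get_system_prompt_variables("You are a pirate."))      # {'override_prompt': 'You are a pirate.'}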

app/backend/approaches/chatreadretrieveread.py

Lines changed: 29 additions & 50 deletions
@@ -13,6 +13,7 @@
 
 from approaches.approach import ThoughtStep
 from approaches.chatapproach import ChatApproach
+from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
 
 
@@ -38,6 +39,7 @@ def __init__(
         content_field: str,
         query_language: str,
         query_speller: str,
+        prompt_manager: PromptManager
     ):
         self.search_client = search_client
         self.openai_client = openai_client
@@ -52,16 +54,10 @@ def __init__(
         self.query_language = query_language
         self.query_speller = query_speller
         self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=self.ALLOW_NON_GPT_MODELS)
-
-    @property
-    def system_message_chat_conversation(self):
-        return """Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
-Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
-If the question is not in English, answer in the language used in the question.
-Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
-{follow_up_questions_prompt}
-{injected_prompt}
-"""
+        self.prompt_manager = prompt_manager
+        self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
+        self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
+        self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
 
     @overload
     async def run_until_final_call(
@@ -101,37 +97,21 @@ async def run_until_final_call(
         original_user_query = messages[-1]["content"]
         if not isinstance(original_user_query, str):
             raise ValueError("The most recent message content must be a string.")
-        user_query_request = "Generate search query for: " + original_user_query
-
-        tools: List[ChatCompletionToolParam] = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "search_sources",
-                    "description": "Retrieve sources from the Azure AI Search index",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "search_query": {
-                                "type": "string",
-                                "description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
-                            }
-                        },
-                        "required": ["search_query"],
-                    },
-                },
-            }
-        ]
+
+        rendered_query_prompt = self.prompt_manager.render_prompt(
+            self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]}
+        )
+        tools: List[ChatCompletionToolParam] = self.query_rewrite_tools
 
         # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
         query_response_token_limit = 100
         query_messages = build_messages(
             model=self.chatgpt_model,
-            system_prompt=self.query_prompt_template,
+            system_prompt=rendered_query_prompt.system_content,
+            few_shots=rendered_query_prompt.few_shot_messages,
+            past_messages=rendered_query_prompt.past_messages,
+            new_user_content=rendered_query_prompt.new_user_content,
             tools=tools,
-            few_shots=self.query_prompt_few_shots,
-            past_messages=messages[:-1],
-            new_user_content=user_query_request,
             max_tokens=self.chatgpt_token_limit - query_response_token_limit,
             fallback_to_default=self.ALLOW_NON_GPT_MODELS,
         )
@@ -169,32 +149,31 @@
             minimum_reranker_score,
         )
 
-        sources_content = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
-        content = "\n".join(sources_content)
-
         # STEP 3: Generate a contextual and content specific answer using the search results and chat history
-
-        # Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
-        system_message = self.get_system_prompt(
-            overrides.get("prompt_template"),
-            self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
+        text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
+        rendered_answer_prompt = self.prompt_manager.render_prompt(
+            self.answer_prompt,
+            self.get_system_prompt_variables(overrides.get("prompt_template"))
+            | {
+                "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
+                "past_messages": messages[:-1],
+                "user_query": original_user_query,
+                "text_sources": text_sources,
+            },
         )
 
         response_token_limit = 1024
         messages = build_messages(
             model=self.chatgpt_model,
-            system_prompt=system_message,
-            past_messages=messages[:-1],
-            # Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
-            new_user_content=original_user_query + "\n\nSources:\n" + content,
+            system_prompt=rendered_answer_prompt.system_content,
+            past_messages=rendered_answer_prompt.past_messages,
+            new_user_content=rendered_answer_prompt.new_user_content,
             max_tokens=self.chatgpt_token_limit - response_token_limit,
             fallback_to_default=self.ALLOW_NON_GPT_MODELS,
         )
 
-        data_points = {"text": sources_content}
-
         extra_info = {
-            "data_points": data_points,
+            "data_points": {"text": text_sources},
             "thoughts": [
                 ThoughtStep(
                     "Prompt to generate search query",
