
Commit c78ffed: Port to prompty
1 parent: 4063767

19 files changed, +254 −179 lines

app/backend/approaches/approach.py

Lines changed: 11 additions & 0 deletions

@@ -1,4 +1,6 @@
+import json
 import os
+import pathlib
 from abc import ABC
 from dataclasses import dataclass
 from typing import (
@@ -14,6 +16,7 @@
 from urllib.parse import urljoin
 
 import aiohttp
+import prompty
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import (
     QueryCaptionResult,
@@ -96,6 +99,8 @@ class Approach(ABC):
     # Useful for using local small language models, for example
     ALLOW_NON_GPT_MODELS = True
 
+    PROMPTS_DIRECTORY = pathlib.Path(__file__).parent / "prompts"
+
     def __init__(
         self,
         search_client: SearchClient,
@@ -122,6 +127,12 @@ def __init__(
         self.vision_endpoint = vision_endpoint
         self.vision_token_provider = vision_token_provider
 
+    def load_prompty(self, path: str):
+        return prompty.load(self.PROMPTS_DIRECTORY / path)
+
+    def load_tools(self, path: str):
+        return json.loads(open(self.PROMPTS_DIRECTORY / path).read())
+
     def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
         include_category = overrides.get("include_category")
         exclude_category = overrides.get("exclude_category")
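
The two new helpers centralize prompt loading for every approach subclass. For orientation, they are equivalent to the following standalone calls (paths relative to app/backend; a sketch, assuming only the prompty load API exactly as the commit uses it):

    import json
    import pathlib

    import prompty

    PROMPTS_DIRECTORY = pathlib.Path("approaches") / "prompts"

    # load_prompty("chat/answer_question.prompty") boils down to:
    answer_prompt = prompty.load(PROMPTS_DIRECTORY / "chat/answer_question.prompty")

    # load_tools("chat/query_rewrite_tools.json") reads the JSON tool definitions;
    # an equivalent form with an explicit context manager:
    with open(PROMPTS_DIRECTORY / "chat/query_rewrite_tools.json") as f:
        query_rewrite_tools = json.load(f)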

app/backend/approaches/chatapproach.py

Lines changed: 21 additions & 31 deletions

@@ -2,50 +2,40 @@
 import re
 from abc import ABC, abstractmethod
 from typing import Any, AsyncGenerator, Optional
-from jinja2 import Environment, FileSystemLoader
 
+import prompty
 from openai.types.chat import ChatCompletion, ChatCompletionMessageParam
 
 from approaches.approach import Approach
 
-class ChatApproach(Approach, ABC):
-
-    NO_RESPONSE = "0"
 
-    def __init__(self):
-        self._initialize_templates()
-
-    def _initialize_templates(self):
-        self.env = Environment(loader=FileSystemLoader('approaches/prompts/chat'))
-        json_content = self.env.loader.get_source(self.env, 'query_few_shots.json')[0]
-        self.query_prompt_few_shots: list[ChatCompletionMessageParam] = json.loads(json_content)
-        self.query_prompt_template = self.env.get_template('query_template.jinja').render()
-        self.follow_up_questions_prompt = self.env.get_template('follow_up_questions.jinja').render()
-        self.system_message_chat_conversation_template = self.env.get_template('system_message.jinja')
-        self.system_message_chat_conversation_vision_template = self.env.get_template('system_message_vision.jinja')
+class ChatApproach(Approach, ABC):
 
-    @property
-    @abstractmethod
-    def system_message_chat_conversation(self) -> str:
-        pass
+    NO_RESPONSE = "0"
 
     @abstractmethod
     async def run_until_final_call(self, messages, overrides, auth_claims, should_stream) -> tuple:
         pass
 
-    def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str) -> str:
-        if override_prompt is None:
-            return self.system_message_chat_conversation_template.render(
-                follow_up_questions_prompt=follow_up_questions_prompt,
-                injected_prompt=""
-            )
-        elif override_prompt.startswith(">>>"):
-            return self.system_message_chat_conversation_template.render(
-                follow_up_questions_prompt=follow_up_questions_prompt,
-                injected_prompt=override_prompt[3:] + "\n"
+    def get_messages(
+        self, override_prompt: Optional[str], include_follow_up_questions: bool, user_query: str, content: str
+    ) -> list[ChatCompletionMessageParam]:
+        if override_prompt is None or override_prompt.startswith(">>>"):
+            injected_prompt = "" if override_prompt is None else override_prompt[3:]
+            return prompty.prepare(
+                self.answer_prompt,
+                {
+                    "include_follow_up_questions": include_follow_up_questions,
+                    "injected_prompt": injected_prompt,
+                    "user_query": user_query,
+                    "content": content,
+                },
             )
         else:
-            return override_prompt.format(follow_up_questions_prompt=follow_up_questions_prompt)
+            # TODO: Warn if follow-up is specified, follow-up won't be injected
+            return prompty.prepare(
+                self.answer_prompt, {"override_prompt": override_prompt, "user_query": user_query, "content": content}
+            )
 
     def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
         response_message = chat_completion.choices[0].message
@@ -153,4 +143,4 @@ async def run_stream(
     ) -> AsyncGenerator[dict[str, Any], None]:
         overrides = context.get("overrides", {})
         auth_claims = context.get("auth_claims", {})
-        return self.run_with_streaming(messages, overrides, auth_claims, session_state)
+        return self.run_with_streaming(messages, overrides, auth_claims, session_state) #
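
get_messages keeps the override semantics of the old get_system_prompt: a client-supplied prompt_template starting with ">>>" is stripped of the marker and passed to the template as injected_prompt, while any other value replaces the stock prompt wholesale (and, per the TODO, loses follow-up question injection). A sketch of the three call patterns; approach, query, and sources are illustrative placeholders, not names from the commit:

    # 1. Stock prompt; the prompty template decides how include_follow_up_questions is used.
    messages = approach.get_messages(None, include_follow_up_questions=True, user_query=query, content=sources)

    # 2. Injection: ">>>" is stripped and the remainder is rendered as injected_prompt.
    messages = approach.get_messages(
        ">>>Answer in formal English.", include_follow_up_questions=True, user_query=query, content=sources
    )

    # 3. Full override: the template receives override_prompt; follow-ups are not injected.
    messages = approach.get_messages(
        "You are a terse assistant. Cite sources.", include_follow_up_questions=True, user_query=query, content=sources
    )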

app/backend/approaches/chatreadretrieveread.py

Lines changed: 19 additions & 39 deletions

@@ -1,5 +1,6 @@
 from typing import Any, Coroutine, List, Literal, Optional, Union, overload
 
+import prompty
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import VectorQuery
 from openai import AsyncOpenAI, AsyncStream
@@ -39,7 +40,6 @@ def __init__(
         query_language: str,
         query_speller: str,
     ):
-        super().__init__()
         self.search_client = search_client
         self.openai_client = openai_client
         self.auth_helper = auth_helper
@@ -53,13 +53,9 @@ def __init__(
         self.query_language = query_language
         self.query_speller = query_speller
         self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=self.ALLOW_NON_GPT_MODELS)
-
-    @property
-    def system_message_chat_conversation(self):
-        return self.system_message_chat_conversation_template.render(
-            follow_up_questions_prompt="",
-            injected_prompt=""
-        )
+        self.query_rewrite_prompt = self.load_prompty("chat/query_rewrite.prompty")
+        self.query_rewrite_tools = self.load_tools("chat/query_rewrite_tools.json")
+        self.answer_prompt = self.load_prompty("chat/answer_question.prompty")
 
     @overload
     async def run_until_final_call(
@@ -99,37 +95,20 @@ async def run_until_final_call(
         original_user_query = messages[-1]["content"]
         if not isinstance(original_user_query, str):
             raise ValueError("The most recent message content must be a string.")
-        user_query_request = "Generate search query for: " + original_user_query
-
-        tools: List[ChatCompletionToolParam] = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "search_sources",
-                    "description": "Retrieve sources from the Azure AI Search index",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "search_query": {
-                                "type": "string",
-                                "description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
-                            }
-                        },
-                        "required": ["search_query"],
-                    },
-                },
-            }
-        ]
+
+        # Use prompty to prepare the query prompt
+        query_messages = prompty.prepare(self.query_rewrite_prompt, inputs={"user_query": original_user_query})
+        tools: List[ChatCompletionToolParam] = self.query_rewrite_tools
 
         # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
         query_response_token_limit = 100
         query_messages = build_messages(
             model=self.chatgpt_model,
-            system_prompt=self.query_prompt_template,
-            tools=tools,
-            few_shots=self.query_prompt_few_shots,
+            system_prompt=query_messages[0]["content"],
+            few_shots=query_messages[1:-1],
             past_messages=messages[:-1],
-            new_user_content=user_query_request,
+            new_user_content=query_messages[-1]["content"],
+            tools=tools,
             max_tokens=self.chatgpt_token_limit - query_response_token_limit,
             fallback_to_default=self.ALLOW_NON_GPT_MODELS,
         )
@@ -172,19 +151,20 @@ async def run_until_final_call(
 
         # STEP 3: Generate a contextual and content specific answer using the search results and chat history
 
-        # Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
-        system_message = self.get_system_prompt(
+        # Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
+        formatted_messages = self.get_messages(
             overrides.get("prompt_template"),
-            self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
+            include_follow_up_questions=bool(overrides.get("suggest_followup_questions")),
+            user_query=original_user_query,
+            content=content,
         )
 
         response_token_limit = 1024
         messages = build_messages(
             model=self.chatgpt_model,
-            system_prompt=system_message,
+            system_prompt=formatted_messages[0]["content"],
             past_messages=messages[:-1],
-            # Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
-            new_user_content=original_user_query + "\n\nSources:\n" + content,
+            new_user_content=formatted_messages[-1]["content"],
            max_tokens=self.chatgpt_token_limit - response_token_limit,
             fallback_to_default=self.ALLOW_NON_GPT_MODELS,
         )
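
The inline tools literal deleted in STEP 1 above moves into chat/query_rewrite_tools.json, which load_tools reads back at construction time. The new file itself is not shown in this excerpt, but from the removed lines its content would be the same definition as plain JSON (a reconstruction, not a verbatim quote of the new file):

    [
        {
            "type": "function",
            "function": {
                "name": "search_sources",
                "description": "Retrieve sources from the Azure AI Search index",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "search_query": {
                            "type": "string",
                            "description": "Query string to retrieve documents from azure search eg: 'Health care plan'"
                        }
                    },
                    "required": ["search_query"]
                }
            }
        }
    ]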

app/backend/approaches/chatreadretrievereadvision.py

Lines changed: 25 additions & 21 deletions

@@ -1,5 +1,6 @@
-from typing import Any, Awaitable, Callable, Coroutine, Optional, Union
+from typing import Any, Awaitable, Callable, Coroutine, List, Optional, Union
 
+import prompty
 from azure.search.documents.aio import SearchClient
 from azure.storage.blob.aio import ContainerClient
 from openai import AsyncOpenAI, AsyncStream
@@ -9,6 +10,7 @@
     ChatCompletionContentPartImageParam,
     ChatCompletionContentPartParam,
     ChatCompletionMessageParam,
+    ChatCompletionToolParam,
 )
 from openai_messages_token_helper import build_messages, get_token_limit
 
@@ -46,7 +48,6 @@ def __init__(
         vision_endpoint: str,
         vision_token_provider: Callable[[], Awaitable[str]]
     ):
-        super().__init__()
         self.search_client = search_client
         self.blob_container_client = blob_container_client
         self.openai_client = openai_client
@@ -65,13 +66,9 @@ def __init__(
         self.vision_endpoint = vision_endpoint
         self.vision_token_provider = vision_token_provider
         self.chatgpt_token_limit = get_token_limit(gpt4v_model, default_to_minimum=self.ALLOW_NON_GPT_MODELS)
-
-    @property
-    def system_message_chat_conversation(self):
-        return self.system_message_chat_conversation_vision_template.render(
-            follow_up_questions_prompt="",
-            injected_prompt=""
-        )
+        self.query_rewrite_prompt = self.load_prompty("chat/query_rewrite.prompty")
+        self.query_rewrite_tools = self.load_tools("chat/query_rewrite_tools.json")
+        self.answer_prompt = self.load_prompty("chat/answer_question_vision.prompty")
 
     async def run_until_final_call(
         self,
@@ -97,29 +94,32 @@ async def run_until_final_call(
         original_user_query = messages[-1]["content"]
         if not isinstance(original_user_query, str):
             raise ValueError("The most recent message content must be a string.")
-        past_messages: list[ChatCompletionMessageParam] = messages[:-1]
 
-        # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
-        user_query_request = "Generate search query for: " + original_user_query
+        # Use prompty to prepare the query prompt
+        query_messages = prompty.prepare(self.query_rewrite_prompt, inputs={"user_query": original_user_query})
+        tools: List[ChatCompletionToolParam] = self.query_rewrite_tools
 
+        # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
        query_response_token_limit = 100
         query_model = self.chatgpt_model
         query_deployment = self.chatgpt_deployment
         query_messages = build_messages(
             model=query_model,
-            system_prompt=self.query_prompt_template,
-            few_shots=self.query_prompt_few_shots,
-            past_messages=past_messages,
-            new_user_content=user_query_request,
+            system_prompt=query_messages[0]["content"],
+            few_shots=query_messages[1:-1],
+            past_messages=messages[:-1],
+            new_user_content=query_messages[-1]["content"],
             max_tokens=self.chatgpt_token_limit - query_response_token_limit,
         )
 
         chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
-            model=query_deployment if query_deployment else query_model,
             messages=query_messages,
+            # Azure OpenAI takes the deployment name as the model name
+            model=query_deployment if query_deployment else query_model,
             temperature=0.0,  # Minimize creativity for search query generation
             max_tokens=query_response_token_limit,
             n=1,
+            tools=tools,
             seed=seed,
         )
 
@@ -156,12 +156,16 @@ async def run_until_final_call(
         # STEP 3: Generate a contextual and content specific answer using the search results and chat history
 
         # Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
-        system_message = self.get_system_prompt(
+        formatted_messages = self.get_messages(
             overrides.get("prompt_template"),
-            self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
+            include_follow_up_questions=bool(overrides.get("suggest_followup_questions")),
+            user_query=original_user_query,
+            content=content,
        )
 
-        user_content: list[ChatCompletionContentPartParam] = [{"text": original_user_query, "type": "text"}]
+        user_content: list[ChatCompletionContentPartParam] = [
+            {"text": formatted_messages[-1]["content"], "type": "text"}
+        ]
         image_list: list[ChatCompletionContentPartImageParam] = []
 
         if send_text_to_gptvision:
@@ -176,7 +180,7 @@ async def run_until_final_call(
         response_token_limit = 1024
         messages = build_messages(
             model=self.gpt4v_model,
-            system_prompt=system_message,
+            system_prompt=formatted_messages[0]["content"],
             past_messages=messages[:-1],
             new_user_content=user_content,
             max_tokens=self.chatgpt_token_limit - response_token_limit,
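
In the vision flow the final user turn is multipart: the prepared text from the prompty messages plus any retrieved page images. A minimal sketch of the structure handed to build_messages as new_user_content (the data URI is a placeholder for an image fetched from blob storage, not a value from the commit):

    from openai.types.chat import (
        ChatCompletionContentPartImageParam,
        ChatCompletionContentPartParam,
    )

    # Text part: the rendered user message from the prompty template.
    text_part: ChatCompletionContentPartParam = {
        "text": "What does the chart show?\n\nSources:\nfinancial_report.pdf: ...",
        "type": "text",
    }
    # Image part: one entry per retrieved page image.
    image_part: ChatCompletionContentPartImageParam = {
        "image_url": {"url": "data:image/png;base64,<...>"},
        "type": "image_url",
    }
    user_content = [text_part, image_part]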
app/backend/approaches/prompts/chat/answer_question.prompty (new file)

Lines changed: 29 additions & 0 deletions

@@ -0,0 +1,29 @@
+---
+model:
+    api: chat
+---
+You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions.
+Use 'you' to refer to the individual asking the questions even if they ask with 'I'.
+Answer the following question using only the data provided in the sources below.
+Each source has a name followed by a colon and the actual information; always include the source name for each fact you use in the response.
+If you cannot answer using the sources below, say you don't know. Use the example below to answer.
+
+example:
+
+user:
+What is the deductible for the employee plan for a visit to Overlake in Bellevue
+
+Sources:
+info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.
+info2.pdf: Overlake is in-network for the employee plan.
+info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.
+info4.pdf: In-network institutions include Overlake, Swedish and others in the region.
+
+assistant:
+In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf].
+
+user:
+{{ user_query }}
+
+Sources:
+{{ content }}
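
The front matter declares the chat API, and the markdown body doubles as the message list: prompty's chat parsing should split on the role markers, so everything above the first user: becomes the system message, the worked example becomes a few-shot user/assistant pair, and the final user: block, with {{ user_query }} and {{ content }} rendered, becomes the live user message. That ordering is exactly what the approaches rely on when they index query_messages[0], [1:-1], and [-1]. A sketch of exercising the template directly (path and inputs are illustrative):

    import prompty

    answer_prompt = prompty.load("app/backend/approaches/prompts/chat/answer_question.prompty")
    messages = prompty.prepare(
        answer_prompt,
        {"user_query": "Is Overlake in-network?", "content": "info2.pdf: Overlake is in-network for the employee plan."},
    )
    # Expected: system message first, few-shot pair in the middle, live user message last.
    for message in messages:
        print(message["role"], "->", str(message["content"])[:60])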
app/backend/approaches/prompts/chat/answer_question_vision.prompty (new file)

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+---
+model:
+    api: chat
+---
+You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd. The documents contain text, graphs, tables and images.
+Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:<file_name>.
+Each text source starts in a new line and has the file name followed by a colon and the actual information.
+Always include the source name from the image or text for each fact you use in the response in the format: [filename].
+Answer the following question using only the data provided in the sources below.
+The text and image source can be the same file name; don't use the image title when citing the image source, only use the file name as mentioned.
+If you cannot answer using the sources below, say you don't know. Return just the answer without any input texts.
+
+example:
+
+user:
+What is the deductible for the employee plan for a visit to Overlake in Bellevue
+
+Sources:
+info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.
+info2.pdf: Overlake is in-network for the employee plan.
+info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.
+info4.pdf: In-network institutions include Overlake, Swedish and others in the region.
+
+assistant:
+In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf].
+
+user:
+{{ user_query }}
+
+Sources:
+{{ content }}
