
Commit f45c34d

custom
1 parent ebc7f68 commit f45c34d

19 files changed: +1665 -9 lines changed

app/backend/approaches-old/__init__.py

Whitespace-only changes.

app/backend/approaches-old/approach.py

Lines changed: 436 additions & 0 deletions
Large diffs are not rendered by default.
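
Note: the 436-line approaches-old/approach.py diff above is collapsed, but both files below lean on names defined there (Approach, ExtraInfo, DataPoints, ThoughtStep). The sketch below is an inference only, reconstructed from how those names are used in the code that follows; it is not the contents of the unrendered file:

# Inferred sketch only -- these shapes are deduced from call sites in the
# files below, not copied from the unrendered approaches-old/approach.py.
from dataclasses import dataclass, field
from typing import Any, Optional


@dataclass
class DataPoints:
    # get_sources_content() results are passed as DataPoints(text=...)
    text: Optional[list[str]] = None


@dataclass
class ThoughtStep:
    # Constructed positionally below: ThoughtStep(title, description, props)
    title: str
    description: Any
    props: Optional[dict[str, Any]] = None

    def update_token_usage(self, usage: Any) -> None:
        # Called with a CompletionUsage once the final response/chunk arrives.
        self.props = (self.props or {}) | {"token_usage": usage}


@dataclass
class ExtraInfo:
    # Constructed below as ExtraInfo(DataPoints(...), thoughts=[...]);
    # followup_questions is assigned after the fact in run_without_streaming.
    data_points: DataPoints
    thoughts: list[ThoughtStep] = field(default_factory=list)
    followup_questions: Optional[list[Any]] = None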
app/backend/approaches-old/chatapproach.py

Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
import json
import re
from abc import ABC, abstractmethod
from typing import Any, AsyncGenerator, Awaitable, Optional, Union, cast

from openai import AsyncStream
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionMessageParam,
)

from approaches.approach import (
    Approach,
    ExtraInfo,
)


class ChatApproach(Approach, ABC):

    NO_RESPONSE = "0"

    @abstractmethod
    async def run_until_final_call(
        self, messages, overrides, auth_claims, should_stream
    ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]:
        pass

    def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
        response_message = chat_completion.choices[0].message

        if response_message.tool_calls:
            for tool in response_message.tool_calls:
                if tool.type != "function":
                    continue
                function = tool.function
                if function.name == "search_sources":
                    arg = json.loads(function.arguments)
                    search_query = arg.get("search_query", self.NO_RESPONSE)
                    if search_query != self.NO_RESPONSE:
                        return search_query
        elif query_text := response_message.content:
            if query_text.strip() != self.NO_RESPONSE:
                return query_text
        return user_query

    def extract_followup_questions(self, content: Optional[str]):
        if content is None:
            return content, []
        return content.split("<<")[0], re.findall(r"<<([^>>]+)>>", content)

    async def run_without_streaming(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any],
        auth_claims: dict[str, Any],
        session_state: Any = None,
    ) -> dict[str, Any]:
        extra_info, chat_coroutine = await self.run_until_final_call(
            messages, overrides, auth_claims, should_stream=False
        )
        chat_completion_response: ChatCompletion = await cast(Awaitable[ChatCompletion], chat_coroutine)
        content = chat_completion_response.choices[0].message.content
        role = chat_completion_response.choices[0].message.role
        if overrides.get("suggest_followup_questions"):
            content, followup_questions = self.extract_followup_questions(content)
            extra_info.followup_questions = followup_questions
        # Assume last thought is for generating answer
        if self.include_token_usage and extra_info.thoughts and chat_completion_response.usage:
            extra_info.thoughts[-1].update_token_usage(chat_completion_response.usage)
        chat_app_response = {
            "message": {"content": content, "role": role},
            "context": extra_info,
            "session_state": session_state,
        }
        return chat_app_response

    async def run_with_streaming(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any],
        auth_claims: dict[str, Any],
        session_state: Any = None,
    ) -> AsyncGenerator[dict, None]:
        extra_info, chat_coroutine = await self.run_until_final_call(
            messages, overrides, auth_claims, should_stream=True
        )
        chat_coroutine = cast(Awaitable[AsyncStream[ChatCompletionChunk]], chat_coroutine)
        yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}

        followup_questions_started = False
        followup_content = ""
        async for event_chunk in await chat_coroutine:
            # "2023-07-01-preview" API version has a bug where first response has empty choices
            event = event_chunk.model_dump()  # Convert pydantic model to dict
            if event["choices"]:
                # No usage during streaming
                completion = {
                    "delta": {
                        "content": event["choices"][0]["delta"].get("content"),
                        "role": event["choices"][0]["delta"]["role"],
                    }
                }
                # if event contains << and not >>, it is start of follow-up question, truncate
                content = completion["delta"].get("content")
                content = content or ""  # content may either not exist in delta, or explicitly be None
                if overrides.get("suggest_followup_questions") and "<<" in content:
                    followup_questions_started = True
                    earlier_content = content[: content.index("<<")]
                    if earlier_content:
                        completion["delta"]["content"] = earlier_content
                        yield completion
                    followup_content += content[content.index("<<") :]
                elif followup_questions_started:
                    followup_content += content
                else:
                    yield completion
            else:
                # Final chunk at end of streaming should contain usage
                # https://cookbook.openai.com/examples/how_to_stream_completions#4-how-to-get-token-usage-data-for-streamed-chat-completion-response
                if event_chunk.usage and extra_info.thoughts and self.include_token_usage:
                    extra_info.thoughts[-1].update_token_usage(event_chunk.usage)
                    yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}

        if followup_content:
            _, followup_questions = self.extract_followup_questions(followup_content)
            yield {
                "delta": {"role": "assistant"},
                "context": {"context": extra_info, "followup_questions": followup_questions},
            }

    async def run(
        self,
        messages: list[ChatCompletionMessageParam],
        session_state: Any = None,
        context: dict[str, Any] = {},
    ) -> dict[str, Any]:
        overrides = context.get("overrides", {})
        auth_claims = context.get("auth_claims", {})
        return await self.run_without_streaming(messages, overrides, auth_claims, session_state)

    async def run_stream(
        self,
        messages: list[ChatCompletionMessageParam],
        session_state: Any = None,
        context: dict[str, Any] = {},
    ) -> AsyncGenerator[dict[str, Any], None]:
        overrides = context.get("overrides", {})
        auth_claims = context.get("auth_claims", {})
        return self.run_with_streaming(messages, overrides, auth_claims, session_state)
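
The <<...>> markers handled above deserve a concrete example: when follow-up questions are enabled, the model is prompted to append each suggested question wrapped in double angle brackets, and extract_followup_questions splits them off the visible answer. A minimal standalone sketch of that parsing, with an invented sample completion (the function body mirrors the method above):

import re


def extract_followup_questions(content):
    # Same logic as ChatApproach.extract_followup_questions: everything before
    # the first "<<" is the visible answer; each <<...>> becomes a follow-up.
    if content is None:
        return content, []
    return content.split("<<")[0], re.findall(r"<<([^>>]+)>>", content)


# Invented model output, used only to illustrate the expected format.
answer = "The deductible is $500 per year. <<What does the plan cover?>><<How do I file a claim?>>"
visible, followups = extract_followup_questions(answer)
print(visible)    # "The deductible is $500 per year. "
print(followups)  # ['What does the plan cover?', 'How do I file a claim?']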
app/backend/approaches-old/chatreadretrieveread.py

Lines changed: 200 additions & 0 deletions
@@ -0,0 +1,200 @@
from typing import Any, Awaitable, List, Optional, Union, cast

from azure.search.documents.aio import SearchClient
from azure.search.documents.models import VectorQuery
from openai import AsyncOpenAI, AsyncStream
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionMessageParam,
    ChatCompletionToolParam,
)

from approaches.approach import DataPoints, ExtraInfo, ThoughtStep
from approaches.chatapproach import ChatApproach
from approaches.promptmanager import PromptManager
from core.authentication import AuthenticationHelper


class ChatReadRetrieveReadApproach(ChatApproach):
    """
    A multi-step approach that first uses OpenAI to turn the user's question into a search query,
    then uses Azure AI Search to retrieve relevant documents, and then sends the conversation history,
    original user question, and search results to OpenAI to generate a response.
    """

    def __init__(
        self,
        *,
        search_client: SearchClient,
        auth_helper: AuthenticationHelper,
        openai_client: AsyncOpenAI,
        chatgpt_model: str,
        chatgpt_deployment: Optional[str],  # Not needed for non-Azure OpenAI
        embedding_deployment: Optional[str],  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
        embedding_model: str,
        embedding_dimensions: int,
        sourcepage_field: str,
        content_field: str,
        query_language: str,
        query_speller: str,
        prompt_manager: PromptManager,
        reasoning_effort: Optional[str] = None,
    ):
        self.search_client = search_client
        self.openai_client = openai_client
        self.auth_helper = auth_helper
        self.chatgpt_model = chatgpt_model
        self.chatgpt_deployment = chatgpt_deployment
        self.embedding_deployment = embedding_deployment
        self.embedding_model = embedding_model
        self.embedding_dimensions = embedding_dimensions
        self.sourcepage_field = sourcepage_field
        self.content_field = content_field
        self.query_language = query_language
        self.query_speller = query_speller
        self.prompt_manager = prompt_manager
        self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
        self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
        self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
        self.reasoning_effort = reasoning_effort
        self.include_token_usage = True

    async def run_until_final_call(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any],
        auth_claims: dict[str, Any],
        should_stream: bool = False,
    ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]:
        use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
        use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
        use_semantic_ranker = True if overrides.get("semantic_ranker") else False
        use_semantic_captions = True if overrides.get("semantic_captions") else False
        use_query_rewriting = True if overrides.get("query_rewriting") else False
        top = overrides.get("top", 3)
        minimum_search_score = overrides.get("minimum_search_score", 0.0)
        minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
        filter = self.build_filter(overrides, auth_claims)

        original_user_query = messages[-1]["content"]
        if not isinstance(original_user_query, str):
            raise ValueError("The most recent message content must be a string.")

        reasoning_model_support = self.GPT_REASONING_MODELS.get(self.chatgpt_model)
        if reasoning_model_support and (not reasoning_model_support.streaming and should_stream):
            raise Exception(
                f"{self.chatgpt_model} does not support streaming. Please use a different model or disable streaming."
            )

        query_messages = self.prompt_manager.render_prompt(
            self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]}
        )
        tools: List[ChatCompletionToolParam] = self.query_rewrite_tools

        # STEP 1: Generate an optimized keyword search query based on the chat history and the last question

        chat_completion = cast(
            ChatCompletion,
            await self.create_chat_completion(
                self.chatgpt_deployment,
                self.chatgpt_model,
                messages=query_messages,
                overrides=overrides,
                response_token_limit=self.get_response_token_limit(
                    self.chatgpt_model, 100
                ),  # Setting too low risks malformed JSON, setting too high may affect performance
                temperature=0.0,  # Minimize creativity for search query generation
                tools=tools,
                reasoning_effort="low",  # Minimize reasoning for search query generation
            ),
        )

        query_text = self.get_search_query(chat_completion, original_user_query)

        # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query

        # If retrieval mode includes vectors, compute an embedding for the query
        vectors: list[VectorQuery] = []
        if use_vector_search:
            vectors.append(await self.compute_text_embedding(query_text))

        results = await self.search(
            top,
            query_text,
            filter,
            vectors,
            use_text_search,
            use_vector_search,
            use_semantic_ranker,
            use_semantic_captions,
            minimum_search_score,
            minimum_reranker_score,
            use_query_rewriting,
        )

        # STEP 3: Generate a contextual and content specific answer using the search results and chat history
        text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
        messages = self.prompt_manager.render_prompt(
            self.answer_prompt,
            self.get_system_prompt_variables(overrides.get("prompt_template"))
            | {
                "include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
                "past_messages": messages[:-1],
                "user_query": original_user_query,
                "text_sources": text_sources,
            },
        )

        extra_info = ExtraInfo(
            DataPoints(text=text_sources),
            thoughts=[
                self.format_thought_step_for_chatcompletion(
                    title="Prompt to generate search query",
                    messages=query_messages,
                    overrides=overrides,
                    model=self.chatgpt_model,
                    deployment=self.chatgpt_deployment,
                    usage=chat_completion.usage,
                    reasoning_effort="low",
                ),
                ThoughtStep(
                    "Search using generated search query",
                    query_text,
                    {
                        "use_semantic_captions": use_semantic_captions,
                        "use_semantic_ranker": use_semantic_ranker,
                        "use_query_rewriting": use_query_rewriting,
                        "top": top,
                        "filter": filter,
                        "use_vector_search": use_vector_search,
                        "use_text_search": use_text_search,
                    },
                ),
                ThoughtStep(
                    "Search results",
                    [result.serialize_for_results() for result in results],
                ),
                self.format_thought_step_for_chatcompletion(
                    title="Prompt to generate answer",
                    messages=messages,
                    overrides=overrides,
                    model=self.chatgpt_model,
                    deployment=self.chatgpt_deployment,
                    usage=None,
                ),
            ],
        )

        chat_coroutine = cast(
            Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]],
            self.create_chat_completion(
                self.chatgpt_deployment,
                self.chatgpt_model,
                messages,
                overrides,
                self.get_response_token_limit(self.chatgpt_model, 1024),
                should_stream,
            ),
        )
        return (extra_info, chat_coroutine)
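
To make the three-step flow concrete, here is a hedged usage sketch that drives this approach through the run entry point inherited from ChatApproach. Constructing the approach itself is elided, since it needs live SearchClient, AsyncOpenAI, and PromptManager instances; the override keys shown are exactly the ones run_until_final_call reads above:

async def demo(approach: ChatReadRetrieveReadApproach) -> None:
    # `approach` is assumed to be fully constructed elsewhere with live
    # Azure AI Search and OpenAI clients; this sketch only shows the call shape.
    messages = [{"role": "user", "content": "What is included in my plan?"}]
    context = {
        "overrides": {
            "retrieval_mode": "hybrid",  # text + vector search, per run_until_final_call
            "semantic_ranker": True,
            "top": 3,  # matches the method's default
            "suggest_followup_questions": True,
        },
        "auth_claims": {},  # populated by AuthenticationHelper in the real app
    }
    response = await approach.run(messages, context=context)
    print(response["message"]["content"])  # the generated answer
    print(response["context"].followup_questions)  # parsed from <<...>> markers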
