Skip to content

Commit cdee1d5

Browse files
committed
Make changes from Pamela's feedback
1 parent fff9c59 commit cdee1d5

13 files changed

+98
-18
lines changed

app/backend/approaches/approach.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ def __init__(
167167
vision_token_provider: Callable[[], Awaitable[str]],
168168
prompt_manager: PromptManager,
169169
reasoning_effort: Optional[str] = None,
170-
include_token_usage: Optional[bool] = None,
171170
):
172171
self.search_client = search_client
173172
self.openai_client = openai_client
@@ -182,7 +181,7 @@ def __init__(
182181
self.vision_token_provider = vision_token_provider
183182
self.prompt_manager = prompt_manager
184183
self.reasoning_effort = reasoning_effort
185-
self.include_token_usage = include_token_usage
184+
self.include_token_usage = True
186185

187186
def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
188187
include_category = overrides.get("include_category")
@@ -345,26 +344,25 @@ def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[st
345344
else:
346345
return {"override_prompt": override_prompt}
347346

348-
def get_response_token_limit(self, model: str) -> int:
347+
def get_response_token_limit(self, model: str, default_limit: int) -> int:
349348
if model in self.GPT_REASONING_MODELS:
350349
return self.RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT
351350

352-
return self.RESPONSE_DEFAULT_TOKEN_LIMIT
351+
return default_limit
353352

354353
def create_chat_completion(
355354
self,
356355
chatgpt_deployment: Optional[str],
357356
chatgpt_model: str,
358357
messages: list[ChatCompletionMessageParam],
359358
overrides: dict[str, Any],
359+
response_token_limit: int,
360360
should_stream: bool = False,
361-
response_token_limit: Optional[int] = None,
362361
tools: Optional[List[ChatCompletionToolParam]] = None,
363362
temperature: Optional[float] = None,
364363
n: Optional[int] = None,
365364
reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
366365
) -> Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]:
367-
response_token_limit = response_token_limit or self.get_response_token_limit(chatgpt_model)
368366
if chatgpt_model in self.GPT_REASONING_MODELS:
369367
params: Dict[str, Any] = {
370368
# max_tokens is not supported
@@ -399,7 +397,7 @@ def create_chat_completion(
399397
**params,
400398
)
401399

402-
def create_generate_thought_step(
400+
def format_thought_step_for_chatcompletion(
403401
self,
404402
title: str,
405403
messages: List[ChatCompletionMessageParam],

app/backend/approaches/chatreadretrieveread.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,17 @@ async def run_until_final_call(
9393
tools: List[ChatCompletionToolParam] = self.query_rewrite_tools
9494

9595
# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
96-
query_response_token_limit = (
97-
100 if self.chatgpt_model not in self.GPT_REASONING_MODELS else self.RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT
98-
)
96+
9997
chat_completion = cast(
10098
ChatCompletion,
10199
await self.create_chat_completion(
102100
self.chatgpt_deployment,
103101
self.chatgpt_model,
104102
messages=query_messages,
105103
overrides=overrides,
106-
response_token_limit=query_response_token_limit, # Setting too low risks malformed JSON, setting too high may affect performance
104+
response_token_limit=self.get_response_token_limit(
105+
self.chatgpt_model, 100
106+
), # Setting too low risks malformed JSON, setting too high may affect performance
107107
temperature=0.0, # Minimize creativity for search query generation
108108
tools=tools,
109109
reasoning_effort="low", # Minimize reasoning for search query generation
@@ -149,7 +149,7 @@ async def run_until_final_call(
149149
extra_info = ExtraInfo(
150150
DataPoints(text=text_sources),
151151
thoughts=[
152-
self.create_generate_thought_step(
152+
self.format_thought_step_for_chatcompletion(
153153
title="Prompt to generate search query",
154154
messages=query_messages,
155155
overrides=overrides,
@@ -175,7 +175,7 @@ async def run_until_final_call(
175175
"Search results",
176176
[result.serialize_for_results() for result in results],
177177
),
178-
self.create_generate_thought_step(
178+
self.format_thought_step_for_chatcompletion(
179179
title="Prompt to generate answer",
180180
messages=messages,
181181
overrides=overrides,
@@ -193,8 +193,8 @@ async def run_until_final_call(
193193
self.chatgpt_model,
194194
messages,
195195
overrides,
196+
self.get_response_token_limit(self.chatgpt_model, 1024),
196197
should_stream,
197-
response_token_limit=self.get_response_token_limit(self.chatgpt_model),
198198
),
199199
)
200200
return (extra_info, chat_coroutine)

app/backend/approaches/chatreadretrievereadvision.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def __init__(
6767
self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
6868
self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
6969
self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question_vision.prompty")
70+
# Currently disabled due to issues with rendering token usage in the UI
7071
self.include_token_usage = False
7172

7273
async def run_until_final_call(

app/backend/approaches/retrievethenread.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ async def run(
108108
self.chatgpt_model,
109109
messages=messages,
110110
overrides=overrides,
111+
response_token_limit=self.get_response_token_limit(self.chatgpt_model, 1024),
111112
),
112113
)
113114

@@ -131,7 +132,7 @@ async def run(
131132
"Search results",
132133
[result.serialize_for_results() for result in results],
133134
),
134-
self.create_generate_thought_step(
135+
self.format_thought_step_for_chatcompletion(
135136
title="Prompt to generate answer",
136137
messages=messages,
137138
overrides=overrides,

app/backend/approaches/retrievethenreadvision.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def __init__(
5757
self.vision_token_provider = vision_token_provider
5858
self.prompt_manager = prompt_manager
5959
self.answer_prompt = self.prompt_manager.load_prompt("ask_answer_question_vision.prompty")
60+
# Currently disabled due to issues with rendering token usage in the UI
6061
self.include_token_usage = False
6162

6263
async def run(

app/frontend/src/locales/da/translation.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@
8787
"useSemanticRanker": "Brug semantisk ranking til søgning",
8888
"useSemanticCaptions": "Brug semantiske billedtekster",
8989
"useQueryRewriting": "Brug forespørgselsomskrivning til informationsgenfinding",
90+
"reasoningEffort": "Ræsonnementsindsats",
91+
"reasoningEffortOptions": {
92+
"low": "Lav",
93+
"medium": "Medium",
94+
"high": "Høj"
95+
},
9096
"useSuggestFollowupQuestions": "Foreslå opfølgende spørgsmål",
9197
"useGPT4V": "Brug GPT vision model",
9298
"gpt4VInput": {
@@ -128,6 +134,8 @@
128134
"excludeCategory": "Angiver en kategori, der skal ekskluderes fra søgeresultaterne. Der er ingen kategorier i det standard datasæt.",
129135
"useSemanticReranker": "Aktiverer Azure AI Search semantisk omrangør, en model der rangerer søgeresultater baseret på semantisk lighed til brugerens forespørgsel.",
130136
"useSemanticCaptions": "Sender semantiske billedtekster til LLM'en i stedet for hele søgeresultatet. En semantisk billedtekst udtrækkes fra et søgeresultat under processen med semantisk rangering.",
137+
"useQueryRewriting": "Aktiverer Azure AI Search forespørgselsomskrivning, en proces der ændrer brugerens forespørgsel for at forbedre søgeresultaterne. Kræver at semantisk ranking er aktiveret.",
138+
"reasoningEffort": "Indstiller ræsonnementsindsatsen for sprogmodellen. Højere værdier resulterer i mere ræsonnement, men kan tage længere tid om at generere et svar. Standardværdien er medium.",
131139
"suggestFollowupQuestions": "Beder LLM'en om at foreslå opfølgende spørgsmål baseret på brugerens forespørgsel.",
132140
"useGPT4Vision": "Bruger GPT-4-Turbo med Vision til at generere svar baseret på billeder og tekst fra indekset.",
133141
"vectorFields": "Angiver hvilke indlejringsfelter i Azure AI Search Index, der vil blive søgt, enten både 'Billeder og tekst' indlejringer, 'Billeder' kun eller 'Tekst' kun.",

app/frontend/src/locales/es/translation.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@
9191
"useSemanticRanker": "Usar clasificador semántico para la recuperación",
9292
"useSemanticCaptions": "Usar subtítulos semánticos",
9393
"useQueryRewriting": "Utiliza la reescritura de consultas para la recuperación",
94+
"reasoningEffort": "Esfuerzo de razonamiento",
95+
"reasoningEffortOptions": {
96+
"low": "Bajo",
97+
"medium": "Medio",
98+
"high": "Alto"
99+
},
94100
"useSuggestFollowupQuestions": "Sugerir preguntas de seguimiento",
95101
"useGPT4V": "Usar modelo de visión GPT",
96102
"gpt4VInput": {
@@ -143,6 +149,10 @@
143149
"Habilita el re-clasificador semántico de Azure AI Search, un modelo que re-clasifica los resultados de búsqueda basándose en la similitud semántica con la consulta del usuario.",
144150
"useSemanticCaptions":
145151
"Envía subtítulos semánticos al LLM en lugar del resultado de búsqueda completo. Un subtítulo semántico se extrae de un resultado de búsqueda durante el proceso de clasificación semántica.",
152+
"useQueryRewriting":
153+
"Habilita la reescritura de consultas de Azure AI Search, un proceso que modifica la consulta del usuario para mejorar los resultados de búsqueda. Requiere que el clasificador semántico esté habilitado.",
154+
"reasoningEffort":
155+
"Establece el esfuerzo de razonamiento para el LLM. Los valores más altos resultan en más razonamiento, pero pueden tardar más en generar una respuesta. El valor predeterminado es medio.",
146156
"suggestFollowupQuestions": "Pide al LLM que sugiera preguntas de seguimiento basándose en la consulta del usuario.",
147157
"useGPT4Vision": "Utiliza GPT-4-Turbo con Visión para generar respuestas basándose en imágenes y texto del índice.",
148158
"vectorFields":

app/frontend/src/locales/fr/translation.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@
9292
"useSemanticCaptions": "Utiliser les titres sémantiques",
9393
"useSuggestFollowupQuestions": "Suggérer des questions de suivi",
9494
"useQueryRewriting": "Utilisez la réécriture des requêtes pour la récupération",
95+
"reasoningEffort": "Effort de raisonnement",
96+
"reasoningEffortOptions": {
97+
"low": "Faible",
98+
"medium": "Moyen",
99+
"high": "Élevé"
100+
},
95101
"useGPT4V": "Utiliser le modèle GPT Vision",
96102
"gpt4VInput": {
97103
"label": "Entrées du modèle GPT Vision",
@@ -144,6 +150,10 @@
144150
"useSemanticCaptions":
145151
"Envoie des légendes sémantiques à l'LLM au lieu du résultat de recherche complet. Une légende sémantique est extraite d'un résultat de recherche lors du processus de classement sémantique.",
146152
"suggestFollowupQuestions": "Demande à l'LLM de suggérer des questions de suivi en fonction de la requête de l'utilisateur.",
153+
"useQueryRewriting":
154+
"Active la réécriture de requêtes d'Azure AI Search, un processus qui modifie la requête de l'utilisateur pour améliorer les résultats de recherche. Nécessite que le reclasseur sémantique soit activé.",
155+
"reasoningEffort":
156+
"Définit l'effort de raisonnement pour le LLM. Des valeurs plus élevées entraînent plus de raisonnement, mais peuvent prendre plus de temps pour générer une réponse. La valeur par défaut est moyenne.",
147157
"useGPT4Vision": "Utilise GPT-4-Turbo avec Vision pour générer des réponses basées sur des images et du texte de l'index.",
148158
"vectorFields":
149159
"Spécifie quels champs d'incorporation dans l'index de recherche Azure AI seront recherchés, à la fois les incorporations 'Images et texte', 'Images' seulement, ou 'Texte' seulement.",

app/frontend/src/locales/it/translation.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@
9191
"useSemanticRanker": "Usa il reranker semantico",
9292
"useSemanticCaptions": "Usa didascalie semantiche",
9393
"useQueryRewriting": "Usa la riscrittura delle query per il recupero",
94+
"reasoningEffort": "Sforzo di ragionamento",
95+
"reasoningEffortOptions": {
96+
"low": "Basso",
97+
"medium": "Medio",
98+
"high": "Alto"
99+
},
94100
"useSuggestFollowupQuestions": "Suggerisci domande di follow-up",
95101
"useGPT4V": "Usa il modello GPT Vision",
96102
"gpt4VInput": {
@@ -143,6 +149,10 @@
143149
"Abilita il ranking semantico di Azure AI Search, un modello che riordina i risultati di ricerca in base alla somiglianza semantica con la query dell'utente.",
144150
"useSemanticCaptions":
145151
"Invia didascalie semantiche all'LLM invece del risultato di ricerca completo. Una didascalia semantica è estratta da un risultato di ricerca durante il processo di ranking semantico.",
152+
"useQueryRewriting":
153+
"Abilita la riscrittura delle query di Azure AI Search, un processo che modifica la query dell'utente per migliorare i risultati di ricerca. Richiede che il reranker semantico sia abilitato.",
154+
"reasoningEffort":
155+
"Imposta lo sforzo di ragionamento per l'LLM. Valori più alti comportano un maggiore ragionamento, ma potrebbero richiedere più tempo per generare una risposta. Il valore predefinito è medio.",
146156
"suggestFollowupQuestions": "Chiede all'LLM di suggerire domande di follow-up in base alla query dell'utente.",
147157
"useGPT4Vision": "Utilizza GPT-4-Turbo con Vision per generare risposte basate su immagini e testo dell'indice.",
148158
"vectorFields":

app/frontend/src/locales/ja/translation.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@
9191
"useSemanticRanker": "取得にセマンティック・ランカーを使用",
9292
"useSemanticCaptions": "セマンティック・キャプションを使用",
9393
"useQueryRewriting": "検索のためにクエリの書き換えを使用する",
94+
"reasoningEffort": "推論の労力",
95+
"reasoningEffortOptions": {
96+
        "low": "低",
97+
        "medium": "中",
98+
        "high": "高"
99+
},
94100
"useSuggestFollowupQuestions": "フォローアップの質問を提案",
95101
"useGPT4V": "GPT Visionモデルを使用",
96102
"gpt4VInput": {
@@ -140,6 +146,10 @@
140146
"Azure AI Searchのセマンティック・ランカーを有効にします(ユーザーのクエリに対するセマンティック類似性に基づいて検索結果をリランク付けするモデル)。",
141147
"useSemanticCaptions":
142148
"完全な検索結果ではなく、LLMにセマンティック・キャプションを送信します。セマンティック・キャプションは、セマンティック・ランキングの処理中に検索結果から抽出されます。",
149+
"useQueryRewriting":
150+
"Azure AI Searchのクエリの書き換えを有効にします。これは、ユーザーのクエリを変更して検索結果を改善するプロセスです。セマンティック・ランカーが有効になっている必要があります。",
151+
"reasoningEffort":
152+
"LLMの推論労力を設定します。値が高いほど推論が増加しますが、応答の生成に時間がかかる場合があります。デフォルトは中です。",
143153
"suggestFollowupQuestions": "ユーザーのクエリに基づいて、LLMにフォローアップの質問を提案するように問い合わせます。",
144154
"useGPT4Vision": "インデックスから画像とテキストを利用して回答を生成するためGPT-4-Turbo with Visionを使用します。",
145155
"vectorFields":

0 commit comments

Comments
 (0)