Skip to content

Commit a8fda2c

Browse files
authored
Merge branch 'main' into addrbacrole
2 parents 5ff30e1 + 305ab5b commit a8fda2c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+330
-821
lines changed

.devcontainer/devcontainer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "Azure Search OpenAI Demo",
3-
"image": "mcr.microsoft.com/devcontainers/python:3.11",
3+
"image": "mcr.microsoft.com/devcontainers/python:3.11-bookworm",
44
"features": {
55
"ghcr.io/devcontainers/features/node:1": {
66
// This should match the version of Node.js in Github Actions workflows

.github/copilot-instructions.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ When adding new azd environment variables, update:
3939

4040
1. infra/main.parameters.json : Add the new parameter with a Bicep-friendly variable name and map to the new environment variable
4141
1. infra/main.bicep: Add the new Bicep parameter at the top, and add it to the `appEnvVariables` object
42-
1. azure.yaml: Add the new environment variable under pipeline config section
4342
1. .azdo/pipelines/azure-dev.yml: Add the new environment variable under `env` section
4443
1. .github/workflows/azure-dev.yml: Add the new environment variable under `env` section
4544

.github/workflows/azure-dev.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ jobs:
123123
uses: Azure/setup-azd@[version tag lost to e-mail obfuscation in this page capture]
124124

125125
- name: Install Nodejs
126-
uses: actions/setup-node@v4
126+
uses: actions/setup-node@v5
127127
with:
128-
node-version: 18
128+
node-version: 20
129129

130130
- name: Log in with Azure (Federated Credentials)
131131
run: |

.github/workflows/evaluate.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ jobs:
109109
steps:
110110

111111
- name: Comment on pull request
112-
uses: actions/github-script@v7
112+
uses: actions/github-script@v8
113113
with:
114114
script: |
115115
github.rest.issues.createComment({
@@ -133,9 +133,9 @@ jobs:
133133
python-version: "3.11"
134134

135135
- name: Setup node
136-
uses: actions/setup-node@v4
136+
uses: actions/setup-node@v5
137137
with:
138-
node-version: 18
138+
node-version: 20
139139

140140
- name: Install azd
141141
uses: Azure/setup-azd@[version tag lost to e-mail obfuscation in this page capture]
@@ -221,7 +221,7 @@ jobs:
221221
cat run-diff.md >> $GITHUB_STEP_SUMMARY
222222
223223
- name: Comment on pull request
224-
uses: actions/github-script@v7
224+
uses: actions/github-script@v8
225225
with:
226226
script: |
227227
const fs = require('fs');

.github/workflows/python-test.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,16 @@ jobs:
4141
python-version: ${{ matrix.python_version }}
4242
activate-environment: true
4343
- name: Setup node
44-
uses: actions/setup-node@v4
44+
uses: actions/setup-node@v5
4545
with:
4646
node-version: ${{ matrix.node_version }}
4747
- name: Build frontend
4848
run: |
4949
cd ./app/frontend
5050
npm install
5151
npm run build
52+
- name: Check i18n translations
53+
run: npx -y @lingual/i18n-check --locales app/frontend/src/locales -s en -f i18next -r summary
5254
- name: Install dependencies
5355
run: |
5456
uv pip install -r requirements-dev.txt

.github/workflows/stale-bot.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ jobs:
77
stale:
88
runs-on: ubuntu-latest
99
steps:
10-
- uses: actions/stale@v9
10+
- uses: actions/stale@v10
1111
with:
1212
stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this issue will be closed.'
1313
stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed.'

CONTRIBUTING.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ When adding new azd environment variables, please remember to update:
141141

142142
1. [main.parameters.json](./infra/main.parameters.json)
143143
1. [appEnvVariables in main.bicep](./infra/main.bicep)
144-
1. App Service's [azure.yaml](./azure.yaml)
145144
1. [ADO pipeline](.azdo/pipelines/azure-dev.yml).
146145
1. [Github workflows](.github/workflows/azure-dev.yml)
147146

app/backend/app.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,6 @@ async def setup_clients():
471471
USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
472472
USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
473473
USE_AGENTIC_RETRIEVAL = os.getenv("USE_AGENTIC_RETRIEVAL", "").lower() == "true"
474-
ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA = os.getenv("ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA", "").lower() == "true"
475474

476475
# WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
477476
RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None
@@ -690,7 +689,6 @@ async def setup_clients():
690689
query_speller=AZURE_SEARCH_QUERY_SPELLER,
691690
prompt_manager=prompt_manager,
692691
reasoning_effort=OPENAI_REASONING_EFFORT,
693-
hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
694692
multimodal_enabled=USE_MULTIMODAL,
695693
image_embeddings_client=image_embeddings_client,
696694
global_blob_manager=global_blob_manager,
@@ -718,7 +716,6 @@ async def setup_clients():
718716
query_speller=AZURE_SEARCH_QUERY_SPELLER,
719717
prompt_manager=prompt_manager,
720718
reasoning_effort=OPENAI_REASONING_EFFORT,
721-
hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
722719
multimodal_enabled=USE_MULTIMODAL,
723720
image_embeddings_client=image_embeddings_client,
724721
global_blob_manager=global_blob_manager,

app/backend/approaches/approach.py

Lines changed: 34 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66

77
from azure.search.documents.agent.aio import KnowledgeAgentRetrievalClient
88
from azure.search.documents.agent.models import (
9-
KnowledgeAgentAzureSearchDocReference,
10-
KnowledgeAgentIndexParams,
119
KnowledgeAgentMessage,
1210
KnowledgeAgentMessageTextContent,
1311
KnowledgeAgentRetrievalRequest,
1412
KnowledgeAgentRetrievalResponse,
15-
KnowledgeAgentSearchActivityRecord,
13+
KnowledgeAgentSearchIndexActivityRecord,
14+
KnowledgeAgentSearchIndexReference,
15+
SearchIndexKnowledgeSourceParams,
1616
)
1717
from azure.search.documents.aio import SearchClient
1818
from azure.search.documents.models import (
@@ -162,7 +162,6 @@ def __init__(
162162
openai_host: str,
163163
prompt_manager: PromptManager,
164164
reasoning_effort: Optional[str] = None,
165-
hydrate_references: bool = False,
166165
multimodal_enabled: bool = False,
167166
image_embeddings_client: Optional[ImageEmbeddings] = None,
168167
global_blob_manager: Optional[BlobManager] = None,
@@ -180,7 +179,6 @@ def __init__(
180179
self.openai_host = openai_host
181180
self.prompt_manager = prompt_manager
182181
self.reasoning_effort = reasoning_effort
183-
self.hydrate_references = hydrate_references
184182
self.include_token_usage = True
185183
self.multimodal_enabled = multimodal_enabled
186184
self.image_embeddings_client = image_embeddings_client
@@ -276,7 +274,6 @@ async def run_agentic_retrieval(
276274
top: Optional[int] = None,
277275
filter_add_on: Optional[str] = None,
278276
minimum_reranker_score: Optional[float] = None,
279-
max_docs_for_reranker: Optional[int] = None,
280277
results_merge_strategy: Optional[str] = None,
281278
) -> tuple[KnowledgeAgentRetrievalResponse, list[Document]]:
282279
# STEP 1: Invoke agentic retrieval
@@ -289,13 +286,10 @@ async def run_agentic_retrieval(
289286
for msg in messages
290287
if msg["role"] != "system"
291288
],
292-
target_index_params=[
293-
KnowledgeAgentIndexParams(
294-
index_name=search_index_name,
295-
reranker_threshold=minimum_reranker_score,
296-
max_docs_for_reranker=max_docs_for_reranker,
289+
knowledge_source_params=[
290+
SearchIndexKnowledgeSourceParams(
291+
knowledge_source_name=search_index_name,
297292
filter_add_on=filter_add_on,
298-
include_reference_source_data=True,
299293
)
300294
],
301295
)
@@ -305,12 +299,12 @@ async def run_agentic_retrieval(
305299
activities = response.activity
306300
activity_mapping: dict[int, str] = (
307301
{
308-
activity.id: activity.query.search
302+
activity.id: activity.search_index_arguments.search
309303
for activity in activities
310304
if (
311-
isinstance(activity, KnowledgeAgentSearchActivityRecord)
312-
and activity.query
313-
and activity.query.search is not None
305+
isinstance(activity, KnowledgeAgentSearchIndexActivityRecord)
306+
and activity.search_index_arguments
307+
and activity.search_index_arguments.search is not None
314308
)
315309
}
316310
if activities
@@ -322,92 +316,42 @@ async def run_agentic_retrieval(
322316
return response, []
323317

324318
# Extract references
325-
refs = [r for r in response.references if isinstance(r, KnowledgeAgentAzureSearchDocReference)]
326-
319+
refs = [r for r in response.references if isinstance(r, KnowledgeAgentSearchIndexReference)]
327320
documents: list[Document] = []
328-
329-
if self.hydrate_references:
330-
# Hydrate references to get full documents
331-
documents = await self.hydrate_agent_references(
332-
references=refs,
333-
top=top,
334-
)
335-
else:
336-
# Create documents from reference source data
337-
for ref in refs:
338-
if ref.source_data:
339-
documents.append(
340-
Document(
341-
id=ref.doc_key,
342-
content=ref.source_data.get("content"),
343-
sourcepage=ref.source_data.get("sourcepage"),
344-
)
345-
)
346-
if top and len(documents) >= top:
347-
break
348-
349-
# Build mappings for agent queries and sorting
350-
ref_to_activity: dict[str, int] = {}
351321
doc_to_ref_id: dict[str, str] = {}
322+
323+
# Create documents from reference source data
352324
for ref in refs:
353-
if ref.doc_key:
354-
ref_to_activity[ref.doc_key] = ref.activity_source
325+
if ref.source_data and ref.doc_key:
326+
# Note that ref.doc_key is the same as source_data["id"]
327+
documents.append(
328+
Document(
329+
id=ref.doc_key,
330+
content=ref.source_data.get("content"),
331+
category=ref.source_data.get("category"),
332+
sourcepage=ref.source_data.get("sourcepage"),
333+
sourcefile=ref.source_data.get("sourcefile"),
334+
oids=ref.source_data.get("oids"),
335+
groups=ref.source_data.get("groups"),
336+
reranker_score=ref.reranker_score,
337+
images=ref.source_data.get("images"),
338+
search_agent_query=activity_mapping[ref.activity_source],
339+
)
340+
)
355341
doc_to_ref_id[ref.doc_key] = ref.id
342+
if top and len(documents) >= top:
343+
break
356344

357-
# Inject agent search queries into all documents
358-
for doc in documents:
359-
if doc.id and doc.id in ref_to_activity:
360-
activity_id = ref_to_activity[doc.id]
361-
doc.search_agent_query = activity_mapping.get(activity_id, "")
345+
if minimum_reranker_score is not None:
346+
documents = [doc for doc in documents if (doc.reranker_score or 0) >= minimum_reranker_score]
362347

363-
# Apply sorting strategy to the documents
364-
if results_merge_strategy == "interleaved": # Use interleaved reference order
348+
if results_merge_strategy == "interleaved":
365349
documents = sorted(
366350
documents,
367351
key=lambda d: int(doc_to_ref_id.get(d.id, 0)) if d.id and doc_to_ref_id.get(d.id) else 0,
368352
)
369-
# else: Default - preserve original order
370-
371353
return response, documents
372354

373-
async def hydrate_agent_references(
374-
self,
375-
references: list[KnowledgeAgentAzureSearchDocReference],
376-
top: Optional[int],
377-
) -> list[Document]:
378-
doc_keys: set[str] = set()
379-
380-
for ref in references:
381-
if not ref.doc_key:
382-
continue
383-
doc_keys.add(ref.doc_key)
384-
if top and len(doc_keys) >= top:
385-
break
386-
387-
if not doc_keys:
388-
return []
389-
390-
# Build search filter only on unique doc IDs
391-
id_csv = ",".join(doc_keys)
392-
id_filter = f"search.in(id, '{id_csv}', ',')"
393-
394-
# Fetch full documents
395-
hydrated_docs: list[Document] = await self.search(
396-
top=len(doc_keys),
397-
query_text=None,
398-
filter=id_filter,
399-
vectors=[],
400-
use_text_search=False,
401-
use_vector_search=False,
402-
use_semantic_ranker=False,
403-
use_semantic_captions=False,
404-
minimum_search_score=None,
405-
minimum_reranker_score=None,
406-
use_query_rewriting=False,
407-
)
408-
409-
return hydrated_docs
410-
411355
async def get_sources_content(
412356
self,
413357
results: list[Document],

app/backend/approaches/chatreadretrieveread.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ def __init__(
5656
query_speller: str,
5757
prompt_manager: PromptManager,
5858
reasoning_effort: Optional[str] = None,
59-
hydrate_references: bool = False,
6059
multimodal_enabled: bool = False,
6160
image_embeddings_client: Optional[ImageEmbeddings] = None,
6261
global_blob_manager: Optional[BlobManager] = None,
@@ -84,7 +83,6 @@ def __init__(
8483
self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
8584
self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
8685
self.reasoning_effort = reasoning_effort
87-
self.hydrate_references = hydrate_references
8886
self.include_token_usage = True
8987
self.multimodal_enabled = multimodal_enabled
9088
self.image_embeddings_client = image_embeddings_client
@@ -390,13 +388,10 @@ async def run_agentic_retrieval_approach(
390388
overrides: dict[str, Any],
391389
auth_claims: dict[str, Any],
392390
):
393-
minimum_reranker_score = overrides.get("minimum_reranker_score", 0)
394391
search_index_filter = self.build_filter(overrides, auth_claims)
392+
minimum_reranker_score = overrides.get("minimum_reranker_score", 0)
395393
top = overrides.get("top", 3)
396-
max_subqueries = overrides.get("max_subqueries", 10)
397394
results_merge_strategy = overrides.get("results_merge_strategy", "interleaved")
398-
# 50 is the amount of documents that the reranker can process per query
399-
max_docs_for_reranker = max_subqueries * 50
400395
send_text_sources = overrides.get("send_text_sources", True)
401396
send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled
402397

@@ -407,7 +402,6 @@ async def run_agentic_retrieval_approach(
407402
top=top,
408403
filter_add_on=search_index_filter,
409404
minimum_reranker_score=minimum_reranker_score,
410-
max_docs_for_reranker=max_docs_for_reranker,
411405
results_merge_strategy=results_merge_strategy,
412406
)
413407

@@ -426,7 +420,6 @@ async def run_agentic_retrieval_approach(
426420
messages,
427421
{
428422
"reranker_threshold": minimum_reranker_score,
429-
"max_docs_for_reranker": max_docs_for_reranker,
430423
"results_merge_strategy": results_merge_strategy,
431424
"filter": search_index_filter,
432425
},

0 commit comments

Comments
 (0)