Skip to content

Commit 6a4cebb

Browse files
Roopan-Microsoft, AjitPadhi-Microsoft, Pavan-Microsoft, ross-p-smith, gpickett
authored
build: merging dev changes to main branch (#1599)
Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: Ajit Padhi <[email protected]> Co-authored-by: Pavan-Microsoft <[email protected]> Co-authored-by: Ross Smith <[email protected]> Co-authored-by: gpickett <[email protected]> Co-authored-by: Francia Riesco <[email protected]> Co-authored-by: Francia Riesco <[email protected]> Co-authored-by: Prajwal D C <[email protected]> Co-authored-by: Harmanpreet-Microsoft <[email protected]> Co-authored-by: UtkarshMishra-Microsoft <[email protected]> Co-authored-by: Priyanka-Microsoft <[email protected]> Co-authored-by: Prasanjeet-Microsoft <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
1 parent 405d4bc commit 6a4cebb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+5589
-1808
lines changed

.env.sample

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ AZURE_SEARCH_DATASOURCE_NAME=
2222
# Azure OpenAI for generating the answer and computing the embedding of the documents
2323
AZURE_OPENAI_RESOURCE=
2424
AZURE_OPENAI_API_KEY=
25-
AZURE_OPENAI_MODEL_INFO="{\"model\":\"gpt-35-turbo-16k\",\"modelName\":\"gpt-35-turbo-16k\",\"modelVersion\":\"0613\"}"
26-
AZURE_OPENAI_EMBEDDING_MODEL_INFO="{\"model\":\"text-embedding-ada-002\",\"modelName\":\"text-embedding-ada-002\",\"modelVersion\":\"2\"}"
25+
AZURE_OPENAI_MODEL=gpt-35-turbo
26+
AZURE_OPENAI_MODEL_NAME=gpt-35-turbo
27+
AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
2728
AZURE_OPENAI_TEMPERATURE=0
2829
AZURE_OPENAI_TOP_P=1.0
2930
AZURE_OPENAI_MAX_TOKENS=1000
@@ -35,10 +36,12 @@ AZURE_OPENAI_STREAM=True
3536
AzureWebJobsStorage=
3637
BACKEND_URL=http://localhost:7071
3738
DOCUMENT_PROCESSING_QUEUE_NAME=
38-
# Azure Blob Storage for storing the original documents to be processed
39-
AZURE_BLOB_STORAGE_INFO="{\"containerName\":\"documents\",\"accountName\":\"\",\"accountKey\":\"\"}"
39+
AZURE_BLOB_ACCOUNT_NAME=
40+
AZURE_BLOB_ACCOUNT_KEY=
41+
AZURE_BLOB_CONTAINER_NAME=
4042
# Azure Form Recognizer for extracting the text from the documents
41-
AZURE_FORM_RECOGNIZER_INFO="{\"endpoint\":\"\",\"key\":\"\"}"
43+
AZURE_FORM_RECOGNIZER_ENDPOINT=
44+
AZURE_FORM_RECOGNIZER_KEY=
4245
# Azure AI Content Safety for filtering out the inappropriate questions or answers
4346
AZURE_CONTENT_SAFETY_ENDPOINT=
4447
AZURE_CONTENT_SAFETY_KEY=
@@ -60,8 +63,11 @@ AZURE_KEY_VAULT_ENDPOINT=
6063
# Chat conversation type to decide between custom or byod (bring your own data) conversation type
6164
CONVERSATION_FLOW=
6265
# Chat History CosmosDB Integration Settings
63-
AZURE_COSMOSDB_INFO="{\"accountName\":\"cosmos-abc123\",\"databaseName\":\"db_conversation_history\",\"containerName\":\"conversations\"}"
64-
AZURE_COSMOSDB_ACCOUNT_KEY=
66+
AZURE_COSMOSDB_ACCOUNT_NAME=
67+
AZURE_COSMOSDB_DATABASE_NAME=
68+
AZURE_COSMOSDB_CONVERSATIONS_CONTAINER_NAME=
6569
AZURE_COSMOSDB_ENABLE_FEEDBACK=
66-
AZURE_POSTGRESQL_INFO="{\"user\":\"\",\"dbname\":\"postgres\",\"host\":\"\"}"
70+
AZURE_POSTGRESQL_HOST_NAME=
71+
AZURE_POSTGRESQL_DATABASE_NAME=
72+
AZURE_POSTGRESQL_USER=
6773
DATABASE_TYPE="CosmosDB"

.github/workflows/build-docker-images.yml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
name: Build Docker Images
22

33
on:
4-
workflow_run:
5-
workflows: [Tests]
6-
types: [completed]
4+
push:
75
branches:
86
- main
97
- dev
@@ -22,7 +20,6 @@ on:
2220

2321
jobs:
2422
docker-build:
25-
if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
2623
strategy:
2724
matrix:
2825
include:
@@ -34,9 +31,9 @@ jobs:
3431
dockerfile: docker/Frontend.Dockerfile
3532
uses: ./.github/workflows/build-docker.yml
3633
with:
37-
registry: ${{ github.event.workflow_run.head_branch == 'main' && 'fruoccopublic.azurecr.io' || 'cwydcontainerreg.azurecr.io'}}
38-
username: ${{ github.event.workflow_run.head_branch == 'main' && 'fruoccopublic' || 'cwydcontainerreg'}}
34+
registry: ${{ github.ref_name == 'main' && 'fruoccopublic.azurecr.io' || 'cwydcontainerreg.azurecr.io'}}
35+
username: ${{ github.ref_name == 'main' && 'fruoccopublic' || 'cwydcontainerreg'}}
3936
app_name: ${{ matrix.app_name }}
4037
dockerfile: ${{ matrix.dockerfile }}
41-
push: ${{ github.event.workflow_run.head_branch == 'main' || github.event.workflow_run.head_branch == 'dev' || github.event.workflow_run.head_branch == 'demo' }}
38+
push: ${{ github.ref_name == 'main' || github.ref_name == 'dev' || github.ref_name == 'demo' }}
4239
secrets: inherit

.github/workflows/build-docker.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ jobs:
2828
docker-build:
2929
runs-on: ubuntu-latest
3030
steps:
31-
3231
- name: Checkout
3332
uses: actions/checkout@v4
3433

@@ -61,7 +60,7 @@ jobs:
6160
context: .
6261
file: ${{ inputs.dockerfile }}
6362
push: ${{ inputs.push }}
64-
cache-from: type=registry,ref=${{ inputs.registry }}/${{ inputs.app_name}}:${{ github.ref_name == 'main' && 'latest' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || 'latest' }}
63+
cache-from: type=registry,ref=${{ inputs.registry }}/${{ inputs.app_name}}:${{ github.ref_name == 'main' && 'latest' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || github.head_ref || github.ref_name }}
6564
tags: |
66-
${{ inputs.registry }}/${{ inputs.app_name}}:${{ github.ref_name == 'main' && 'latest' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || 'latest' }}
65+
${{ inputs.registry }}/${{ inputs.app_name}}:${{ github.ref_name == 'main' && 'latest' || github.ref_name == 'dev' && 'dev' || github.ref_name == 'demo' && 'demo' || github.head_ref || 'default' }}
6766
${{ inputs.registry }}/${{ inputs.app_name}}:${{ steps.date.outputs.date }}_${{ github.run_number }}

.github/workflows/sync-branches.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515

1616
steps:
1717
- name: Checkout repository
18-
uses: actions/checkout@v3
18+
uses: actions/checkout@v4
1919
with:
2020
fetch-depth: 0 # Fetch all history for accurate branch comparison
2121

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Welcome to the *Chat with your data* Solution accelerator repository! The *Chat
5050

5151

5252

53+
5354
### About this repo
5455

5556
This repository provides an end-to-end solution for users who want to query their data with natural language. It includes a well designed ingestion mechanism for multiple file types, an easy deployment, and a support team for maintenance. The accelerator demonstrates both Push or Pull Ingestion; the choice of orchestration (Semantic Kernel, LangChain, OpenAI Functions or [Prompt Flow](docs/prompt_flow.md)) and should be the minimum components needed to implement a RAG pattern. It is not intended to be put into Production as-is without experimentation or evaluation of your data. It provides the following features:

azure.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,13 @@ metadata:
55
66
hooks:
77
postprovision:
8-
run: ./infra/prompt-flow/create-prompt-flow.sh
8+
# run: ./infra/prompt-flow/create-prompt-flow.sh
9+
posix:
10+
shell: sh
11+
run: chmod +x ./scripts/parse_env.sh && ./scripts/parse_env.sh
12+
windows:
13+
shell: pwsh
14+
run: ./scripts/parse_env.ps1
915
services:
1016
web:
1117
project: ./code

code/backend/batch/batch_push_results.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,22 @@ def _get_file_name_from_message(message_body) -> str:
2828
)
2929
def batch_push_results(msg: func.QueueMessage) -> None:
3030
message_body = json.loads(msg.get_body().decode("utf-8"))
31-
logger.debug("Process Document Event queue function triggered: %s", message_body)
31+
logger.info("Process Document Event queue function triggered: %s", message_body)
3232

3333
event_type = message_body.get("eventType", "")
3434
# We handle "" in this scenario for backwards compatibility
3535
# This function is primarily triggered by an Event Grid queue message from the blob storage
3636
# However, it can also be triggered using a legacy schema from BatchStartProcessing
3737
if event_type in ("", "Microsoft.Storage.BlobCreated"):
38+
logger.info("Handling 'Blob Created' event with message body: %s", message_body)
3839
_process_document_created_event(message_body)
3940

4041
elif event_type == "Microsoft.Storage.BlobDeleted":
42+
logger.info("Handling 'Blob Deleted' event with message body: %s", message_body)
4143
_process_document_deleted_event(message_body)
4244

4345
else:
46+
logger.exception("Received an unrecognized event type: %s", event_type)
4447
raise NotImplementedError(f"Unknown event type received: {event_type}")
4548

4649

code/backend/batch/utilities/helpers/azure_blob_storage_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def get_container_sas(self):
247247
user_delegation_key=self.user_delegation_key,
248248
account_key=self.account_key,
249249
permission="r",
250-
expiry=datetime.utcnow() + timedelta(hours=1),
250+
expiry=datetime.utcnow() + timedelta(days=365 * 5),
251251
)
252252

253253
def get_blob_sas(self, file_name):

code/backend/batch/utilities/helpers/azure_form_recognizer_helper.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import logging
12
from azure.core.credentials import AzureKeyCredential
23
from azure.ai.formrecognizer import DocumentAnalysisClient
34
from azure.identity import DefaultAzureCredential
45
import html
56
import traceback
67
from .env_helper import EnvHelper
78

9+
logger = logging.getLogger(__name__)
10+
811

912
class AzureFormRecognizerClient:
1013
def __init__(self) -> None:
@@ -75,6 +78,8 @@ def begin_analyze_document_from_url(
7578
model_id = "prebuilt-layout" if use_layout else "prebuilt-read"
7679

7780
try:
81+
logger.info("Method begin_analyze_document_from_url started")
82+
logger.info(f"Model ID selected: {model_id}")
7883
poller = self.document_analysis_client.begin_analyze_document_from_url(
7984
model_id, document_url=source_url
8085
)
@@ -144,4 +149,7 @@ def begin_analyze_document_from_url(
144149

145150
return page_map
146151
except Exception as e:
152+
logger.exception(f"Exception in begin_analyze_document_from_url: {e}")
147153
raise ValueError(f"Error: {traceback.format_exc()}. Error: {e}")
154+
finally:
155+
logger.info("Method begin_analyze_document_from_url ended")

code/backend/batch/utilities/helpers/config/config_helper.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ def __init__(self, config: dict):
5252
)
5353
self.enable_chat_history = config["enable_chat_history"]
5454
self.database_type = config.get("database_type", self.env_helper.DATABASE_TYPE)
55+
self.conversational_flow = config.get(
56+
"conversational_flow", self.env_helper.CONVERSATION_FLOW
57+
)
5558

5659
def get_available_document_types(self) -> list[str]:
5760
document_types = {
@@ -187,21 +190,27 @@ def _set_new_config_properties(config: dict, default_config: dict):
187190
@staticmethod
188191
@functools.cache
189192
def get_active_config_or_default():
193+
logger.info("Method get_active_config_or_default started")
190194
env_helper = EnvHelper()
191195
config = ConfigHelper.get_default_config()
192196

193197
if env_helper.LOAD_CONFIG_FROM_BLOB_STORAGE:
198+
logger.info("Loading configuration from Blob Storage")
194199
blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
195200

196201
if blob_client.file_exists(CONFIG_FILE_NAME):
202+
logger.info("Configuration file found in Blob Storage")
197203
default_config = config
198204
config_file = blob_client.download_file(CONFIG_FILE_NAME)
199205
config = json.loads(config_file)
200206

201207
ConfigHelper._set_new_config_properties(config, default_config)
202208
else:
203-
logger.info("Returning default config")
209+
logger.info(
210+
"Configuration file not found in Blob Storage, using default configuration"
211+
)
204212

213+
logger.info("Method get_active_config_or_default ended")
205214
return Config(config)
206215

207216
@staticmethod
@@ -247,11 +256,7 @@ def get_default_config():
247256
logger.info("Loading default config from %s", config_file_path)
248257
ConfigHelper._default_config = json.loads(
249258
Template(f.read()).substitute(
250-
ORCHESTRATION_STRATEGY=(
251-
OrchestrationStrategy.SEMANTIC_KERNEL.value
252-
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
253-
else env_helper.ORCHESTRATION_STRATEGY
254-
),
259+
ORCHESTRATION_STRATEGY=env_helper.ORCHESTRATION_STRATEGY,
255260
LOG_USER_INTERACTIONS=(
256261
False
257262
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
@@ -262,6 +267,7 @@ def get_default_config():
262267
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
263268
else True
264269
),
270+
CONVERSATION_FLOW=env_helper.CONVERSATION_FLOW,
265271
DATABASE_TYPE=env_helper.DATABASE_TYPE,
266272
)
267273
)

0 commit comments

Comments (0)