Skip to content

Commit 0875b92

Browse files
fix: Post-Deployment Script for Managing Bicep Outputs in .env File and Update Conversation flow based on template selection (#1567)
Co-authored-by: Pavan Kumar <v-kupavan.microsoft.com>
1 parent 93b84ed commit 0875b92

21 files changed

+1176
-544
lines changed

.env.sample

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ AZURE_SEARCH_DATASOURCE_NAME=
2222
# Azure OpenAI for generating the answer and computing the embedding of the documents
2323
AZURE_OPENAI_RESOURCE=
2424
AZURE_OPENAI_API_KEY=
25-
AZURE_OPENAI_MODEL_INFO="{\"model\":\"gpt-35-turbo-16k\",\"modelName\":\"gpt-35-turbo-16k\",\"modelVersion\":\"0613\"}"
26-
AZURE_OPENAI_EMBEDDING_MODEL_INFO="{\"model\":\"text-embedding-ada-002\",\"modelName\":\"text-embedding-ada-002\",\"modelVersion\":\"2\"}"
25+
AZURE_OPENAI_MODEL=gpt-35-turbo
26+
AZURE_OPENAI_MODEL_NAME=gpt-35-turbo
27+
AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
2728
AZURE_OPENAI_TEMPERATURE=0
2829
AZURE_OPENAI_TOP_P=1.0
2930
AZURE_OPENAI_MAX_TOKENS=1000
@@ -35,10 +36,12 @@ AZURE_OPENAI_STREAM=True
3536
AzureWebJobsStorage=
3637
BACKEND_URL=http://localhost:7071
3738
DOCUMENT_PROCESSING_QUEUE_NAME=
38-
# Azure Blob Storage for storing the original documents to be processed
39-
AZURE_BLOB_STORAGE_INFO="{\"containerName\":\"documents\",\"accountName\":\"\",\"accountKey\":\"\"}"
39+
AZURE_BLOB_ACCOUNT_NAME=
40+
AZURE_BLOB_ACCOUNT_KEY=
41+
AZURE_BLOB_CONTAINER_NAME=
4042
# Azure Form Recognizer for extracting the text from the documents
41-
AZURE_FORM_RECOGNIZER_INFO="{\"endpoint\":\"\",\"key\":\"\"}"
43+
AZURE_FORM_RECOGNIZER_ENDPOINT=
44+
AZURE_FORM_RECOGNIZER_KEY=
4245
# Azure AI Content Safety for filtering out the inappropriate questions or answers
4346
AZURE_CONTENT_SAFETY_ENDPOINT=
4447
AZURE_CONTENT_SAFETY_KEY=
@@ -60,8 +63,11 @@ AZURE_KEY_VAULT_ENDPOINT=
6063
# Conversation flow used by the chat: "custom" or "byod" (bring your own data)
6164
CONVERSATION_FLOW=
6265
# Chat History CosmosDB Integration Settings
63-
AZURE_COSMOSDB_INFO="{\"accountName\":\"cosmos-abc123\",\"databaseName\":\"db_conversation_history\",\"containerName\":\"conversations\"}"
64-
AZURE_COSMOSDB_ACCOUNT_KEY=
66+
AZURE_COSMOSDB_ACCOUNT_NAME=
67+
AZURE_COSMOSDB_DATABASE_NAME=
68+
AZURE_COSMOSDB_CONVERSATIONS_CONTAINER_NAME=
6569
AZURE_COSMOSDB_ENABLE_FEEDBACK=
66-
AZURE_POSTGRESQL_INFO="{\"user\":\"\",\"dbname\":\"postgres\",\"host\":\"\"}"
70+
AZURE_POSTGRESQL_HOST_NAME=
71+
AZURE_POSTGRESQL_DATABASE_NAME=
72+
AZURE_POSTGRESQL_USER=
6773
DATABASE_TYPE="CosmosDB"

azure.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ metadata:
66
hooks:
77
postprovision:
88
run: ./infra/prompt-flow/create-prompt-flow.sh
9+
posix:
10+
shell: sh
11+
run: ./scripts/parse_env.sh
12+
windows:
13+
shell: pwsh
14+
run: ./scripts/parse_env.ps1
915
services:
1016
web:
1117
project: ./code

code/backend/batch/utilities/helpers/config/config_helper.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ def __init__(self, config: dict):
5252
)
5353
self.enable_chat_history = config["enable_chat_history"]
5454
self.database_type = config.get("database_type", self.env_helper.DATABASE_TYPE)
55+
self.conversational_flow = config.get(
56+
"conversational_flow", self.env_helper.CONVERSATION_FLOW
57+
)
5558

5659
def get_available_document_types(self) -> list[str]:
5760
document_types = {
@@ -247,11 +250,7 @@ def get_default_config():
247250
logger.info("Loading default config from %s", config_file_path)
248251
ConfigHelper._default_config = json.loads(
249252
Template(f.read()).substitute(
250-
ORCHESTRATION_STRATEGY=(
251-
OrchestrationStrategy.SEMANTIC_KERNEL.value
252-
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
253-
else env_helper.ORCHESTRATION_STRATEGY
254-
),
253+
ORCHESTRATION_STRATEGY=env_helper.ORCHESTRATION_STRATEGY,
255254
LOG_USER_INTERACTIONS=(
256255
False
257256
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
@@ -262,6 +261,7 @@ def get_default_config():
262261
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
263262
else True
264263
),
264+
CONVERSATION_FLOW=env_helper.CONVERSATION_FLOW,
265265
DATABASE_TYPE=env_helper.DATABASE_TYPE,
266266
)
267267
)

code/backend/batch/utilities/helpers/config/default.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"enable_post_answering_prompt": false,
1010
"ai_assistant_type": "default",
1111
"enable_content_safety": true,
12-
"conversational_flow": "custom"
12+
"conversational_flow": "${CONVERSATION_FLOW}"
1313
},
1414
"example": {
1515
"documents": "{\n \"retrieved_documents\": [\n {\n \"[doc1]\": {\n \"content\": \"Dual Transformer Encoder (DTE) DTE (https://dev.azure.com/TScience/TSciencePublic/_wiki/wikis/TSciencePublic.wiki/82/Dual-Transformer-Encoder) DTE is a general pair-oriented sentence representation learning framework based on transformers. It provides training, inference and evaluation for sentence similarity models. Model Details DTE can be used to train a model for sentence similarity with the following features: - Build upon existing transformer-based text representations (e.g.TNLR, BERT, RoBERTa, BAG-NLR) - Apply smoothness inducing technology to improve the representation robustness - SMART (https://arxiv.org/abs/1911.03437) SMART - Apply NCE (Noise Contrastive Estimation) based similarity learning to speed up training of 100M pairs We use pretrained DTE model\"\n }\n },\n {\n \"[doc2]\": {\n \"content\": \"trained on internal data. You can find more details here - Models.md (https://dev.azure.com/TScience/_git/TSciencePublic?path=%2FDualTransformerEncoder%2FMODELS.md&version=GBmaster&_a=preview) Models.md DTE-pretrained for In-context Learning Research suggests that finetuned transformers can be used to retrieve semantically similar exemplars for e.g. KATE (https://arxiv.org/pdf/2101.06804.pdf) KATE . They show that finetuned models esp. tuned on related tasks give the maximum boost to GPT-3 in-context performance. DTE have lot of pretrained models that are trained on intent classification tasks. We can use these model embedding to find natural language utterances which are similar to our test utterances at test time. The steps are: 1. Embed\"\n }\n },\n {\n \"[doc3]\": {\n \"content\": \"train and test utterances using DTE model 2. For each test embedding, find K-nearest neighbors. 3. Prefix the prompt with nearest embeddings. 
The following diagram from the above paper (https://arxiv.org/pdf/2101.06804.pdf) the above paper visualizes this process: DTE-Finetuned This is an extension of DTE-pretrained method where we further finetune the embedding models for prompt crafting task. In summary, we sample random prompts from our training data and use them for GPT-3 inference for the another part of training data. Some prompts work better and lead to right results whereas other prompts lead\"\n }\n },\n {\n \"[doc4]\": {\n \"content\": \"to wrong completions. We finetune the model on the downstream task of whether a prompt is good or not based on whether it leads to right or wrong completion. This approach is similar to this paper: Learning To Retrieve Prompts for In-Context Learning (https://arxiv.org/pdf/2112.08633.pdf) this paper: Learning To Retrieve Prompts for In-Context Learning . This method is very general but it may require a lot of data to actually finetune a model to learn how to retrieve examples suitable for the downstream inference model like GPT-3.\"\n }\n }\n ]\n}",

code/backend/batch/utilities/helpers/env_helper.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
from dotenv import load_dotenv
66
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
77
from azure.keyvault.secrets import SecretClient
8+
9+
from backend.batch.utilities.orchestrator.orchestration_strategy import (
10+
OrchestrationStrategy,
11+
)
12+
from backend.batch.utilities.helpers.config.conversation_flow import ConversationFlow
813
from ..helpers.config.database_type import DatabaseType
914

1015
logger = logging.getLogger(__name__)
@@ -97,11 +102,24 @@ def __load_config(self, **kwargs) -> None:
97102
# Cosmos DB configuration
98103
if self.DATABASE_TYPE == DatabaseType.COSMOSDB.value:
99104
azure_cosmosdb_info = self.get_info_from_env("AZURE_COSMOSDB_INFO", "")
100-
self.AZURE_COSMOSDB_DATABASE = azure_cosmosdb_info.get("databaseName", "")
101-
self.AZURE_COSMOSDB_ACCOUNT = azure_cosmosdb_info.get("accountName", "")
102-
self.AZURE_COSMOSDB_CONVERSATIONS_CONTAINER = azure_cosmosdb_info.get(
103-
"containerName", ""
104-
)
105+
if azure_cosmosdb_info:
106+
self.AZURE_COSMOSDB_DATABASE = azure_cosmosdb_info.get(
107+
"databaseName", ""
108+
)
109+
self.AZURE_COSMOSDB_ACCOUNT = azure_cosmosdb_info.get("accountName", "")
110+
self.AZURE_COSMOSDB_CONVERSATIONS_CONTAINER = azure_cosmosdb_info.get(
111+
"containerName", ""
112+
)
113+
else:
114+
self.AZURE_COSMOSDB_DATABASE = os.getenv(
115+
"AZURE_COSMOSDB_DATABASE_NAME", ""
116+
)
117+
self.AZURE_COSMOSDB_ACCOUNT = os.getenv(
118+
"AZURE_COSMOSDB_ACCOUNT_NAME", ""
119+
)
120+
self.AZURE_COSMOSDB_CONVERSATIONS_CONTAINER = os.getenv(
121+
"AZURE_COSMOSDB_CONVERSATIONS_CONTAINER_NAME", ""
122+
)
105123
self.AZURE_COSMOSDB_ACCOUNT_KEY = self.secretHelper.get_secret(
106124
"AZURE_COSMOSDB_ACCOUNT_KEY"
107125
)
@@ -114,18 +132,32 @@ def __load_config(self, **kwargs) -> None:
114132
self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool(
115133
"USE_ADVANCED_IMAGE_PROCESSING", "False"
116134
)
135+
self.CONVERSATION_FLOW = os.getenv("CONVERSATION_FLOW", "custom")
136+
# Orchestration Settings
137+
self.ORCHESTRATION_STRATEGY = os.getenv(
138+
"ORCHESTRATION_STRATEGY", "openai_function"
139+
)
117140
# PostgreSQL configuration
118141
elif self.DATABASE_TYPE == DatabaseType.POSTGRESQL.value:
119142
self.AZURE_POSTGRES_SEARCH_TOP_K = self.get_env_var_int(
120143
"AZURE_POSTGRES_SEARCH_TOP_K", 5
121144
)
122145
azure_postgresql_info = self.get_info_from_env("AZURE_POSTGRESQL_INFO", "")
123-
self.POSTGRESQL_USER = azure_postgresql_info.get("user", "")
124-
self.POSTGRESQL_DATABASE = azure_postgresql_info.get("dbname", "")
125-
self.POSTGRESQL_HOST = azure_postgresql_info.get("host", "")
146+
if azure_postgresql_info:
147+
self.POSTGRESQL_USER = azure_postgresql_info.get("user", "")
148+
self.POSTGRESQL_DATABASE = azure_postgresql_info.get("dbname", "")
149+
self.POSTGRESQL_HOST = azure_postgresql_info.get("host", "")
150+
else:
151+
self.POSTGRESQL_USER = os.getenv("AZURE_POSTGRESQL_USER", "")
152+
self.POSTGRESQL_DATABASE = os.getenv(
153+
"AZURE_POSTGRESQL_DATABASE_NAME", ""
154+
)
155+
self.POSTGRESQL_HOST = os.getenv("AZURE_POSTGRESQL_HOST_NAME", "")
126156
# Ensure integrated vectorization is disabled for PostgreSQL
127157
self.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION = False
128158
self.USE_ADVANCED_IMAGE_PROCESSING = False
159+
self.CONVERSATION_FLOW = ConversationFlow.CUSTOM.value
160+
self.ORCHESTRATION_STRATEGY = OrchestrationStrategy.SEMANTIC_KERNEL.value
129161
else:
130162
raise ValueError(
131163
"Unsupported DATABASE_TYPE. Please set DATABASE_TYPE to 'CosmosDB' or 'PostgreSQL'."
@@ -305,10 +337,6 @@ def __load_config(self, **kwargs) -> None:
305337
self.AZURE_CONTENT_SAFETY_KEY = self.secretHelper.get_secret(
306338
"AZURE_CONTENT_SAFETY_KEY"
307339
)
308-
# Orchestration Settings
309-
self.ORCHESTRATION_STRATEGY = os.getenv(
310-
"ORCHESTRATION_STRATEGY", "openai_function"
311-
)
312340
# Speech Service
313341
self.AZURE_SPEECH_SERVICE_NAME = os.getenv("AZURE_SPEECH_SERVICE_NAME", "")
314342
self.AZURE_SPEECH_SERVICE_REGION = os.getenv("AZURE_SPEECH_SERVICE_REGION")
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
import pytest
2+
from unittest.mock import AsyncMock, patch
3+
from azure.cosmos import exceptions
4+
from backend.batch.utilities.chat_history.cosmosdb import CosmosConversationClient
5+
6+
7+
@pytest.fixture
def mock_cosmos_client():
    """Provide a linked (client, database, container) triple of AsyncMock doubles.

    The client's ``get_database_client`` is configured to return the database
    mock, and the database's ``get_container_client`` to return the container
    mock, so the three objects form one chain.
    """
    container_mock = AsyncMock()

    database_mock = AsyncMock()
    database_mock.get_container_client.return_value = container_mock

    client_mock = AsyncMock()
    client_mock.get_database_client.return_value = database_mock

    return client_mock, database_mock, container_mock
17+
18+
19+
@pytest.fixture
def cosmos_client(mock_cosmos_client):
    """Build a CosmosConversationClient whose Cosmos handles are the mocks."""
    mock_client, mock_database, mock_container = mock_cosmos_client
    init_kwargs = {
        "cosmosdb_endpoint": "https://test-cosmosdb.com",
        "credential": "test-credential",
        "database_name": "test-database",
        "container_name": "test-container",
    }

    # NOTE(review): this patches the name on ``azure.cosmos.aio``; if the module
    # under test imported ``CosmosClient`` by name, the patch may not reach it.
    # Confirm against the cosmosdb module.
    with patch("azure.cosmos.aio.CosmosClient", return_value=mock_client):
        conversation_client = CosmosConversationClient(**init_kwargs)

    # Force the three handles to the mocks regardless of what the constructor set.
    conversation_client.cosmosdb_client = mock_client
    conversation_client.database_client = mock_database
    conversation_client.container_client = mock_container
    return conversation_client
33+
34+
35+
@pytest.mark.asyncio
async def test_initialize_client_success(cosmos_client):
    """The values passed to the constructor are readable as client attributes."""
    expected_attributes = {
        "cosmosdb_endpoint": "https://test-cosmosdb.com",
        "credential": "test-credential",
        "database_name": "test-database",
        "container_name": "test-container",
    }

    for attribute, expected_value in expected_attributes.items():
        assert getattr(cosmos_client, attribute) == expected_value
43+
44+
45+
@pytest.mark.asyncio
async def test_ensure_client_initialized_success(cosmos_client):
    """ensure() succeeds and reads both the database and the container once."""
    database_read = AsyncMock()
    container_read = AsyncMock()
    cosmos_client.database_client.read = database_read
    cosmos_client.container_client.read = container_read

    succeeded, detail = await cosmos_client.ensure()

    assert succeeded is True
    assert detail == "CosmosDB client initialized successfully"
    database_read.assert_called_once()
    container_read.assert_called_once()
57+
58+
59+
@pytest.mark.asyncio
async def test_ensure_client_not_initialized(cosmos_client):
    """ensure() reports failure when the database read raises a Cosmos HTTP error."""
    failing_database_read = AsyncMock(
        side_effect=exceptions.CosmosHttpResponseError
    )
    cosmos_client.database_client.read = failing_database_read
    cosmos_client.container_client.read = AsyncMock()

    succeeded, detail = await cosmos_client.ensure()

    assert succeeded is False
    assert "not found" in detail.lower()
    failing_database_read.assert_called_once()
72+
73+
74+
@pytest.mark.asyncio
async def test_create_conversation_success(cosmos_client):
    """create_conversation() yields a response carrying the upserted item's id."""
    conversation_id = "500e77bd-26b9-441a-8fe3-cd0e02993671"
    cosmos_client.container_client.upsert_item = AsyncMock(
        return_value={"id": conversation_id}
    )

    created = await cosmos_client.create_conversation(
        "user-123", conversation_id, "Test Conversation"
    )

    assert created["id"] == conversation_id
86+
87+
88+
@pytest.mark.asyncio
async def test_create_conversation_failure(cosmos_client):
    """create_conversation() returns False when the container upsert yields None."""
    conversation_id = "500e77bd-26b9-441a-8fe3-cd0e02993671"
    cosmos_client.container_client.upsert_item = AsyncMock(return_value=None)

    created = await cosmos_client.create_conversation(
        "user-123", conversation_id, "Test Conversation"
    )

    assert created is False
98+
99+
100+
@pytest.mark.asyncio
async def test_upsert_conversation_success(cosmos_client):
    """upsert_conversation() yields a response carrying the upserted item's id."""
    conversation_id = "500e77bd-26b9-441a-8fe3-cd0e02993671"
    cosmos_client.container_client.upsert_item = AsyncMock(
        return_value={"id": conversation_id}
    )
    payload = dict(
        id=conversation_id,
        type="conversation",
        userId="user-123",
        title="Updated Conversation",
    )

    upserted = await cosmos_client.upsert_conversation(payload)

    assert upserted["id"] == conversation_id
116+
117+
118+
@pytest.mark.asyncio
async def test_delete_conversation_success(cosmos_client):
    """delete_conversation() deletes the item once, partitioned by user id."""
    user_id = "user-123"
    conversation_id = "500e77bd-26b9-441a-8fe3-cd0e02993671"
    container = cosmos_client.container_client
    container.read_item = AsyncMock(return_value={"id": conversation_id})
    delete_item = AsyncMock(return_value={"status": "deleted"})
    container.delete_item = delete_item

    outcome = await cosmos_client.delete_conversation(user_id, conversation_id)

    assert outcome["status"] == "deleted"
    delete_item.assert_called_once_with(
        item=conversation_id, partition_key=user_id
    )
134+
135+
136+
@pytest.mark.asyncio
async def test_delete_messages_success(cosmos_client):
    """delete_messages() deletes every message returned by get_messages().

    The call under test passes (conversation id, user id), while the lookup is
    expected to receive (user id, conversation id).
    """
    user_id = "user-123"
    conversation_id = "500e77bd-26b9-441a-8fe3-cd0e02993671"
    message_ids = (
        "39c395da-e2f7-49c9-bca5-c9511d3c5172",
        "39c395da-e2f7-49c9-bca5-c9511d3c5174",
    )
    get_messages = AsyncMock(
        return_value=[{"id": message_id} for message_id in message_ids]
    )
    delete_item = AsyncMock()
    cosmos_client.get_messages = get_messages
    cosmos_client.container_client.delete_item = delete_item

    deleted = await cosmos_client.delete_messages(conversation_id, user_id)

    assert len(deleted) == 2
    get_messages.assert_called_once_with(user_id, conversation_id)
    for message_id in message_ids:
        delete_item.assert_any_call(item=message_id, partition_key=user_id)
161+
162+
163+
@pytest.mark.asyncio
async def test_update_message_feedback_success(cosmos_client):
    """update_message_feedback() upserts once and returns the updated feedback."""
    message_id = "39c395da-e2f7-49c9-bca5-c9511d3c5172"
    container = cosmos_client.container_client
    container.read_item = AsyncMock(
        return_value={"id": message_id, "feedback": ""}
    )
    upsert_item = AsyncMock(
        return_value={"id": message_id, "feedback": "positive"}
    )
    container.upsert_item = upsert_item

    updated = await cosmos_client.update_message_feedback(
        "user-123", message_id, "positive"
    )

    assert updated["feedback"] == "positive"
    upsert_item.assert_called_once()
File renamed without changes.

0 commit comments

Comments
 (0)