Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
5d561e9
feat: Enable AVM WAF implementation to align with updated AVM standar…
Pavan-Microsoft Sep 15, 2025
1d0b33d
feat: Replaced AVM modules with local modules to reduce the main.json…
Prajwal-Microsoft Sep 16, 2025
7c96b33
fix: Integrate UMI Across Infra, Secure PostgreSQL Connections, and U…
Pavan-Microsoft Sep 16, 2025
d36cf84
Merge branch 'waf-avm' of https://github.com/Azure-Samples/chat-with-…
Prajwal-Microsoft Sep 16, 2025
1c88d7e
fix: Created the minified version of workbook json to reduce size of mai…
Prajwal-Microsoft Sep 17, 2025
bc3ec26
fix: Updates to networking, identity, and default settings in Cogniti…
Pavan-Microsoft Sep 17, 2025
f2559c3
fix: Removed private end points for Web, Admin & Function app
Prajwal-Microsoft Sep 17, 2025
b5c58af
fix: Refine Azure Cognitive Services Infra: Role Assignments and Stor…
Pavan-Microsoft Sep 17, 2025
ed5e62c
fix: Warnings fixed and added comments
Prajwal-Microsoft Sep 17, 2025
ea7e2bb
Merge branch 'waf-avm' of https://github.com/Azure-Samples/chat-with-…
Prajwal-Microsoft Sep 17, 2025
a3cdbcb
fix: deployment script issue for postgres in private network
Prajwal-Microsoft Sep 18, 2025
ee8edc4
fix: local debugging with integrated vectorization (#1903)
Pavan-Microsoft Sep 18, 2025
17ae566
fix: Refactored code and fix for deployment script
Prajwal-Microsoft Sep 18, 2025
4abd024
Merge branch 'waf-avm' of https://github.com/Azure-Samples/chat-with-…
Prajwal-Microsoft Sep 18, 2025
3247cf3
feat: Module refactoring for Database & identity
Prajwal-Microsoft Sep 18, 2025
8936f83
Fix: Fixed Postgres issue and commented bastion host to decrease depl…
Prajwal-Microsoft Sep 18, 2025
2e1be1a
fix: Integrated vectorization for WAF
Prajwal-Microsoft Sep 18, 2025
02c8e7e
fix: Refactor OpenAI Networking for Integrated Vectorization Compatib…
Pavan-Microsoft Sep 19, 2025
aba8dea
fix: Refactored modules, descriptions for param & outputs
Prajwal-Microsoft Sep 19, 2025
84243fb
fix: Integrated Vectorization issue
Prajwal-Microsoft Sep 20, 2025
cde1750
fix: main.json for integrated vectorization issue
Prajwal-Microsoft Sep 20, 2025
6ba6cc6
fix: Removed unwanted flag
Prajwal-Microsoft Sep 20, 2025
ca2fab9
fix: Updated default database type
Prajwal-Microsoft Sep 20, 2025
060df23
refactor: Removed old code & optimization to reduce deployment time
Prajwal-Microsoft Sep 20, 2025
30cd186
fix: File upload issue due to event grid
Prajwal-Microsoft Sep 21, 2025
70a0801
fix: Updated the delay time for postgresql
Prajwal-Microsoft Sep 21, 2025
f0b57c4
fix: Added Method to download the blob file via API
Prajwal-Microsoft Sep 21, 2025
aa5bd5c
fix: removed \n
Prajwal-Microsoft Sep 22, 2025
cb2e607
fix: Improve Deployment Docs, Single-Tenant Teams Extension, and Inge…
Pavan-Microsoft Sep 22, 2025
b2c6799
fix: Added Stop sequence for Open AI
Prajwal-Microsoft Sep 23, 2025
df5b326
Merge branch 'waf-avm' of https://github.com/Azure-Samples/chat-with-…
Prajwal-Microsoft Sep 23, 2025
e5141e5
refactor: Code refactoring
Prajwal-Microsoft Sep 23, 2025
5889528
fix: Revert the azure.yaml file
Prajwal-Microsoft Sep 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/group_dependabot_security_updates.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,10 @@ jobs:
git config --global user.email "[email protected]"

- name: Install required tools
uses: awalsh128/[email protected]
with:
packages: "jq gh"
run: |
sudo apt-get update
sudo apt-get install -y jq gh
shell: bash

- name: Enable strict error handling
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion azure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

name: chat-with-your-data-solution-accelerator
metadata:
template: [email protected]
template: [email protected]
hooks:
postprovision:
# run: ./infra/prompt-flow/create-prompt-flow.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_conversation_client():
f"https://{env_helper.AZURE_COSMOSDB_ACCOUNT}.documents.azure.com:443/"
)
credential = (
get_azure_credential()
get_azure_credential(env_helper.MANAGED_IDENTITY_CLIENT_ID)
if not env_helper.AZURE_COSMOSDB_ACCOUNT_KEY
else env_helper.AZURE_COSMOSDB_ACCOUNT_KEY
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import asyncpg
from datetime import datetime, timezone
from ..helpers.azure_credential_utils import get_azure_credential
from ..helpers.env_helper import EnvHelper

from .database_client_base import DatabaseClientBase

Expand All @@ -13,6 +14,7 @@ class PostgresConversationClient(DatabaseClientBase):
def __init__(
self, user: str, host: str, database: str, enable_message_feedback: bool = False
):
self.env_helper = EnvHelper()
self.user = user
self.host = host
self.database = database
Expand All @@ -21,7 +23,7 @@ def __init__(

async def connect(self):
try:
credential = get_azure_credential()
credential = get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
token = credential.get_token(
"https://ossrdbms-aad.database.windows.net/.default"
).token
Expand All @@ -31,7 +33,7 @@ async def connect(self):
database=self.database,
password=token,
port=5432,
ssl="require",
ssl=True,
)
except Exception as e:
logger.error("Failed to connect to PostgreSQL: %s", e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def create_queue_client():
return QueueClient(
account_url=f"https://{env_helper.AZURE_BLOB_ACCOUNT_NAME}.queue.core.windows.net/",
queue_name=env_helper.DOCUMENT_PROCESSING_QUEUE_NAME,
credential=get_azure_credential(),
credential=get_azure_credential(env_helper.MANAGED_IDENTITY_CLIENT_ID),
message_encode_policy=BinaryBase64EncodePolicy(),
)

Expand Down Expand Up @@ -56,7 +56,7 @@ def __init__(
if self.auth_type == "rbac":
self.account_key = None
self.blob_service_client = BlobServiceClient(
account_url=self.endpoint, credential=get_azure_credential()
account_url=self.endpoint, credential=get_azure_credential(env_helper.MANAGED_IDENTITY_CLIENT_ID)
)
self.user_delegation_key = self.request_user_delegation_key(
blob_service_client=self.blob_service_client
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(self, env_helper: EnvHelper) -> None:
self.model_version = (
env_helper.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION
)
self.managed_identity_client_id = env_helper.MANAGED_IDENTITY_CLIENT_ID

def vectorize_image(self, image_url: str) -> list[float]:
logger.info(f"Making call to computer vision to vectorize image: {image_url}")
Expand Down Expand Up @@ -57,7 +58,7 @@ def __make_request(self, path: str, body) -> Response:
headers["Ocp-Apim-Subscription-Key"] = self.key
else:
token_provider = get_bearer_token_provider(
get_azure_credential(), self.__TOKEN_SCOPE
get_azure_credential(self.managed_identity_client_id), self.__TOKEN_SCOPE
)
headers["Authorization"] = "Bearer " + token_provider()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self) -> None:
if env_helper.AZURE_AUTH_TYPE == "rbac":
self.document_analysis_client = DocumentAnalysisClient(
endpoint=self.AZURE_FORM_RECOGNIZER_ENDPOINT,
credential=get_azure_credential(),
credential=get_azure_credential(env_helper.MANAGED_IDENTITY_CLIENT_ID),
headers={
"x-ms-useragent": "chat-with-your-data-solution-accelerator/1.0.0"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ def _create_search_client(self):
dbname = self.env_helper.POSTGRESQL_DATABASE

# Acquire the access token
credential = get_azure_credential()
credential = get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
access_token = credential.get_token(
"https://ossrdbms-aad.database.windows.net/.default"
)

# Use the token in the connection string
conn_string = (
f"host={host} user={user} dbname={dbname} password={access_token.token}"
f"host={host} user={user} dbname={dbname} password={access_token.token} sslmode=require"
)
self.conn = psycopg2.connect(conn_string)
logger.info("Connected to Azure PostgreSQL successfully.")
Expand Down
4 changes: 2 additions & 2 deletions code/backend/batch/utilities/helpers/azure_search_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _search_credential(self):
if self.env_helper.is_auth_type_keys():
return AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
else:
return get_azure_credential()
return get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)

def _create_search_client(
self, search_credential: Union[AzureKeyCredential, get_azure_credential]
Expand Down Expand Up @@ -285,7 +285,7 @@ def get_conversation_logger(self):
]

if self.env_helper.AZURE_AUTH_TYPE == "rbac":
credential = get_azure_credential()
credential = get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
return AzureSearch(
azure_search_endpoint=self.env_helper.AZURE_SEARCH_SERVICE,
azure_search_key=None, # Remove API key
Expand Down
13 changes: 9 additions & 4 deletions code/backend/batch/utilities/helpers/env_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,13 @@ def __load_config(self, **kwargs) -> None:
self.secretHelper = SecretHelper()

self.LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()
self.APP_ENV = os.getenv("APP_ENV", "Prod").lower()

# Azure
self.AZURE_SUBSCRIPTION_ID = os.getenv("AZURE_SUBSCRIPTION_ID", "")
self.AZURE_RESOURCE_GROUP = os.getenv("AZURE_RESOURCE_GROUP", "")
self.MANAGED_IDENTITY_CLIENT_ID = os.getenv("MANAGED_IDENTITY_CLIENT_ID", "")
self.MANAGED_IDENTITY_RESOURCE_ID = os.getenv("MANAGED_IDENTITY_RESOURCE_ID", "")

# Azure Search
self.AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE", "")
Expand Down Expand Up @@ -217,7 +220,7 @@ def __load_config(self, **kwargs) -> None:
)

self.AZURE_TOKEN_PROVIDER = get_bearer_token_provider(
get_azure_credential(), "https://cognitiveservices.azure.com/.default"
get_azure_credential(self.MANAGED_IDENTITY_CLIENT_ID), "https://cognitiveservices.azure.com/.default"
)
self.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = self.get_env_var_int(
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1
Expand All @@ -234,7 +237,7 @@ def __load_config(self, **kwargs) -> None:
self.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION = os.getenv(
"AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION", "2023-04-15"
)
self.FUNCTION_KEY = os.getenv("FUNCTION_KEY", "")
self.FUNCTION_KEY = self.secretHelper.get_secret("FUNCTION_KEY")

# Initialize Azure keys based on authentication type and environment settings.
# When AZURE_AUTH_TYPE is "rbac", azure keys are None or an empty string.
Expand All @@ -243,7 +246,6 @@ def __load_config(self, **kwargs) -> None:
self.AZURE_OPENAI_API_KEY = ""
self.AZURE_SPEECH_KEY = None
self.AZURE_COMPUTER_VISION_KEY = None
self.FUNCTION_KEY = self.secretHelper.get_secret("FUNCTION_KEY")
else:
self.AZURE_SEARCH_KEY = self.secretHelper.get_secret("AZURE_SEARCH_KEY")
self.AZURE_OPENAI_API_KEY = self.secretHelper.get_secret(
Expand Down Expand Up @@ -429,8 +431,11 @@ def __init__(self) -> None:
self.USE_KEY_VAULT = os.getenv("USE_KEY_VAULT", "").lower() == "true"
self.secret_client = None
if self.USE_KEY_VAULT:
vault_endpoint = os.environ.get("AZURE_KEY_VAULT_ENDPOINT")
if not vault_endpoint:
raise ValueError("AZURE_KEY_VAULT_ENDPOINT environment variable is required when USE_KEY_VAULT is true")
self.secret_client = SecretClient(
os.environ.get("AZURE_KEY_VAULT_ENDPOINT"), get_azure_credential()
vault_endpoint, get_azure_credential(client_id=os.getenv("MANAGED_IDENTITY_CLIENT_ID", None))
)

def get_secret(self, secret_name: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion code/backend/batch/utilities/helpers/llm_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def get_sk_service_settings(self, service: AzureChatCompletion):
def get_ml_client(self):
if not hasattr(self, "_ml_client"):
self._ml_client = MLClient(
get_azure_credential(),
get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID),
self.env_helper.AZURE_SUBSCRIPTION_ID,
self.env_helper.AZURE_RESOURCE_GROUP,
self.env_helper.AZURE_ML_WORKSPACE_NAME,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from azure.search.documents.indexes.models import (
SearchIndexerDataContainer,
SearchIndexerDataSourceConnection,
SearchIndexerDataUserAssignedIdentity,
)
from azure.search.documents.indexes._generated.models import (
NativeBlobSoftDeleteDeletionDetectionPolicy,
Expand All @@ -19,7 +20,7 @@ def __init__(self, env_helper: EnvHelper):
(
AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
if self.env_helper.is_auth_type_keys()
else get_azure_credential()
else get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
),
)

Expand All @@ -35,6 +36,13 @@ def create_or_update_datasource(self):
connection_string=connection_string,
container=container,
data_deletion_detection_policy=NativeBlobSoftDeleteDeletionDetectionPolicy(),
identity=(
None
if getattr(self.env_helper, "APP_ENV", "").lower() == "dev"
else SearchIndexerDataUserAssignedIdentity(
user_assigned_identity=self.env_helper.MANAGED_IDENTITY_RESOURCE_ID
)
),
)
self.indexer_client.create_or_update_data_source_connection(
data_source_connection
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
SemanticPrioritizedFields,
SemanticField,
SearchIndex,
SearchIndexerDataUserAssignedIdentity,
)
from ..helpers.env_helper import EnvHelper
from ..helpers.azure_credential_utils import get_azure_credential
Expand All @@ -39,7 +40,7 @@ def __init__(self, env_helper: EnvHelper, llm_helper: LLMHelper):
(
AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
if self.env_helper.is_auth_type_keys()
else get_azure_credential()
else get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
),
)

Expand Down Expand Up @@ -144,6 +145,13 @@ def get_vector_search_config(self):
azure_open_ai_parameters = AzureOpenAIParameters(
resource_uri=self.env_helper.AZURE_OPENAI_ENDPOINT,
deployment_id=self.env_helper.AZURE_OPENAI_EMBEDDING_MODEL,
auth_identity=(
None
if getattr(self.env_helper, "APP_ENV", "").lower() == "dev"
else SearchIndexerDataUserAssignedIdentity(
user_assigned_identity=self.env_helper.MANAGED_IDENTITY_RESOURCE_ID
)
),
)

return VectorSearch(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self, env_helper: EnvHelper):
(
AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
if self.env_helper.is_auth_type_keys()
else get_azure_credential()
else get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
SearchIndexerIndexProjectionsParameters,
IndexProjectionMode,
SearchIndexerSkillset,
SearchIndexerDataUserAssignedIdentity,
)
from azure.search.documents.indexes import SearchIndexerClient
from ..helpers.config.config_helper import IntegratedVectorizationConfig
Expand All @@ -33,7 +34,7 @@ def __init__(
(
AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
if self.env_helper.is_auth_type_keys()
else get_azure_credential()
else get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
),
)
self.integrated_vectorization_config = integrated_vectorization_config
Expand Down Expand Up @@ -95,6 +96,13 @@ def create_skillset(self):
if self.env_helper.is_auth_type_keys()
else None
),
auth_identity=(
None
if getattr(self.env_helper, "APP_ENV", "").lower() == "dev"
else SearchIndexerDataUserAssignedIdentity(
user_assigned_identity=self.env_helper.MANAGED_IDENTITY_RESOURCE_ID
)
),
inputs=[
InputFieldMappingEntry(name="text", source="/document/pages/*"),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def create_search_client(self):
credential=(
AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
if self.env_helper.is_auth_type_keys()
else get_azure_credential()
else get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
),
)

Expand Down Expand Up @@ -170,7 +170,7 @@ def _check_index_exists(self) -> bool:
credential=(
AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
if self.env_helper.is_auth_type_keys()
else get_azure_credential()
else get_azure_credential(self.env_helper.MANAGED_IDENTITY_CLIENT_ID)
),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self):
logger.info("Initializing ContentSafetyClient with RBAC authentication.")
self.content_safety_client = ContentSafetyClient(
env_helper.AZURE_CONTENT_SAFETY_ENDPOINT,
get_azure_credential(),
get_azure_credential(env_helper.MANAGED_IDENTITY_CLIENT_ID),
)
else:
logger.info(
Expand Down
11 changes: 8 additions & 3 deletions code/backend/pages/01_Ingest_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ def reprocess_all():

def add_urls():
urls = st.session_state["urls"].split("\n")
add_url_embeddings(urls)
result = add_url_embeddings(urls)
# If URLs are valid and processed, clear the textarea
if result:
st.session_state["urls"] = ""


def sanitize_metadata_value(value):
Expand All @@ -67,7 +70,7 @@ def add_url_embeddings(urls: list[str]):
has_valid_url = bool(list(filter(str.strip, urls)))
if not has_valid_url:
st.error("Please enter at least one valid URL.")
return
return False

params = {}
if env_helper.FUNCTION_KEY is not None:
Expand All @@ -80,9 +83,11 @@ def add_url_embeddings(urls: list[str]):
)
r = requests.post(url=backend_url, params=params, json=body)
if not r.ok:
raise ValueError(f"Error {r.status_code}: {r.text}")
st.error(f"Error {r.status_code}: {r.text}")
return False
else:
st.success(f"Embeddings added successfully for {url}")
return True


try:
Expand Down
Loading
Loading