From d7210993026938bd21e5509c56f6fc6fa2543db6 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Thu, 22 Aug 2024 11:51:54 +0000 Subject: [PATCH 01/41] Update bicep for ACA --- app/backend/.dockerignore | 7 + app/backend/Dockerfile | 11 + azure.yaml | 8 +- azure.yaml.backup | 113 +++ infra/abbreviations.json | 1 + infra/core/host/container-app-upsert.bicep | 123 +++ infra/core/host/container-app.bicep | 182 +++++ infra/core/host/container-apps.bicep | 56 ++ infra/core/host/containerapps.bicep.backup | 112 +++ infra/core/security/aca-identity.bicep | 9 + infra/core/security/registry-access.bicep | 19 + infra/main-aca.bicep | 890 +++++++++++++++++++++ infra/main-aca.parameters.json | 240 ++++++ infra/main.parameters.json | 3 + 14 files changed, 1772 insertions(+), 2 deletions(-) create mode 100644 app/backend/.dockerignore create mode 100644 app/backend/Dockerfile create mode 100644 azure.yaml.backup create mode 100644 infra/core/host/container-app-upsert.bicep create mode 100644 infra/core/host/container-app.bicep create mode 100644 infra/core/host/container-apps.bicep create mode 100644 infra/core/host/containerapps.bicep.backup create mode 100644 infra/core/security/aca-identity.bicep create mode 100644 infra/core/security/registry-access.bicep create mode 100644 infra/main-aca.bicep create mode 100644 infra/main-aca.parameters.json diff --git a/app/backend/.dockerignore b/app/backend/.dockerignore new file mode 100644 index 0000000000..13a9b73747 --- /dev/null +++ b/app/backend/.dockerignore @@ -0,0 +1,7 @@ +.git +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +env \ No newline at end of file diff --git a/app/backend/Dockerfile b/app/backend/Dockerfile new file mode 100644 index 0000000000..a84bd6e0b7 --- /dev/null +++ b/app/backend/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.11-bullseye + +WORKDIR /app + +COPY ./ /app + +RUN python -m pip install -r requirements.txt + +RUN python -m pip install gunicorn + +CMD ["python3", "-m", "gunicorn", "-b", 
"0.0.0.0:8000", "main:app"] diff --git a/azure.yaml b/azure.yaml index 2d5ef5abb0..37b3f9ac4d 100644 --- a/azure.yaml +++ b/azure.yaml @@ -7,9 +7,9 @@ services: backend: project: ./app/backend language: py - host: appservice + host: containerapp hooks: - prepackage: + prebuild: windows: shell: pwsh run: cd ../frontend;npm install;npm run build @@ -20,6 +20,10 @@ services: run: cd ../frontend;npm install;npm run build interactive: false continueOnError: false +infra: + provider: bicep + module: main-aca + pipeline: variables: - AZURE_OPENAI_SERVICE diff --git a/azure.yaml.backup b/azure.yaml.backup new file mode 100644 index 0000000000..2d5ef5abb0 --- /dev/null +++ b/azure.yaml.backup @@ -0,0 +1,113 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json + +name: azure-search-openai-demo +metadata: + template: azure-search-openai-demo@0.0.2-beta +services: + backend: + project: ./app/backend + language: py + host: appservice + hooks: + prepackage: + windows: + shell: pwsh + run: cd ../frontend;npm install;npm run build + interactive: false + continueOnError: false + posix: + shell: sh + run: cd ../frontend;npm install;npm run build + interactive: false + continueOnError: false +pipeline: + variables: + - AZURE_OPENAI_SERVICE + - AZURE_OPENAI_API_VERSION + - AZURE_OPENAI_RESOURCE_GROUP + - AZURE_DOCUMENTINTELLIGENCE_SERVICE + - AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP + - AZURE_DOCUMENTINTELLIGENCE_SKU + - AZURE_DOCUMENTINTELLIGENCE_LOCATION + - AZURE_SEARCH_INDEX + - AZURE_SEARCH_SERVICE + - AZURE_SEARCH_SERVICE_RESOURCE_GROUP + - AZURE_SEARCH_SERVICE_LOCATION + - AZURE_SEARCH_SERVICE_SKU + - AZURE_SEARCH_QUERY_LANGUAGE + - AZURE_SEARCH_QUERY_SPELLER + - AZURE_SEARCH_SEMANTIC_RANKER + - AZURE_STORAGE_ACCOUNT + - AZURE_STORAGE_RESOURCE_GROUP + - AZURE_STORAGE_SKU + - AZURE_APP_SERVICE + - AZURE_APP_SERVICE_SKU + - AZURE_APP_SERVICE_PLAN + - AZURE_OPENAI_CHATGPT_MODEL + - AZURE_OPENAI_CHATGPT_DEPLOYMENT 
+ - AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY + - AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION + - AZURE_OPENAI_EMB_MODEL_NAME + - AZURE_OPENAI_EMB_DEPLOYMENT + - AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY + - AZURE_OPENAI_EMB_DEPLOYMENT_VERSION + - AZURE_OPENAI_EMB_DIMENSIONS + - OPENAI_HOST + - OPENAI_API_KEY + - OPENAI_ORGANIZATION + - AZURE_USE_APPLICATION_INSIGHTS + - AZURE_APPLICATION_INSIGHTS + - AZURE_APPLICATION_INSIGHTS_DASHBOARD + - AZURE_LOG_ANALYTICS + - USE_VECTORS + - USE_GPT4V + - AZURE_VISION_ENDPOINT + - VISION_SECRET_NAME + - AZURE_COMPUTER_VISION_SERVICE + - AZURE_COMPUTER_VISION_RESOURCE_GROUP + - AZURE_COMPUTER_VISION_LOCATION + - AZURE_COMPUTER_VISION_SKU + - USE_SPEECH_INPUT_BROWSER + - USE_SPEECH_OUTPUT_BROWSER + - USE_SPEECH_OUTPUT_AZURE + - AZURE_SPEECH_SERVICE + - AZURE_SPEECH_SERVICE_RESOURCE_GROUP + - AZURE_SPEECH_SERVICE_LOCATION + - AZURE_SPEECH_SERVICE_SKU + - AZURE_KEY_VAULT_NAME + - AZURE_USE_AUTHENTICATION + - AZURE_ENFORCE_ACCESS_CONTROL + - AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS + - AZURE_AUTH_TENANT_ID + - AZURE_SERVER_APP_ID + - AZURE_CLIENT_APP_ID + - ALLOWED_ORIGIN + - AZURE_ADLS_GEN2_STORAGE_ACCOUNT + - AZURE_ADLS_GEN2_FILESYSTEM_PATH + - AZURE_ADLS_GEN2_FILESYSTEM + secrets: + - AZURE_SERVER_APP_SECRET + - AZURE_CLIENT_APP_SECRET +hooks: + preprovision: + windows: + shell: pwsh + run: ./scripts/auth_init.ps1 + interactive: true + continueOnError: false + posix: + shell: sh + run: ./scripts/auth_init.sh + interactive: true + continueOnError: false + postprovision: + windows: + shell: pwsh + run: ./scripts/auth_update.ps1;./scripts/prepdocs.ps1 + interactive: true + continueOnError: false + posix: + shell: sh + run: ./scripts/auth_update.sh;./scripts/prepdocs.sh + interactive: true + continueOnError: false diff --git a/infra/abbreviations.json b/infra/abbreviations.json index 75959585e8..5084711603 100644 --- a/infra/abbreviations.json +++ b/infra/abbreviations.json @@ -135,6 +135,7 @@ "virtualNetworks": "vnet-", "webServerFarms": 
"plan-", "webSitesAppService": "app-", + "webSitesContainerApps": "capps-", "webSitesAppServiceEnvironment": "ase-", "webSitesFunctions": "func-", "webStaticSites": "stapp-" diff --git a/infra/core/host/container-app-upsert.bicep b/infra/core/host/container-app-upsert.bicep new file mode 100644 index 0000000000..3de7660da0 --- /dev/null +++ b/infra/core/host/container-app-upsert.bicep @@ -0,0 +1,123 @@ +metadata description = 'Creates or updates an existing Azure Container App.' +param name string +param location string = resourceGroup().location +param tags object = {} + + +@description('The number of CPU cores allocated to a single container instance, e.g., 0.5') +param containerCpuCoreCount string = '0.5' + +@description('The maximum number of replicas to run. Must be at least 1.') +@minValue(1) +param containerMaxReplicas int = 10 + +@description('The amount of memory allocated to a single container instance, e.g., 1Gi') +param containerMemory string = '1.0Gi' + +@description('The minimum number of replicas to run. Must be at least 1.') +@minValue(1) +param containerMinReplicas int = 1 + +@description('The name of the container') +param containerName string = 'main' + +@description('The environment name for the container apps') +param containerAppsEnvironmentName string = '${containerName}env' + +@description('The name of the container registry') +param containerRegistryName string = '' + +@description('Hostname suffix for container registry. 
Set when deploying to sovereign clouds') +param containerRegistryHostSuffix string = 'azurecr.io' + +@allowed(['http', 'grpc']) +@description('The protocol used by Dapr to connect to the app, e.g., HTTP or gRPC') +param daprAppProtocol string = 'http' + +@description('Enable or disable Dapr for the container app') +param daprEnabled bool = false + +@description('The Dapr app ID') +param daprAppId string = containerName + +@description('Specifies if the resource already exists') +param exists bool = false + +@description('Specifies if Ingress is enabled for the container app') +param ingressEnabled bool = true + +@description('The type of identity for the resource') +@allowed(['None', 'SystemAssigned', 'UserAssigned']) +param identityType string = 'None' + +@description('The name of the user-assigned identity') +param identityName string = '' + +@description('The name of the container image') +param imageName string = '' + +@description('The secrets required for the container') +@secure() +param secrets object = {} + +@description('The keyvault identities required for the container') +@secure() +param keyvaultIdentities object = {} + +@description('The environment variables for the container in key value pairs') +param env object = {} + +@description('Specifies if the resource ingress is exposed externally') +param external bool = true + +@description('The service binds associated with the container') +param serviceBinds array = [] + +@description('The target port for the container') +param targetPort int = 80 + +resource existingApp 'Microsoft.App/containerApps@2023-05-02-preview' existing = if (exists) { + name: name +} + +module app 'container-app.bicep' = { + name: '${deployment().name}-update' + params: { + name: name + location: location + tags: tags + identityType: identityType + identityName: identityName + ingressEnabled: ingressEnabled + containerName: containerName + containerAppsEnvironmentName: containerAppsEnvironmentName + containerRegistryName: 
containerRegistryName + containerRegistryHostSuffix: containerRegistryHostSuffix + containerCpuCoreCount: containerCpuCoreCount + containerMemory: containerMemory + containerMinReplicas: containerMinReplicas + containerMaxReplicas: containerMaxReplicas + daprEnabled: daprEnabled + daprAppId: daprAppId + daprAppProtocol: daprAppProtocol + secrets: secrets + keyvaultIdentities: keyvaultIdentities + external: external + env: [ + for key in objectKeys(env): { + name: key + value: '${env[key]}' + } + ] + imageName: !empty(imageName) ? imageName : exists ? existingApp.properties.template.containers[0].image : '' + targetPort: targetPort + serviceBinds: serviceBinds + } +} + +output defaultDomain string = app.outputs.defaultDomain +output imageName string = app.outputs.imageName +output name string = app.outputs.name +output uri string = app.outputs.uri +output id string = app.outputs.id +output identityPrincipalId string = app.outputs.identityPrincipalId diff --git a/infra/core/host/container-app.bicep b/infra/core/host/container-app.bicep new file mode 100644 index 0000000000..0dd2957941 --- /dev/null +++ b/infra/core/host/container-app.bicep @@ -0,0 +1,182 @@ +metadata description = 'Creates a container app in an Azure Container App environment.' +param name string +param location string = resourceGroup().location +param tags object = {} + +@description('Allowed origins') +param allowedOrigins array = [] + +@description('Name of the environment for container apps') +param containerAppsEnvironmentName string + +@description('CPU cores allocated to a single container instance, e.g., 0.5') +param containerCpuCoreCount string = '0.5' + +@description('The maximum number of replicas to run. Must be at least 1.') +@minValue(1) +param containerMaxReplicas int = 10 + +@description('Memory allocated to a single container instance, e.g., 1Gi') +param containerMemory string = '1.0Gi' + +@description('The minimum number of replicas to run. 
Must be at least 1.') +param containerMinReplicas int = 1 + +@description('The name of the container') +param containerName string = 'main' + +@description('The name of the container registry') +param containerRegistryName string = '' + +@description('Hostname suffix for container registry. Set when deploying to sovereign clouds') +param containerRegistryHostSuffix string = 'azurecr.io' + +@description('The protocol used by Dapr to connect to the app, e.g., http or grpc') +@allowed([ 'http', 'grpc' ]) +param daprAppProtocol string = 'http' + +@description('The Dapr app ID') +param daprAppId string = containerName + +@description('Enable Dapr') +param daprEnabled bool = false + +@description('The environment variables for the container') +param env array = [] + +@description('Specifies if the resource ingress is exposed externally') +param external bool = true + +@description('The name of the user-assigned identity') +param identityName string = '' + +@description('The type of identity for the resource') +@allowed([ 'None', 'SystemAssigned', 'UserAssigned' ]) +param identityType string = 'None' + +@description('The name of the container image') +param imageName string = '' + +@description('Specifies if Ingress is enabled for the container app') +param ingressEnabled bool = true + +param revisionMode string = 'Single' + +@description('The secrets required for the container') +@secure() +param secrets object = {} + +@description('The keyvault identities required for the container') +@secure() +param keyvaultIdentities object = {} + +@description('The service binds associated with the container') +param serviceBinds array = [] + +@description('The name of the container apps add-on to use. e.g. 
redis') +param serviceType string = '' + +@description('The target port for the container') +param targetPort int = 80 + +resource userIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' existing = if (!empty(identityName)) { + name: identityName +} + +// Private registry support requires both an ACR name and a User Assigned managed identity +var usePrivateRegistry = !empty(identityName) && !empty(containerRegistryName) + +// Automatically set to `UserAssigned` when an `identityName` has been set +var normalizedIdentityType = !empty(identityName) ? 'UserAssigned' : identityType + +var keyvalueSecrets = [for secret in items(secrets): { + name: secret.key + value: secret.value +}] + +var keyvaultIdentitySecrets = [for secret in items(keyvaultIdentities): { + name: secret.key + keyVaultUrl: secret.value.keyVaultUrl + identity: secret.value.identity +}] + +module containerRegistryAccess '../security/registry-access.bicep' = if (usePrivateRegistry) { + name: '${deployment().name}-registry-access' + params: { + containerRegistryName: containerRegistryName + principalId: usePrivateRegistry ? userIdentity.properties.principalId : '' + } +} + +resource app 'Microsoft.App/containerApps@2023-05-02-preview' = { + name: name + location: location + tags: tags + // It is critical that the identity is granted ACR pull access before the app is created + // otherwise the container app will throw a provision error + // This also forces us to use an user assigned managed identity since there would no way to + // provide the system assigned identity with the ACR pull access before the app is created + dependsOn: usePrivateRegistry ? [ containerRegistryAccess ] : [] + identity: { + type: normalizedIdentityType + userAssignedIdentities: !empty(identityName) && normalizedIdentityType == 'UserAssigned' ? 
{ '${userIdentity.id}': {} } : null + } + properties: { + managedEnvironmentId: containerAppsEnvironment.id + configuration: { + activeRevisionsMode: revisionMode + ingress: ingressEnabled ? { + external: external + targetPort: targetPort + transport: 'auto' + corsPolicy: { + allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) + } + } : null + dapr: daprEnabled ? { + enabled: true + appId: daprAppId + appProtocol: daprAppProtocol + appPort: ingressEnabled ? targetPort : 0 + } : { enabled: false } + secrets: concat(keyvalueSecrets, keyvaultIdentitySecrets) + service: !empty(serviceType) ? { type: serviceType } : null + registries: usePrivateRegistry ? [ + { + server: '${containerRegistryName}.${containerRegistryHostSuffix}' + identity: userIdentity.id + } + ] : [] + } + template: { + serviceBinds: !empty(serviceBinds) ? serviceBinds : null + containers: [ + { + image: !empty(imageName) ? imageName : 'mcr.microsoft.com/azuredocs/containerapps-helloworld:latest' + name: containerName + env: env + resources: { + cpu: json(containerCpuCoreCount) + memory: containerMemory + } + } + ] + scale: { + minReplicas: containerMinReplicas + maxReplicas: containerMaxReplicas + } + } + } +} + +resource containerAppsEnvironment 'Microsoft.App/managedEnvironments@2023-05-01' existing = { + name: containerAppsEnvironmentName +} + +output defaultDomain string = containerAppsEnvironment.properties.defaultDomain +output identityPrincipalId string = normalizedIdentityType == 'None' ? '' : (empty(identityName) ? app.identity.principalId : userIdentity.properties.principalId) +output imageName string = imageName +output name string = app.name +output serviceBind object = !empty(serviceType) ? { serviceId: app.id, name: name } : {} +output uri string = ingressEnabled ? 
'https://${app.properties.configuration.ingress.fqdn}' : '' +output id string = app.id diff --git a/infra/core/host/container-apps.bicep b/infra/core/host/container-apps.bicep new file mode 100644 index 0000000000..6aa856514d --- /dev/null +++ b/infra/core/host/container-apps.bicep @@ -0,0 +1,56 @@ +metadata description = 'Creates an Azure Container Registry and an Azure Container Apps environment.' +param name string +param location string = resourceGroup().location +param tags object = {} + +param containerAppsEnvironmentName string +param containerRegistryName string +param containerRegistryResourceGroupName string = '' +param containerRegistryAdminUserEnabled bool = false +param logAnalyticsWorkspaceResourceId string +param applicationInsightsName string = '' // Not used here, was used for DAPR +param virtualNetworkSubnetId string = '' + +@description('Optional user assigned identity IDs to assign to the resource') +param userAssignedIdentityResourceIds array = [] + +module containerAppsEnvironment 'br/public:avm/res/app/managed-environment:0.5.2' = { + name: '${name}-container-apps-environment' + params: { + // Required parameters + logAnalyticsWorkspaceResourceId: logAnalyticsWorkspaceResourceId + + managedIdentities: empty(userAssignedIdentityResourceIds) ? { + systemAssigned: true + } : { + userAssignedResourceIds: userAssignedIdentityResourceIds + } + + name: containerAppsEnvironmentName + // Non-required parameters + infrastructureResourceGroupName: containerRegistryResourceGroupName + infrastructureSubnetId: virtualNetworkSubnetId + // internal: true + location: location + tags: tags + zoneRedundant: false + } +} + +module containerRegistry 'br/public:avm/res/container-registry/registry:0.3.1' = { + name: '${name}-container-registry' + scope: !empty(containerRegistryResourceGroupName) ? 
resourceGroup(containerRegistryResourceGroupName) : resourceGroup() + params: { + name: containerRegistryName + location: location + acrAdminUserEnabled: containerRegistryAdminUserEnabled + tags: tags + } +} + +output defaultDomain string = containerAppsEnvironment.outputs.defaultDomain +output environmentName string = containerAppsEnvironment.outputs.name +output environmentId string = containerAppsEnvironment.outputs.resourceId + +output registryLoginServer string = containerRegistry.outputs.loginServer +output registryName string = containerRegistry.outputs.name diff --git a/infra/core/host/containerapps.bicep.backup b/infra/core/host/containerapps.bicep.backup new file mode 100644 index 0000000000..ccb10e7b62 --- /dev/null +++ b/infra/core/host/containerapps.bicep.backup @@ -0,0 +1,112 @@ +param containerAppName string +param containerAppEnvName string = '${containerAppName}env' +param containerRegistryName string = '${containerAppName}cr' + +@description('Allowed origins') +param allowedOrigins array = [] + +param location string = resourceGroup().location +param logAnalyticsName string +param zoneRedundant bool = false +@description('The port that the container listens on') +param containerTargetPort int = 8000 +@secure() +param envVariables object = {} + +resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = { + name: logAnalyticsName +} + +resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' = { + name: containerRegistryName + location: location + sku: { + name: 'Basic' + } + properties: { + adminUserEnabled: true + } +} + +resource containerAppEnvironment 'Microsoft.App/managedEnvironments@2023-05-01' = { + name: containerAppEnvName + location: location + properties: { + appLogsConfiguration: { + destination: 'log-analytics' + logAnalyticsConfiguration: { + customerId: logAnalyticsWorkspace.properties.customerId + sharedKey: logAnalyticsWorkspace.listKeys().primarySharedKey + } + } 
+ + workloadProfiles: [ + { + name: 'Consumption' + workloadProfileType: 'Consumption' + } + ] + + zoneRedundant: zoneRedundant + } +} + +resource myContainerApp 'Microsoft.App/containerApps@2023-05-01' = { + name: containerAppName + location: location + properties: { + managedEnvironmentId: containerAppEnvironment.id + configuration: { + ingress: { + transport: 'http' + external: true + targetPort: containerTargetPort + allowInsecure: false + traffic: [ + { + latestRevision: true + weight: 100 + } + ] + corsPolicy: { + allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) + } + } + registries: [ + { + server: containerRegistry.properties.loginServer + username: containerRegistry.name + passwordSecretRef: 'container-registry-password' + } + ] + secrets: [ + { + name: 'container-registry-password' + value: containerRegistry.listCredentials().passwords[0].value + } + ] + } + template: { + containers: [ + { + name: 'ai-search' + image: '${containerRegistry}.azurecr.io/ai-search:latest' + resources: { + cpu: 1 + memory: '2Gi' + } + env: [ + for key in objectKeys(envVariables): { + name: key + value: envVariables[key] + } + ] + } + ] + scale: { + minReplicas: 0 + maxReplicas: 1 + } + } + } +} diff --git a/infra/core/security/aca-identity.bicep b/infra/core/security/aca-identity.bicep new file mode 100644 index 0000000000..f2dc303185 --- /dev/null +++ b/infra/core/security/aca-identity.bicep @@ -0,0 +1,9 @@ +param identityName string +param location string + +resource webIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' = { + name: identityName + location: location +} + +output principalId string = webIdentity.properties.principalId diff --git a/infra/core/security/registry-access.bicep b/infra/core/security/registry-access.bicep new file mode 100644 index 0000000000..fc66837a12 --- /dev/null +++ b/infra/core/security/registry-access.bicep @@ -0,0 +1,19 @@ +metadata description = 'Assigns ACR Pull 
permissions to access an Azure Container Registry.' +param containerRegistryName string +param principalId string + +var acrPullRole = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d') + +resource aksAcrPull 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + scope: containerRegistry // Use when specifying a scope that is different than the deployment scope + name: guid(subscription().id, resourceGroup().id, principalId, acrPullRole) + properties: { + roleDefinitionId: acrPullRole + principalType: 'ServicePrincipal' + principalId: principalId + } +} + +resource containerRegistry 'Microsoft.ContainerRegistry/registries@2023-01-01-preview' existing = { + name: containerRegistryName +} diff --git a/infra/main-aca.bicep b/infra/main-aca.bicep new file mode 100644 index 0000000000..c21d87cac6 --- /dev/null +++ b/infra/main-aca.bicep @@ -0,0 +1,890 @@ +targetScope = 'subscription' + +@minLength(1) +@maxLength(64) +@description('Name of the the environment which is used to generate a short unique hash used in all resources.') +param environmentName string + +@minLength(1) +@description('Primary location for all resources') +param location string + +param appServicePlanName string = '' // Set in main.parameters.json +param backendServiceName string = '' // Set in main.parameters.json +param resourceGroupName string = '' // Set in main.parameters.json + +// ACA parametors +param containerAppsEnvironmentName string +param containerRegistryName string +param webAppExists bool +param identityName string + +param applicationInsightsDashboardName string = '' // Set in main.parameters.json +param applicationInsightsName string = '' // Set in main.parameters.json +param logAnalyticsName string = '' // Set in main.parameters.json + +param searchServiceName string = '' // Set in main.parameters.json +param searchServiceResourceGroupName string = '' // Set in main.parameters.json +param searchServiceLocation string = '' 
// Set in main.parameters.json +// The free tier does not support managed identity (required) or semantic search (optional) +@allowed([ 'free', 'basic', 'standard', 'standard2', 'standard3', 'storage_optimized_l1', 'storage_optimized_l2' ]) +param searchServiceSkuName string // Set in main.parameters.json +param searchIndexName string // Set in main.parameters.json +param searchQueryLanguage string // Set in main.parameters.json +param searchQuerySpeller string // Set in main.parameters.json +param searchServiceSemanticRankerLevel string // Set in main.parameters.json +var actualSearchServiceSemanticRankerLevel = (searchServiceSkuName == 'free') ? 'disabled' : searchServiceSemanticRankerLevel + +param storageAccountName string = '' // Set in main.parameters.json +param storageResourceGroupName string = '' // Set in main.parameters.json +param storageResourceGroupLocation string = location +param storageContainerName string = 'content' +param storageSkuName string // Set in main.parameters.json + +param userStorageAccountName string = '' +param userStorageContainerName string = 'user-content' + +param appServiceSkuName string // Set in main.parameters.json + +@allowed([ 'azure', 'openai', 'azure_custom' ]) +param openAiHost string // Set in main.parameters.json +param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure') +param deployAzureOpenAi bool = openAiHost == 'azure' +param azureOpenAiCustomUrl string = '' +param azureOpenAiApiVersion string = '' +@secure() +param azureOpenAiApiKey string = '' +param openAiServiceName string = '' +param openAiResourceGroupName string = '' + +param speechServiceResourceGroupName string = '' +param speechServiceLocation string = '' +param speechServiceName string = '' +param speechServiceSkuName string // Set in main.parameters.json +param useGPT4V bool = false + +@description('Location for the OpenAI resource group') +@allowed([ 'canadaeast', 'eastus', 'eastus2', 'francecentral', 'switzerlandnorth', 'uksouth', 'japaneast', 
'northcentralus', 'australiaeast', 'swedencentral' ]) +@metadata({ + azd: { + type: 'location' + } +}) +param openAiResourceGroupLocation string + +param openAiSkuName string = 'S0' + +@secure() +param openAiApiKey string = '' +param openAiApiOrganization string = '' + +param documentIntelligenceServiceName string = '' // Set in main.parameters.json +param documentIntelligenceResourceGroupName string = '' // Set in main.parameters.json + +// Limited regions for new version: +// https://learn.microsoft.com/azure/ai-services/document-intelligence/concept-layout +@description('Location for the Document Intelligence resource group') +@allowed([ 'eastus', 'westus2', 'westeurope' ]) +@metadata({ + azd: { + type: 'location' + } +}) +param documentIntelligenceResourceGroupLocation string + +param documentIntelligenceSkuName string // Set in main.parameters.json + +param computerVisionServiceName string = '' // Set in main.parameters.json +param computerVisionResourceGroupName string = '' // Set in main.parameters.json +param computerVisionResourceGroupLocation string = '' // Set in main.parameters.json +param computerVisionSkuName string // Set in main.parameters.json + +param chatGptModelName string = '' +param chatGptDeploymentName string = '' +param chatGptDeploymentVersion string = '' +param chatGptDeploymentCapacity int = 0 +var chatGpt = { + modelName: !empty(chatGptModelName) ? chatGptModelName : startsWith(openAiHost, 'azure') ? 'gpt-35-turbo' : 'gpt-3.5-turbo' + deploymentName: !empty(chatGptDeploymentName) ? chatGptDeploymentName : 'chat' + deploymentVersion: !empty(chatGptDeploymentVersion) ? chatGptDeploymentVersion : '0613' + deploymentCapacity: chatGptDeploymentCapacity != 0 ? 
chatGptDeploymentCapacity : 30 +} + +param embeddingModelName string = '' +param embeddingDeploymentName string = '' +param embeddingDeploymentVersion string = '' +param embeddingDeploymentCapacity int = 0 +param embeddingDimensions int = 0 +var embedding = { + modelName: !empty(embeddingModelName) ? embeddingModelName : 'text-embedding-ada-002' + deploymentName: !empty(embeddingDeploymentName) ? embeddingDeploymentName : 'embedding' + deploymentVersion: !empty(embeddingDeploymentVersion) ? embeddingDeploymentVersion : '2' + deploymentCapacity: embeddingDeploymentCapacity != 0 ? embeddingDeploymentCapacity : 30 + dimensions: embeddingDimensions != 0 ? embeddingDimensions : 1536 +} + +param gpt4vModelName string = 'gpt-4o' +param gpt4vDeploymentName string = 'gpt-4o' +param gpt4vModelVersion string = '2024-05-13' +param gpt4vDeploymentCapacity int = 10 + +param tenantId string = tenant().tenantId +param authTenantId string = '' + +// Used for the optional login and document level access control system +param useAuthentication bool = false +param enforceAccessControl bool = false +param enableGlobalDocuments bool = false +param enableUnauthenticatedAccess bool = false +param serverAppId string = '' +@secure() +param serverAppSecret string = '' +param clientAppId string = '' +@secure() +param clientAppSecret string = '' + +// Used for optional CORS support for alternate frontends +param allowedOrigin string = '' // should start with https://, shouldn't end with a / + +@allowed([ 'None', 'AzureServices' ]) +@description('If allowedIp is set, whether azure services are allowed to bypass the storage and AI services firewall.') +param bypass string = 'AzureServices' + +@description('Public network access value for all deployed resources') +@allowed([ 'Enabled', 'Disabled' ]) +param publicNetworkAccess string = 'Enabled' + +@description('Add a private endpoints for network connectivity') +param usePrivateEndpoint bool = false + +@description('Id of the user or app to 
assign application roles') +param principalId string = '' + +@description('Use Application Insights for monitoring and performance tracing') +param useApplicationInsights bool = false + +@description('Use speech recognition feature in browser') +param useSpeechInputBrowser bool = false +@description('Use speech synthesis in browser') +param useSpeechOutputBrowser bool = false +@description('Use Azure speech service for reading out text') +param useSpeechOutputAzure bool = false +@description('Show options to use vector embeddings for searching in the app UI') +param useVectors bool = false +@description('Use Built-in integrated Vectorization feature of AI Search to vectorize and ingest documents') +param useIntegratedVectorization bool = false + +@description('Enable user document upload feature') +param useUserUpload bool = false +param useLocalPdfParser bool = false +param useLocalHtmlParser bool = false + +var abbrs = loadJsonContent('abbreviations.json') +var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) +var tags = { 'azd-env-name': environmentName } + +var tenantIdForAuth = !empty(authTenantId) ? authTenantId : tenantId +var authenticationIssuerUri = '${environment().authentication.loginEndpoint}${tenantIdForAuth}/v2.0' + +@description('Whether the deployment is running on GitHub Actions') +param runningOnGh string = '' + +@description('Whether the deployment is running on Azure DevOps Pipeline') +param runningOnAdo string = '' + +// Organize resources in a resource group +resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { + name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' + location: location + tags: tags +} + +resource openAiResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(openAiResourceGroupName)) { + name: !empty(openAiResourceGroupName) ? 
openAiResourceGroupName : resourceGroup.name +} + +resource documentIntelligenceResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(documentIntelligenceResourceGroupName)) { + name: !empty(documentIntelligenceResourceGroupName) ? documentIntelligenceResourceGroupName : resourceGroup.name +} + +resource computerVisionResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(computerVisionResourceGroupName)) { + name: !empty(computerVisionResourceGroupName) ? computerVisionResourceGroupName : resourceGroup.name +} + +resource searchServiceResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(searchServiceResourceGroupName)) { + name: !empty(searchServiceResourceGroupName) ? searchServiceResourceGroupName : resourceGroup.name +} + +resource storageResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(storageResourceGroupName)) { + name: !empty(storageResourceGroupName) ? storageResourceGroupName : resourceGroup.name +} + +resource speechResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(speechServiceResourceGroupName)) { + name: !empty(speechServiceResourceGroupName) ? speechServiceResourceGroupName : resourceGroup.name +} + +// Monitor application with Azure Monitor +module monitoring 'core/monitor/monitoring.bicep' = if (useApplicationInsights) { + name: 'monitoring' + scope: resourceGroup + params: { + location: location + tags: tags + applicationInsightsName: !empty(applicationInsightsName) ? applicationInsightsName : '${abbrs.insightsComponents}${resourceToken}' + logAnalyticsName: !empty(logAnalyticsName) ? 
logAnalyticsName : '${abbrs.operationalInsightsWorkspaces}${resourceToken}' + publicNetworkAccess: publicNetworkAccess + } +} + +module applicationInsightsDashboard 'backend-dashboard.bicep' = if (useApplicationInsights) { + name: 'application-insights-dashboard' + scope: resourceGroup + params: { + name: !empty(applicationInsightsDashboardName) ? applicationInsightsDashboardName : '${abbrs.portalDashboards}${resourceToken}' + location: location + applicationInsightsName: useApplicationInsights ? monitoring.outputs.applicationInsightsName : '' + } +} + +// Create an App Service Plan to group applications under the same payment plan and SKU +/* +module appServicePlan 'core/host/appserviceplan.bicep' = { + name: 'appserviceplan' + scope: resourceGroup + params: { + name: !empty(appServicePlanName) ? appServicePlanName : '${abbrs.webServerFarms}${resourceToken}' + location: location + tags: tags + sku: { + name: appServiceSkuName + capacity: 1 + } + kind: 'linux' + } +} +*/ + + +module acaIdentity 'core/security/aca-identity.bicep' = { + name: identityName + scope: resourceGroup + params: { + identityName: identityName + location: location + } +} + +module containerApps 'core/host/container-apps.bicep' = { + name: 'container-apps' + scope: resourceGroup + params: { + name: 'app' + location: location + containerAppsEnvironmentName: containerAppsEnvironmentName + containerRegistryName: containerRegistryName + logAnalyticsWorkspaceResourceId: monitoring.outputs.logAnalyticsWorkspaceId + // virtualNetworkSubnetId: virtualNetwork.outputs.subnetResourceIds[1] + } +} + +// The application frontend +module backend 'core/host/container-app-upsert.bicep' = { + name: 'aca-web' + scope: resourceGroup + params: { + name: !empty(backendServiceName) ? 
backendServiceName : '${abbrs.webSitesAppService}backend-${resourceToken}' + location: location + identityName: acaIdentity.name + exists: webAppExists + containerRegistryName: containerApps.outputs.registryName + containerAppsEnvironmentName: containerApps.outputs.environmentName + identityType: 'UserAssigned' + tags: union(tags, { 'azd-service-name': 'backend' }) + targetPort: 8000 + containerCpuCoreCount: '2.0' + containerMemory: '4Gi' + env: { + AZURE_STORAGE_ACCOUNT: storage.outputs.name + AZURE_STORAGE_CONTAINER: storageContainerName + AZURE_SEARCH_INDEX: searchIndexName + AZURE_SEARCH_SERVICE: searchService.outputs.name + AZURE_SEARCH_SEMANTIC_RANKER: actualSearchServiceSemanticRankerLevel + AZURE_VISION_ENDPOINT: useGPT4V ? computerVision.outputs.endpoint : '' + AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage + AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller + APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' + AZURE_SPEECH_SERVICE_ID: useSpeechOutputAzure ? speech.outputs.resourceId : '' + AZURE_SPEECH_SERVICE_LOCATION: useSpeechOutputAzure ? speech.outputs.location : '' + USE_SPEECH_INPUT_BROWSER: useSpeechInputBrowser + USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser + USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure + // Shared by all OpenAI deployments + OPENAI_HOST: openAiHost + AZURE_OPENAI_EMB_MODEL_NAME: embedding.modelName + AZURE_OPENAI_EMB_DIMENSIONS: embedding.dimensions + AZURE_OPENAI_CHATGPT_MODEL: chatGpt.modelName + AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName + // Specific to Azure OpenAI + AZURE_OPENAI_SERVICE: isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' + AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName + AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName + AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? 
gpt4vDeploymentName : '' + AZURE_OPENAI_API_VERSION: azureOpenAiApiVersion + AZURE_OPENAI_API_KEY_OVERRIDE: azureOpenAiApiKey + AZURE_OPENAI_CUSTOM_URL: azureOpenAiCustomUrl + // Used only with non-Azure OpenAI deployments + OPENAI_API_KEY: openAiApiKey + OPENAI_ORGANIZATION: openAiApiOrganization + // Optional login and document level access control system + AZURE_USE_AUTHENTICATION: useAuthentication + AZURE_ENFORCE_ACCESS_CONTROL: enforceAccessControl + AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: enableGlobalDocuments + AZURE_ENABLE_UNAUTHENTICATED_ACCESS: enableUnauthenticatedAccess + AZURE_SERVER_APP_ID: serverAppId + AZURE_SERVER_APP_SECRET: serverAppSecret + AZURE_CLIENT_APP_ID: clientAppId + AZURE_CLIENT_APP_SECRET: clientAppSecret + AZURE_TENANT_ID: tenantId + AZURE_AUTH_TENANT_ID: tenantIdForAuth + AZURE_AUTHENTICATION_ISSUER_URI: authenticationIssuerUri + // CORS support, for frontends on other hosts + ALLOWED_ORIGIN: allowedOrigin + USE_VECTORS: useVectors + USE_GPT4V: useGPT4V + USE_USER_UPLOAD: useUserUpload + AZURE_USERSTORAGE_ACCOUNT: useUserUpload ? userStorage.outputs.name : '' + AZURE_USERSTORAGE_CONTAINER: useUserUpload ? userStorageContainerName : '' + AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name + USE_LOCAL_PDF_PARSER: useLocalPdfParser + USE_LOCAL_HTML_PARSER: useLocalHtmlParser + } + } +} + +var defaultOpenAiDeployments = [ + { + name: chatGpt.deploymentName + model: { + format: 'OpenAI' + name: chatGpt.modelName + version: chatGpt.deploymentVersion + } + sku: { + name: 'Standard' + capacity: chatGpt.deploymentCapacity + } + } + { + name: embedding.deploymentName + model: { + format: 'OpenAI' + name: embedding.modelName + version: embedding.deploymentVersion + } + sku: { + name: 'Standard' + capacity: embedding.deploymentCapacity + } + } +] + +var openAiDeployments = concat(defaultOpenAiDeployments, useGPT4V ? 
[ + { + name: gpt4vDeploymentName + model: { + format: 'OpenAI' + name: gpt4vModelName + version: gpt4vModelVersion + } + sku: { + name: 'Standard' + capacity: gpt4vDeploymentCapacity + } + } + ] : []) + +module openAi 'br/public:avm/res/cognitive-services/account:0.5.4' = if (isAzureOpenAiHost && deployAzureOpenAi) { + name: 'openai' + scope: openAiResourceGroup + params: { + name: !empty(openAiServiceName) ? openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' + location: openAiResourceGroupLocation + tags: tags + kind: 'OpenAI' + customSubDomainName: !empty(openAiServiceName) ? openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' + publicNetworkAccess: publicNetworkAccess + networkAcls: { + defaultAction: 'Allow' + bypass: bypass + } + sku: openAiSkuName + deployments: openAiDeployments + disableLocalAuth: true + } +} + +// Formerly known as Form Recognizer +// Does not support bypass +module documentIntelligence 'br/public:avm/res/cognitive-services/account:0.5.4' = { + name: 'documentintelligence' + scope: documentIntelligenceResourceGroup + params: { + name: !empty(documentIntelligenceServiceName) ? documentIntelligenceServiceName : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' + kind: 'FormRecognizer' + customSubDomainName: !empty(documentIntelligenceServiceName) ? documentIntelligenceServiceName : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' + publicNetworkAccess: publicNetworkAccess + networkAcls: { + defaultAction: 'Allow' + } + location: documentIntelligenceResourceGroupLocation + disableLocalAuth: true + tags: tags + sku: documentIntelligenceSkuName + } +} + +module computerVision 'br/public:avm/res/cognitive-services/account:0.5.4' = if (useGPT4V) { + name: 'computerVision' + scope: computerVisionResourceGroup + params: { + name: !empty(computerVisionServiceName) + ? 
computerVisionServiceName + : '${abbrs.cognitiveServicesComputerVision}${resourceToken}' + kind: 'ComputerVision' + networkAcls: { + defaultAction: 'Allow' + } + customSubDomainName: !empty(computerVisionServiceName) + ? computerVisionServiceName + : '${abbrs.cognitiveServicesComputerVision}${resourceToken}' + location: computerVisionResourceGroupLocation + tags: tags + sku: computerVisionSkuName + } +} + +module speech 'br/public:avm/res/cognitive-services/account:0.5.4' = if (useSpeechOutputAzure) { + name: 'speech-service' + scope: speechResourceGroup + params: { + name: !empty(speechServiceName) ? speechServiceName : '${abbrs.cognitiveServicesSpeech}${resourceToken}' + kind: 'SpeechServices' + networkAcls: { + defaultAction: 'Allow' + } + customSubDomainName: !empty(speechServiceName) ? speechServiceName : '${abbrs.cognitiveServicesSpeech}${resourceToken}' + location: !empty(speechServiceLocation) ? speechServiceLocation : location + tags: tags + sku: speechServiceSkuName + } +} +module searchService 'core/search/search-services.bicep' = { + name: 'search-service' + scope: searchServiceResourceGroup + params: { + name: !empty(searchServiceName) ? searchServiceName : 'gptkb-${resourceToken}' + location: !empty(searchServiceLocation) ? searchServiceLocation : location + tags: tags + disableLocalAuth: true + sku: { + name: searchServiceSkuName + } + semanticSearch: actualSearchServiceSemanticRankerLevel + publicNetworkAccess: publicNetworkAccess == 'Enabled' ? 'enabled' : (publicNetworkAccess == 'Disabled' ? 'disabled' : null) + sharedPrivateLinkStorageAccounts: usePrivateEndpoint ? [ storage.outputs.id ] : [] + } +} + +module searchDiagnostics 'core/search/search-diagnostics.bicep' = if (useApplicationInsights) { + name: 'search-diagnostics' + scope: searchServiceResourceGroup + params: { + searchServiceName: searchService.outputs.name + workspaceId: useApplicationInsights ? 
monitoring.outputs.logAnalyticsWorkspaceId : '' + } +} + +module storage 'core/storage/storage-account.bicep' = { + name: 'storage' + scope: storageResourceGroup + params: { + name: !empty(storageAccountName) ? storageAccountName : '${abbrs.storageStorageAccounts}${resourceToken}' + location: storageResourceGroupLocation + tags: tags + publicNetworkAccess: publicNetworkAccess + bypass: bypass + allowBlobPublicAccess: false + allowSharedKeyAccess: false + sku: { + name: storageSkuName + } + deleteRetentionPolicy: { + enabled: true + days: 2 + } + containers: [ + { + name: storageContainerName + publicAccess: 'None' + } + ] + } +} + +module userStorage 'core/storage/storage-account.bicep' = if (useUserUpload) { + name: 'user-storage' + scope: storageResourceGroup + params: { + name: !empty(userStorageAccountName) ? userStorageAccountName : 'user${abbrs.storageStorageAccounts}${resourceToken}' + location: storageResourceGroupLocation + tags: tags + publicNetworkAccess: publicNetworkAccess + bypass: bypass + allowBlobPublicAccess: false + allowSharedKeyAccess: false + isHnsEnabled: true + sku: { + name: storageSkuName + } + containers: [ + { + name: userStorageContainerName + publicAccess: 'None' + } + ] + } +} + +// USER ROLES +var principalType = empty(runningOnGh) && empty(runningOnAdo) ? 
'User' : 'ServicePrincipal' + +module openAiRoleUser 'core/security/role.bicep' = if (isAzureOpenAiHost && deployAzureOpenAi) { + scope: openAiResourceGroup + name: 'openai-role-user' + params: { + principalId: principalId + roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' + principalType: principalType + } +} + +// For both document intelligence and computer vision +module cognitiveServicesRoleUser 'core/security/role.bicep' = { + scope: resourceGroup + name: 'cognitiveservices-role-user' + params: { + principalId: principalId + roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' + principalType: principalType + } +} + +module speechRoleUser 'core/security/role.bicep' = { + scope: speechResourceGroup + name: 'speech-role-user' + params: { + principalId: principalId + roleDefinitionId: 'f2dc8367-1007-4938-bd23-fe263f013447' + principalType: principalType + } +} + +module storageRoleUser 'core/security/role.bicep' = { + scope: storageResourceGroup + name: 'storage-role-user' + params: { + principalId: principalId + roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' + principalType: principalType + } +} + +module storageContribRoleUser 'core/security/role.bicep' = { + scope: storageResourceGroup + name: 'storage-contrib-role-user' + params: { + principalId: principalId + roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' + principalType: principalType + } +} + +module storageOwnerRoleUser 'core/security/role.bicep' = if (useUserUpload) { + scope: storageResourceGroup + name: 'storage-owner-role-user' + params: { + principalId: principalId + roleDefinitionId: 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' + principalType: principalType + } +} + +module searchRoleUser 'core/security/role.bicep' = { + scope: searchServiceResourceGroup + name: 'search-role-user' + params: { + principalId: principalId + roleDefinitionId: '1407120a-92aa-4202-b7e9-c0e197c71c8f' + principalType: principalType + } +} + +module searchContribRoleUser 'core/security/role.bicep' 
= { + scope: searchServiceResourceGroup + name: 'search-contrib-role-user' + params: { + principalId: principalId + roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' + principalType: principalType + } +} + +module searchSvcContribRoleUser 'core/security/role.bicep' = { + scope: searchServiceResourceGroup + name: 'search-svccontrib-role-user' + params: { + principalId: principalId + roleDefinitionId: '7ca78c08-252a-4471-8644-bb5ff32d4ba0' + principalType: principalType + } +} + +// SYSTEM IDENTITIES +module openAiRoleBackend 'core/security/role.bicep' = if (isAzureOpenAiHost && deployAzureOpenAi) { + scope: openAiResourceGroup + name: 'openai-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' + principalType: 'ServicePrincipal' + } +} + +module openAiRoleSearchService 'core/security/role.bicep' = if (isAzureOpenAiHost && deployAzureOpenAi && useIntegratedVectorization) { + scope: openAiResourceGroup + name: 'openai-role-searchservice' + params: { + principalId: searchService.outputs.principalId + roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' + principalType: 'ServicePrincipal' + } +} + +module storageRoleBackend 'core/security/role.bicep' = { + scope: storageResourceGroup + name: 'storage-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' + principalType: 'ServicePrincipal' + } +} + +module storageOwnerRoleBackend 'core/security/role.bicep' = if (useUserUpload) { + scope: storageResourceGroup + name: 'storage-owner-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' + principalType: 'ServicePrincipal' + } +} + +module storageRoleSearchService 'core/security/role.bicep' = if (useIntegratedVectorization) { + scope: storageResourceGroup + name: 'storage-role-searchservice' + params: { + principalId: 
searchService.outputs.principalId + roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' + principalType: 'ServicePrincipal' + } +} + +// Used to issue search queries +// https://learn.microsoft.com/azure/search/search-security-rbac +module searchRoleBackend 'core/security/role.bicep' = { + scope: searchServiceResourceGroup + name: 'search-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: '1407120a-92aa-4202-b7e9-c0e197c71c8f' + principalType: 'ServicePrincipal' + } +} + +module speechRoleBackend 'core/security/role.bicep' = { + scope: speechResourceGroup + name: 'speech-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: 'f2dc8367-1007-4938-bd23-fe263f013447' + principalType: 'ServicePrincipal' + } +} + +/* +module isolation 'network-isolation.bicep' = { + name: 'networks' + scope: resourceGroup + params: { + location: location + tags: tags + vnetName: '${abbrs.virtualNetworks}${resourceToken}' + appServicePlanName: appServicePlan.outputs.name + usePrivateEndpoint: usePrivateEndpoint + } +} + */ + +var environmentData = environment() + +var openAiPrivateEndpointConnection = (isAzureOpenAiHost && deployAzureOpenAi) ? [{ + groupId: 'account' + dnsZoneName: 'privatelink.openai.azure.com' + resourceIds: concat( + [ openAi.outputs.resourceId ], + useGPT4V ? [ computerVision.outputs.resourceId ] : [], + !useLocalPdfParser ? [ documentIntelligence.outputs.resourceId ] : [] + ) +}] : [] +var otherPrivateEndpointConnections = usePrivateEndpoint ? [ + { + groupId: 'blob' + dnsZoneName: 'privatelink.blob.${environmentData.suffixes.storage}' + resourceIds: concat( + [ storage.outputs.id ], + useUserUpload ? 
[ userStorage.outputs.id ] : [] + ) + } + { + groupId: 'searchService' + dnsZoneName: 'privatelink.search.windows.net' + resourceIds: [ searchService.outputs.id ] + } + { + groupId: 'sites' + dnsZoneName: 'privatelink.azurewebsites.net' + resourceIds: [ backend.outputs.id ] + } +] : [] + + +/* +var privateEndpointConnections = concat(otherPrivateEndpointConnections, openAiPrivateEndpointConnection) + +module privateEndpoints 'private-endpoints.bicep' = if (usePrivateEndpoint) { + name: 'privateEndpoints' + scope: resourceGroup + params: { + location: location + tags: tags + resourceToken: resourceToken + privateEndpointConnections: privateEndpointConnections + applicationInsightsId: useApplicationInsights ? monitoring.outputs.applicationInsightsId : '' + logAnalyticsWorkspaceId: useApplicationInsights ? monitoring.outputs.logAnalyticsWorkspaceId : '' + vnetName: isolation.outputs.vnetName + vnetPeSubnetName: isolation.outputs.backendSubnetId + } +} + */ + +// Used to read index definitions (required when using authentication) +// https://learn.microsoft.com/azure/search/search-security-rbac +module searchReaderRoleBackend 'core/security/role.bicep' = if (useAuthentication) { + scope: searchServiceResourceGroup + name: 'search-reader-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' + principalType: 'ServicePrincipal' + } +} + +// Used to add/remove documents from index (required for user upload feature) +module searchContribRoleBackend 'core/security/role.bicep' = if (useUserUpload) { + scope: searchServiceResourceGroup + name: 'search-contrib-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' + principalType: 'ServicePrincipal' + } +} + +// For computer vision access by the backend +module computerVisionRoleBackend 'core/security/role.bicep' = if (useGPT4V) { + scope: computerVisionResourceGroup 
+ name: 'computervision-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' + principalType: 'ServicePrincipal' + } +} + +// For document intelligence access by the backend +module documentIntelligenceRoleBackend 'core/security/role.bicep' = if (useUserUpload) { + scope: documentIntelligenceResourceGroup + name: 'documentintelligence-role-backend' + params: { + principalId: backend.outputs.identityPrincipalId + roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' + principalType: 'ServicePrincipal' + } +} + +output AZURE_LOCATION string = location +output AZURE_TENANT_ID string = tenantId +output AZURE_AUTH_TENANT_ID string = authTenantId +output AZURE_RESOURCE_GROUP string = resourceGroup.name + +// Shared by all OpenAI deployments +output OPENAI_HOST string = openAiHost +output AZURE_OPENAI_EMB_MODEL_NAME string = embedding.modelName +output AZURE_OPENAI_CHATGPT_MODEL string = chatGpt.modelName +output AZURE_OPENAI_GPT4V_MODEL string = gpt4vModelName + +// Specific to Azure OpenAI +output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' +output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : '' +output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : '' +output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : '' +output AZURE_OPENAI_EMB_DEPLOYMENT string = isAzureOpenAiHost ? embedding.deploymentName : '' +output AZURE_OPENAI_GPT4V_DEPLOYMENT string = isAzureOpenAiHost ? gpt4vDeploymentName : '' + +output AZURE_SPEECH_SERVICE_ID string = useSpeechOutputAzure ? speech.outputs.resourceId : '' +output AZURE_SPEECH_SERVICE_LOCATION string = useSpeechOutputAzure ? speech.outputs.location : '' + +output AZURE_VISION_ENDPOINT string = useGPT4V ? 
computerVision.outputs.endpoint : '' + +output AZURE_DOCUMENTINTELLIGENCE_SERVICE string = documentIntelligence.outputs.name +output AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP string = documentIntelligenceResourceGroup.name + +output AZURE_SEARCH_INDEX string = searchIndexName +output AZURE_SEARCH_SERVICE string = searchService.outputs.name +output AZURE_SEARCH_SERVICE_RESOURCE_GROUP string = searchServiceResourceGroup.name +output AZURE_SEARCH_SEMANTIC_RANKER string = actualSearchServiceSemanticRankerLevel +output AZURE_SEARCH_SERVICE_ASSIGNED_USERID string = searchService.outputs.principalId + +output AZURE_STORAGE_ACCOUNT string = storage.outputs.name +output AZURE_STORAGE_CONTAINER string = storageContainerName +output AZURE_STORAGE_RESOURCE_GROUP string = storageResourceGroup.name + +output AZURE_USERSTORAGE_ACCOUNT string = useUserUpload ? userStorage.outputs.name : '' +output AZURE_USERSTORAGE_CONTAINER string = userStorageContainerName +output AZURE_USERSTORAGE_RESOURCE_GROUP string = storageResourceGroup.name + +output AZURE_USE_AUTHENTICATION bool = useAuthentication + +output BACKEND_URI string = backend.outputs.uri + +output AZURE_CONTAINER_ENVIRONMENT_NAME string = containerApps.outputs.environmentName +output AZURE_CONTAINER_REGISTRY_ENDPOINT string = containerApps.outputs.registryLoginServer +output AZURE_CONTAINER_REGISTRY_NAME string = containerApps.outputs.registryName + + +// output SERVICE_WEB_IDENTITY_PRINCIPAL_ID string = webIdentity.properties.principalId +output SERVICE_WEB_NAME string = backend.outputs.name +output SERVICE_WEB_URI string = backend.outputs.uri +output SERVICE_WEB_IMAGE_NAME string = backend.outputs.imageName + diff --git a/infra/main-aca.parameters.json b/infra/main-aca.parameters.json new file mode 100644 index 0000000000..566c148abb --- /dev/null +++ b/infra/main-aca.parameters.json @@ -0,0 +1,240 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": 
"1.0.0.0", + "parameters": { + "environmentName": { + "value": "${AZURE_ENV_NAME}" + }, + "resourceGroupName": { + "value": "${AZURE_RESOURCE_GROUP}" + }, + "location": { + "value": "${AZURE_LOCATION}" + }, + "principalId": { + "value": "${AZURE_PRINCIPAL_ID}" + }, + "openAiServiceName": { + "value": "${AZURE_OPENAI_SERVICE}" + }, + "openAiResourceGroupName": { + "value": "${AZURE_OPENAI_RESOURCE_GROUP}" + }, + "openAiSkuName": { + "value": "S0" + }, + "computerVisionServiceName": { + "value": "${AZURE_COMPUTER_VISION_SERVICE}" + }, + "computerVisionResourceGroupName": { + "value": "${AZURE_COMPUTER_VISION_RESOURCE_GROUP}" + }, + "computerVisionResourceGroupLocation": { + "value": "${AZURE_COMPUTER_VISION_LOCATION=eastus}" + }, + "computerVisionSkuName": { + "value": "${AZURE_COMPUTER_VISION_SKU=S1}" + }, + "documentIntelligenceServiceName": { + "value": "${AZURE_DOCUMENTINTELLIGENCE_SERVICE}" + }, + "documentIntelligenceResourceGroupName": { + "value": "${AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP}" + }, + "documentIntelligenceSkuName": { + "value": "${AZURE_DOCUMENTINTELLIGENCE_SKU=S0}" + }, + "documentIntelligenceResourceGroupLocation": { + "value": "${AZURE_DOCUMENTINTELLIGENCE_LOCATION}" + }, + "searchIndexName": { + "value": "${AZURE_SEARCH_INDEX=gptkbindex}" + }, + "searchServiceName": { + "value": "${AZURE_SEARCH_SERVICE}" + }, + "searchServiceResourceGroupName": { + "value": "${AZURE_SEARCH_SERVICE_RESOURCE_GROUP}" + }, + "searchServiceLocation": { + "value": "${AZURE_SEARCH_SERVICE_LOCATION}" + }, + "searchServiceSkuName": { + "value": "${AZURE_SEARCH_SERVICE_SKU=standard}" + }, + "searchQueryLanguage": { + "value": "${AZURE_SEARCH_QUERY_LANGUAGE=en-us}" + }, + "searchQuerySpeller": { + "value": "${AZURE_SEARCH_QUERY_SPELLER=lexicon}" + }, + "searchServiceSemanticRankerLevel": { + "value": "${AZURE_SEARCH_SEMANTIC_RANKER=free}" + }, + "storageAccountName": { + "value": "${AZURE_STORAGE_ACCOUNT}" + }, + "storageResourceGroupName": { + "value": 
"${AZURE_STORAGE_RESOURCE_GROUP}" + }, + "storageSkuName": { + "value": "${AZURE_STORAGE_SKU=Standard_LRS}" + }, + "appServicePlanName": { + "value": "${AZURE_APP_SERVICE_PLAN}" + }, + "appServiceSkuName": { + "value": "${AZURE_APP_SERVICE_SKU=B1}" + }, + "backendServiceName": { + "value": "${AZURE_APP_SERVICE}" + }, + "chatGptModelName":{ + "value": "${AZURE_OPENAI_CHATGPT_MODEL}" + }, + "chatGptDeploymentName": { + "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT}" + }, + "chatGptDeploymentVersion":{ + "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION}" + }, + "chatGptDeploymentCapacity":{ + "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY}" + }, + "embeddingModelName":{ + "value": "${AZURE_OPENAI_EMB_MODEL_NAME}" + }, + "embeddingDeploymentName": { + "value": "${AZURE_OPENAI_EMB_DEPLOYMENT}" + }, + "embeddingDeploymentVersion":{ + "value": "${AZURE_OPENAI_EMB_DEPLOYMENT_VERSION}" + }, + "embeddingDeploymentCapacity":{ + "value": "${AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY}" + }, + "embeddingDimensions": { + "value": "${AZURE_OPENAI_EMB_DIMENSIONS}" + }, + "openAiHost": { + "value": "${OPENAI_HOST=azure}" + }, + "azureOpenAiCustomUrl":{ + "value": "${AZURE_OPENAI_CUSTOM_URL}" + }, + "azureOpenAiApiVersion":{ + "value": "${AZURE_OPENAI_API_VERSION}" + }, + "azureOpenAiApiKey":{ + "value": "${AZURE_OPENAI_API_KEY_OVERRIDE}" + }, + "openAiApiKey": { + "value": "${OPENAI_API_KEY}" + }, + "openAiApiOrganization": { + "value": "${OPENAI_ORGANIZATION}" + }, + "useApplicationInsights": { + "value": "${AZURE_USE_APPLICATION_INSIGHTS=true}" + }, + "applicationInsightsName": { + "value": "${AZURE_APPLICATION_INSIGHTS}" + }, + "applicationInsightsDashboardName": { + "value": "${AZURE_APPLICATION_INSIGHTS_DASHBOARD}" + }, + "logAnalyticsName": { + "value": "${AZURE_LOG_ANALYTICS}" + }, + "useVectors": { + "value": "${USE_VECTORS=true}" + }, + "useGPT4V": { + "value": "${USE_GPT4V=false}" + }, + "useSpeechInputBrowser": { + "value": "${USE_SPEECH_INPUT_BROWSER=false}" + }, + 
"useSpeechOutputBrowser": { + "value": "${USE_SPEECH_OUTPUT_BROWSER=false}" + }, + "useSpeechOutputAzure": { + "value": "${USE_SPEECH_OUTPUT_AZURE=false}" + }, + "speechServiceName": { + "value": "${AZURE_SPEECH_SERVICE}" + }, + "speechServiceSkuName": { + "value": "${AZURE_SPEECH_SERVICE_SKU=S0}" + }, + "speechServiceResourceGroupName": { + "value": "${AZURE_SPEECH_SERVICE_RESOURCE_GROUP}" + }, + "speechServiceLocation": { + "value": "${AZURE_SPEECH_SERVICE_LOCATION}" + }, + "useAuthentication": { + "value": "${AZURE_USE_AUTHENTICATION=false}" + }, + "enforceAccessControl": { + "value": "${AZURE_ENFORCE_ACCESS_CONTROL=false}" + }, + "enableGlobalDocuments": { + "value": "${AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS=false}" + }, + "enableUnauthenticatedAccess": { + "value": "${AZURE_ENABLE_UNAUTHENTICATED_ACCESS=false}" + }, + "tenantId": { + "value": "${AZURE_TENANT_ID}" + }, + "authTenantId": { + "value": "${AZURE_AUTH_TENANT_ID}" + }, + "serverAppId": { + "value": "${AZURE_SERVER_APP_ID}" + }, + "serverAppSecret": { + "value": "${AZURE_SERVER_APP_SECRET}" + }, + "clientAppId": { + "value": "${AZURE_CLIENT_APP_ID}" + }, + "clientAppSecret": { + "value": "${AZURE_CLIENT_APP_SECRET}" + }, + "allowedOrigin": { + "value": "${ALLOWED_ORIGIN}" + }, + "publicNetworkAccess": { + "value": "${AZURE_PUBLIC_NETWORK_ACCESS=Enabled}" + }, + "usePrivateEndpoint": { + "value": "${AZURE_USE_PRIVATE_ENDPOINT=false}" + }, + "bypass": { + "value": "${AZURE_NETWORK_BYPASS=AzureServices}" + }, + "useIntegratedVectorization": { + "value": "${USE_FEATURE_INT_VECTORIZATION}" + }, + "useUserUpload": { + "value": "${USE_USER_UPLOAD}" + }, + "useLocalPdfParser": { + "value": "${USE_LOCAL_PDF_PARSER}" + }, + "useLocalHtmlParser": { + "value": "${USE_LOCAL_HTML_PARSER}" + }, + "runningOnGh": { + "value": "${GITHUB_ACTIONS}" + }, + "runningOnAdo": { + "value": "${TF_BUILD}" + }, + "webAppExists": { + "value": "${SERVICE_WEB_RESOURCE_EXISTS=false}" + } + } +} diff --git a/infra/main.parameters.json 
b/infra/main.parameters.json index 3fbd3b65a1..566c148abb 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -232,6 +232,9 @@ }, "runningOnAdo": { "value": "${TF_BUILD}" + }, + "webAppExists": { + "value": "${SERVICE_WEB_RESOURCE_EXISTS=false}" } } } From 30f00e5372f5d5cd91e1a1f5eb7796249fdec086 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Fri, 23 Aug 2024 07:31:19 +0000 Subject: [PATCH 02/41] First working version --- infra/core/security/aca-identity.bicep | 1 + infra/main-aca.bicep | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/infra/core/security/aca-identity.bicep b/infra/core/security/aca-identity.bicep index f2dc303185..a7fcd5a459 100644 --- a/infra/core/security/aca-identity.bicep +++ b/infra/core/security/aca-identity.bicep @@ -7,3 +7,4 @@ resource webIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-3 } output principalId string = webIdentity.properties.principalId +output clientId string = webIdentity.properties.clientId diff --git a/infra/main-aca.bicep b/infra/main-aca.bicep index c21d87cac6..05e2eade5f 100644 --- a/infra/main-aca.bicep +++ b/infra/main-aca.bicep @@ -291,6 +291,10 @@ module containerApps 'core/host/container-apps.bicep' = { module backend 'core/host/container-app-upsert.bicep' = { name: 'aca-web' scope: resourceGroup + dependsOn: [ + containerApps + acaIdentity + ] params: { name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesAppService}backend-${resourceToken}' location: location @@ -357,6 +361,8 @@ module backend 'core/host/container-app-upsert.bicep' = { AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name USE_LOCAL_PDF_PARSER: useLocalPdfParser USE_LOCAL_HTML_PARSER: useLocalHtmlParser + // For using managed identity to access Azure resources. 
See https://github.com/microsoft/azure-container-apps/issues/442 + AZURE_CLIENT_ID: acaIdentity.outputs.clientId } } } @@ -557,6 +563,7 @@ module userStorage 'core/storage/storage-account.bicep' = if (useUserUpload) { // USER ROLES var principalType = empty(runningOnGh) && empty(runningOnAdo) ? 'User' : 'ServicePrincipal' +// var principalType = 'ServicePrincipal' module openAiRoleUser 'core/security/role.bicep' = if (isAzureOpenAiHost && deployAzureOpenAi) { scope: openAiResourceGroup @@ -796,7 +803,7 @@ module searchReaderRoleBackend 'core/security/role.bicep' = if (useAuthenticatio params: { principalId: backend.outputs.identityPrincipalId roleDefinitionId: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' - principalType: 'ServicePrincipal' + principalType: principalType } } @@ -807,7 +814,7 @@ module searchContribRoleBackend 'core/security/role.bicep' = if (useUserUpload) params: { principalId: backend.outputs.identityPrincipalId roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' - principalType: 'ServicePrincipal' + principalType: principalType } } @@ -818,7 +825,7 @@ module computerVisionRoleBackend 'core/security/role.bicep' = if (useGPT4V) { params: { principalId: backend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' - principalType: 'ServicePrincipal' + principalType: principalType } } @@ -829,7 +836,7 @@ module documentIntelligenceRoleBackend 'core/security/role.bicep' = if (useUserU params: { principalId: backend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' - principalType: 'ServicePrincipal' + principalType: principalType } } @@ -883,7 +890,7 @@ output AZURE_CONTAINER_REGISTRY_ENDPOINT string = containerApps.outputs.registry output AZURE_CONTAINER_REGISTRY_NAME string = containerApps.outputs.registryName -// output SERVICE_WEB_IDENTITY_PRINCIPAL_ID string = webIdentity.properties.principalId +output SERVICE_WEB_IDENTITY_PRINCIPAL_ID string = backend.outputs.identityPrincipalId 
output SERVICE_WEB_NAME string = backend.outputs.name output SERVICE_WEB_URI string = backend.outputs.uri output SERVICE_WEB_IMAGE_NAME string = backend.outputs.imageName From 72e34d2d6105a969e7decf601172e7fcfcf58e7c Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Wed, 28 Aug 2024 09:09:22 +0000 Subject: [PATCH 03/41] Support workload profile --- infra/core/host/container-app-upsert.bicep | 4 ++++ infra/core/host/container-app.bicep | 3 +++ infra/core/host/container-apps.bicep | 27 +++++++++++++++++++++- infra/main-aca.bicep | 10 +++++--- infra/main-aca.parameters.json | 3 +++ 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/infra/core/host/container-app-upsert.bicep b/infra/core/host/container-app-upsert.bicep index 3de7660da0..ba8f44147c 100644 --- a/infra/core/host/container-app-upsert.bicep +++ b/infra/core/host/container-app-upsert.bicep @@ -76,6 +76,9 @@ param serviceBinds array = [] @description('The target port for the container') param targetPort int = 80 +@allowed(['Consumption', 'D4', 'D8', 'D16', 'D32', 'E4', 'E8', 'E16', 'E32', 'NC24-A100', 'NC48-A100', 'NC96-A100']) +param workloadProfile string = 'Consumption' + resource existingApp 'Microsoft.App/containerApps@2023-05-02-preview' existing = if (exists) { name: name } @@ -84,6 +87,7 @@ module app 'container-app.bicep' = { name: '${deployment().name}-update' params: { name: name + workloadProfile: workloadProfile location: location tags: tags identityType: identityType diff --git a/infra/core/host/container-app.bicep b/infra/core/host/container-app.bicep index 0dd2957941..af41cb91e2 100644 --- a/infra/core/host/container-app.bicep +++ b/infra/core/host/container-app.bicep @@ -79,6 +79,8 @@ param serviceType string = '' @description('The target port for the container') param targetPort int = 80 +param workloadProfile string = 'Consumption' + resource userIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' existing = if (!empty(identityName)) { name: 
identityName } @@ -123,6 +125,7 @@ resource app 'Microsoft.App/containerApps@2023-05-02-preview' = { } properties: { managedEnvironmentId: containerAppsEnvironment.id + workloadProfileName: workloadProfile configuration: { activeRevisionsMode: revisionMode ingress: ingressEnabled ? { diff --git a/infra/core/host/container-apps.bicep b/infra/core/host/container-apps.bicep index 6aa856514d..7b64f5b101 100644 --- a/infra/core/host/container-apps.bicep +++ b/infra/core/host/container-apps.bicep @@ -10,6 +10,28 @@ param containerRegistryAdminUserEnabled bool = false param logAnalyticsWorkspaceResourceId string param applicationInsightsName string = '' // Not used here, was used for DAPR param virtualNetworkSubnetId string = '' +@allowed(['Consumption', 'D4', 'D8', 'D16', 'D32', 'E4', 'E8', 'E16', 'E32', 'NC24-A100', 'NC48-A100', 'NC96-A100']) +param workloadProfile string + +var workloadProfiles = workloadProfile == 'Consumption' + ? [ + { + name: 'Consumption' + workloadProfileType: 'Consumption' + } + ] + : [ + { + name: 'Consumption' + workloadProfileType: 'Consumption' + } + { + minimumCount: 0 + maximumCount: 2 + name: workloadProfile + workloadProfileType: workloadProfile + } + ] @description('Optional user assigned identity IDs to assign to the resource') param userAssignedIdentityResourceIds array = [] @@ -34,12 +56,15 @@ module containerAppsEnvironment 'br/public:avm/res/app/managed-environment:0.5.2 location: location tags: tags zoneRedundant: false + workloadProfiles: workloadProfiles } } module containerRegistry 'br/public:avm/res/container-registry/registry:0.3.1' = { name: '${name}-container-registry' - scope: !empty(containerRegistryResourceGroupName) ? resourceGroup(containerRegistryResourceGroupName) : resourceGroup() + scope: !empty(containerRegistryResourceGroupName) + ? 
resourceGroup(containerRegistryResourceGroupName) + : resourceGroup() params: { name: containerRegistryName location: location diff --git a/infra/main-aca.bicep b/infra/main-aca.bicep index 05e2eade5f..3677a4c6ce 100644 --- a/infra/main-aca.bicep +++ b/infra/main-aca.bicep @@ -9,15 +9,14 @@ param environmentName string @description('Primary location for all resources') param location string -param appServicePlanName string = '' // Set in main.parameters.json param backendServiceName string = '' // Set in main.parameters.json param resourceGroupName string = '' // Set in main.parameters.json // ACA parametors param containerAppsEnvironmentName string -param containerRegistryName string +param containerRegistryName string = '${replace(containerAppsEnvironmentName, '-', '')}acr' param webAppExists bool -param identityName string +param identityName string= '${environmentName}-aca-identity' param applicationInsightsDashboardName string = '' // Set in main.parameters.json param applicationInsightsName string = '' // Set in main.parameters.json @@ -63,6 +62,9 @@ param speechServiceName string = '' param speechServiceSkuName string // Set in main.parameters.json param useGPT4V bool = false +@allowed(['Consumption', 'D4', 'D8', 'D16', 'D32', 'E4', 'E8', 'E16', 'E32', 'NC24-A100', 'NC48-A100', 'NC96-A100']) +param azureContainerAppsWorkloadProfile string = 'Consumption' + @description('Location for the OpenAI resource group') @allowed([ 'canadaeast', 'eastus', 'eastus2', 'francecentral', 'switzerlandnorth', 'uksouth', 'japaneast', 'northcentralus', 'australiaeast', 'swedencentral' ]) @metadata({ @@ -280,6 +282,7 @@ module containerApps 'core/host/container-apps.bicep' = { params: { name: 'app' location: location + workloadProfile: azureContainerAppsWorkloadProfile containerAppsEnvironmentName: containerAppsEnvironmentName containerRegistryName: containerRegistryName logAnalyticsWorkspaceResourceId: monitoring.outputs.logAnalyticsWorkspaceId @@ -300,6 +303,7 @@ module 
backend 'core/host/container-app-upsert.bicep' = { location: location identityName: acaIdentity.name exists: webAppExists + workloadProfile: azureContainerAppsWorkloadProfile containerRegistryName: containerApps.outputs.registryName containerAppsEnvironmentName: containerApps.outputs.environmentName identityType: 'UserAssigned' diff --git a/infra/main-aca.parameters.json b/infra/main-aca.parameters.json index 566c148abb..8b0a9a1567 100644 --- a/infra/main-aca.parameters.json +++ b/infra/main-aca.parameters.json @@ -235,6 +235,9 @@ }, "webAppExists": { "value": "${SERVICE_WEB_RESOURCE_EXISTS=false}" + }, + "azureContainerAppsWorkloadProfile": { + "value": "${AZURE_CONTAINER_APPS_WORKLOAD_PROFILE=Consumption}" } } } From 7edd2db4fac75ed6bd77ce56ceaa60544c8cb758 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Fri, 30 Aug 2024 09:19:42 +0000 Subject: [PATCH 04/41] Add support for CORS and fix identity for openai --- azure.yaml | 1 + infra/core/host/container-app-upsert.bicep | 3 +++ infra/main-aca.bicep | 11 ++++++----- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/azure.yaml b/azure.yaml index 37b3f9ac4d..187df6ebc6 100644 --- a/azure.yaml +++ b/azure.yaml @@ -22,6 +22,7 @@ services: continueOnError: false infra: provider: bicep + path: ./infra module: main-aca pipeline: diff --git a/infra/core/host/container-app-upsert.bicep b/infra/core/host/container-app-upsert.bicep index ba8f44147c..3e68ae4fd1 100644 --- a/infra/core/host/container-app-upsert.bicep +++ b/infra/core/host/container-app-upsert.bicep @@ -79,6 +79,8 @@ param targetPort int = 80 @allowed(['Consumption', 'D4', 'D8', 'D16', 'D32', 'E4', 'E8', 'E16', 'E32', 'NC24-A100', 'NC48-A100', 'NC96-A100']) param workloadProfile string = 'Consumption' +param allowedOrigins array = [] + resource existingApp 'Microsoft.App/containerApps@2023-05-02-preview' existing = if (exists) { name: name } @@ -106,6 +108,7 @@ module app 'container-app.bicep' = { daprAppProtocol: 
daprAppProtocol secrets: secrets keyvaultIdentities: keyvaultIdentities + allowedOrigins: allowedOrigins external: external env: [ for key in objectKeys(env): { diff --git a/infra/main-aca.bicep b/infra/main-aca.bicep index 3677a4c6ce..21eafb5858 100644 --- a/infra/main-aca.bicep +++ b/infra/main-aca.bicep @@ -299,7 +299,7 @@ module backend 'core/host/container-app-upsert.bicep' = { acaIdentity ] params: { - name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesAppService}backend-${resourceToken}' + name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesContainerApps}backend-${resourceToken}' location: location identityName: acaIdentity.name exists: webAppExists @@ -311,6 +311,7 @@ module backend 'core/host/container-app-upsert.bicep' = { targetPort: 8000 containerCpuCoreCount: '2.0' containerMemory: '4Gi' + allowedOrigins: [ allowedOrigin ] env: { AZURE_STORAGE_ACCOUNT: storage.outputs.name AZURE_STORAGE_CONTAINER: storageContainerName @@ -807,7 +808,7 @@ module searchReaderRoleBackend 'core/security/role.bicep' = if (useAuthenticatio params: { principalId: backend.outputs.identityPrincipalId roleDefinitionId: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' - principalType: principalType + principalType: 'ServicePrincipal' } } @@ -818,7 +819,7 @@ module searchContribRoleBackend 'core/security/role.bicep' = if (useUserUpload) params: { principalId: backend.outputs.identityPrincipalId roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' - principalType: principalType + principalType: 'ServicePrincipal' } } @@ -829,7 +830,7 @@ module computerVisionRoleBackend 'core/security/role.bicep' = if (useGPT4V) { params: { principalId: backend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' - principalType: principalType + principalType: 'ServicePrincipal' } } @@ -840,7 +841,7 @@ module documentIntelligenceRoleBackend 'core/security/role.bicep' = if (useUserU params: { principalId: 
backend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' - principalType: principalType + principalType: 'ServicePrincipal' } } From 8fc2d5abbcc076618c37262380f2cd37c3c0dc7c Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Mon, 2 Sep 2024 05:31:11 +0000 Subject: [PATCH 05/41] Add aca-host --- aca-host/app | 1 + azure.yaml.backup => aca-host/azure.yaml | 9 +++++++-- aca-host/data | 1 + aca-host/scripts | 1 + azure.yaml | 9 ++------- 5 files changed, 12 insertions(+), 9 deletions(-) create mode 120000 aca-host/app rename azure.yaml.backup => aca-host/azure.yaml (96%) create mode 120000 aca-host/data create mode 120000 aca-host/scripts diff --git a/aca-host/app b/aca-host/app new file mode 120000 index 0000000000..5df94d993a --- /dev/null +++ b/aca-host/app @@ -0,0 +1 @@ +../app \ No newline at end of file diff --git a/azure.yaml.backup b/aca-host/azure.yaml similarity index 96% rename from azure.yaml.backup rename to aca-host/azure.yaml index 2d5ef5abb0..7fb80e1025 100644 --- a/azure.yaml.backup +++ b/aca-host/azure.yaml @@ -5,9 +5,9 @@ metadata: template: azure-search-openai-demo@0.0.2-beta services: backend: - project: ./app/backend + project: ../app/backend language: py - host: appservice + host: containerapp hooks: prepackage: windows: @@ -20,6 +20,11 @@ services: run: cd ../frontend;npm install;npm run build interactive: false continueOnError: false +infra: + provider: bicep + path: ../infra + module: main-aca + pipeline: variables: - AZURE_OPENAI_SERVICE diff --git a/aca-host/data b/aca-host/data new file mode 120000 index 0000000000..4909e06efb --- /dev/null +++ b/aca-host/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/aca-host/scripts b/aca-host/scripts new file mode 120000 index 0000000000..a339954dff --- /dev/null +++ b/aca-host/scripts @@ -0,0 +1 @@ +../scripts \ No newline at end of file diff --git a/azure.yaml b/azure.yaml index 187df6ebc6..2d5ef5abb0 100644 --- a/azure.yaml 
+++ b/azure.yaml @@ -7,9 +7,9 @@ services: backend: project: ./app/backend language: py - host: containerapp + host: appservice hooks: - prebuild: + prepackage: windows: shell: pwsh run: cd ../frontend;npm install;npm run build @@ -20,11 +20,6 @@ services: run: cd ../frontend;npm install;npm run build interactive: false continueOnError: false -infra: - provider: bicep - path: ./infra - module: main-aca - pipeline: variables: - AZURE_OPENAI_SERVICE From 9cadd14a613f9d46938e6773a16acf8fe73dab6b Mon Sep 17 00:00:00 2001 From: Yefu Wang Date: Mon, 2 Sep 2024 17:31:46 +0800 Subject: [PATCH 06/41] Make acr unique --- infra/core/host/container-app-upsert.bicep | 2 +- infra/main-aca.bicep | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/infra/core/host/container-app-upsert.bicep b/infra/core/host/container-app-upsert.bicep index 3e68ae4fd1..ac2c77db1d 100644 --- a/infra/core/host/container-app-upsert.bicep +++ b/infra/core/host/container-app-upsert.bicep @@ -25,7 +25,7 @@ param containerName string = 'main' param containerAppsEnvironmentName string = '${containerName}env' @description('The name of the container registry') -param containerRegistryName string = '' +param containerRegistryName string @description('Hostname suffix for container registry. 
Set when deploying to sovereign clouds') param containerRegistryHostSuffix string = 'azurecr.io' diff --git a/infra/main-aca.bicep b/infra/main-aca.bicep index 21eafb5858..382f00449f 100644 --- a/infra/main-aca.bicep +++ b/infra/main-aca.bicep @@ -14,7 +14,6 @@ param resourceGroupName string = '' // Set in main.parameters.json // ACA parametors param containerAppsEnvironmentName string -param containerRegistryName string = '${replace(containerAppsEnvironmentName, '-', '')}acr' param webAppExists bool param identityName string= '${environmentName}-aca-identity' @@ -184,6 +183,7 @@ param useLocalHtmlParser bool = false var abbrs = loadJsonContent('abbreviations.json') var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) var tags = { 'azd-env-name': environmentName } +param containerRegistryName string = '${replace(containerAppsEnvironmentName, '-', '')}acr' var tenantIdForAuth = !empty(authTenantId) ? authTenantId : tenantId var authenticationIssuerUri = '${environment().authentication.loginEndpoint}${tenantIdForAuth}/v2.0' @@ -284,7 +284,7 @@ module containerApps 'core/host/container-apps.bicep' = { location: location workloadProfile: azureContainerAppsWorkloadProfile containerAppsEnvironmentName: containerAppsEnvironmentName - containerRegistryName: containerRegistryName + containerRegistryName: '${containerRegistryName}${resourceToken}' logAnalyticsWorkspaceResourceId: monitoring.outputs.logAnalyticsWorkspaceId // virtualNetworkSubnetId: virtualNetwork.outputs.subnetResourceIds[1] } From 0623e9bbe2a2ecf7ea8d6dbf7dcf1648773bb9cc Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Tue, 3 Sep 2024 03:47:08 +0000 Subject: [PATCH 07/41] Add doc for aca host --- docs/azure_container_apps.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 docs/azure_container_apps.md diff --git a/docs/azure_container_apps.md b/docs/azure_container_apps.md new file mode 100644 index 0000000000..53db95cb20 
--- /dev/null +++ b/docs/azure_container_apps.md @@ -0,0 +1,20 @@ +# Deploying on Azure Container Apps +Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file lives in folder `aca-host` along with symbolic links to `app`,`data` and `scripts` folder. +## For Linux/MacOS users +If you are on Linux/MacOS, it should work without any extra settings to deploy on Azure Container Apps. Please use: +```bash +cd aca-host +azd up +``` +## For windows users +Because windows allow symbolic links by default, you may need to enbale [Developer Mode](https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development), +and enable symlinks for git before cloning this repo. +To enable symlinks for git, please use +``` +# local setting +git config core.symlinks true +# Alternatively, enable symlinks globally +git config --global core.symlinks true +``` + +For more info please check [here](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows). \ No newline at end of file From e3625456138adbe86c4ec658332ed94e554b3a30 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Tue, 3 Sep 2024 06:01:54 +0000 Subject: [PATCH 08/41] Update ACA docs --- aca-host/README.md | 24 ++++++++++++++++++++++++ docs/azure_container_apps.md | 16 ++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 aca-host/README.md diff --git a/aca-host/README.md b/aca-host/README.md new file mode 100644 index 0000000000..ea66fa1650 --- /dev/null +++ b/aca-host/README.md @@ -0,0 +1,24 @@ +# Deploying on Azure Container Apps +Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives here along with symbolic links to `app`,`data` and `scripts` folder. +## For Linux/MacOS users +If you are on Linux/MacOS, it should work without any extra settings to deploy to Azure Container Apps. 
Please use: +```bash +cd aca-host +azd up +``` +## For Windows users +Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development), +and symlinks for git before cloning this repo. +To enable symlinks for git, please use +```bash +# local setting +git config core.symlinks true +# Alternatively, enable symlinks globally +git config --global core.symlinks true +``` + +Please ensure that the symlinks work correctly and then run: +```bash +cd aca-host +azd up +``` \ No newline at end of file diff --git a/docs/azure_container_apps.md b/docs/azure_container_apps.md index 53db95cb20..82bb756c6b 100644 --- a/docs/azure_container_apps.md +++ b/docs/azure_container_apps.md @@ -1,20 +1,24 @@ # Deploying on Azure Container Apps -Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file lives in folder `aca-host` along with symbolic links to `app`,`data` and `scripts` folder. +Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives in folder `aca-host` along with symbolic links to `app`,`data` and `scripts` folder. ## For Linux/MacOS users If you are on Linux/MacOS, it should work without any extra settings to deploy on Azure Container Apps. Please use: ```bash cd aca-host azd up ``` -## For windows users -Because windows allow symbolic links by default, you may need to enbale [Developer Mode](https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development), -and enable symlinks for git before cloning this repo. 
+## For Windows users +Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development), +and symlinks for git before cloning this repo. To enable symlinks for git, please use -``` +```bash # local setting git config core.symlinks true # Alternatively, enable symlinks globally git config --global core.symlinks true ``` -For more info please check [here](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows). \ No newline at end of file +Please check whether the symlink works correctly and then run: +```bash +cd aca-host +azd up +``` \ No newline at end of file From 24d668aab28751f823ed3b1523796fc9bb3065aa Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Tue, 3 Sep 2024 06:02:47 +0000 Subject: [PATCH 09/41] Remove unneeded bicep files --- infra/core/host/containerapps.bicep.backup | 112 --------------------- 1 file changed, 112 deletions(-) delete mode 100644 infra/core/host/containerapps.bicep.backup diff --git a/infra/core/host/containerapps.bicep.backup b/infra/core/host/containerapps.bicep.backup deleted file mode 100644 index ccb10e7b62..0000000000 --- a/infra/core/host/containerapps.bicep.backup +++ /dev/null @@ -1,112 +0,0 @@ -param containerAppName string -param containerAppEnvName string = '${containerAppName}env' -param containerRegistryName string = '${containerAppName}cr' - -@description('Allowed origins') -param allowedOrigins array = [] - -param location string = resourceGroup().location -param logAnalyticsName string -param zoneRedundant bool = false -@description('The port that the container listens on') -param containerTargetPort int = 8000 -@secure() -param envVariables object = {} - -resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' existing = { - name: logAnalyticsName -} 
- -resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' = { - name: containerRegistryName - location: location - sku: { - name: 'Basic' - } - properties: { - adminUserEnabled: true - } -} - -resource containerAppEnvironment 'Microsoft.App/managedEnvironments@2023-05-01' = { - name: containerAppEnvName - location: location - properties: { - appLogsConfiguration: { - destination: 'log-analytics' - logAnalyticsConfiguration: { - customerId: logAnalyticsWorkspace.properties.customerId - sharedKey: logAnalyticsWorkspace.listKeys().primarySharedKey - } - } - - workloadProfiles: [ - { - name: 'Consumption' - workloadProfileType: 'Consumption' - } - ] - - zoneRedundant: zoneRedundant - } -} - -resource myContainerApp 'Microsoft.App/containerApps@2023-05-01' = { - name: containerAppName - location: location - properties: { - managedEnvironmentId: containerAppEnvironment.id - configuration: { - ingress: { - transport: 'http' - external: true - targetPort: containerTargetPort - allowInsecure: false - traffic: [ - { - latestRevision: true - weight: 100 - } - ] - corsPolicy: { - allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) - } - } - registries: [ - { - server: containerRegistry.properties.loginServer - username: containerRegistry.name - passwordSecretRef: 'container-registry-password' - } - ] - secrets: [ - { - name: 'container-registry-password' - value: containerRegistry.listCredentials().passwords[0].value - } - ] - } - template: { - containers: [ - { - name: 'ai-search' - image: '${containerRegistry}.azurecr.io/ai-search:latest' - resources: { - cpu: 1 - memory: '2Gi' - } - env: [ - for key in objectKeys(envVariables): { - name: key - value: envVariables[key] - } - ] - } - ] - scale: { - minReplicas: 0 - maxReplicas: 1 - } - } - } -} From fbb4b055b590c1b0657ac6398ce51da0169a9ca9 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Tue, 3 Sep 2024 06:05:42 +0000 Subject: 
[PATCH 10/41] Revert chanes to infra/main.parameters.json --- infra/main.parameters.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/infra/main.parameters.json b/infra/main.parameters.json index b36d27aa3b..023cea7604 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -238,9 +238,6 @@ }, "runningOnAdo": { "value": "${TF_BUILD}" - }, - "webAppExists": { - "value": "${SERVICE_WEB_RESOURCE_EXISTS=false}" } } } From 4ced7ce788ae4cc3278014347137c3efb5361dda Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Tue, 3 Sep 2024 06:37:28 +0000 Subject: [PATCH 11/41] Fix markdown lint issues --- aca-host/README.md | 13 ++++++++++--- docs/azure_container_apps.md | 13 ++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/aca-host/README.md b/aca-host/README.md index ea66fa1650..b1084417cc 100644 --- a/aca-host/README.md +++ b/aca-host/README.md @@ -1,15 +1,21 @@ # Deploying on Azure Container Apps + Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives here along with symbolic links to `app`,`data` and `scripts` folder. + ## For Linux/MacOS users + If you are on Linux/MacOS, it should work without any extra settings to deploy to Azure Container Apps. Please use: + ```bash cd aca-host azd up ``` + ## For Windows users -Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development), -and symlinks for git before cloning this repo. 
+ +Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/windows/apps/get-started/enable-your-device-for-development) and symlinks for git before cloning this repo. To enable symlinks for git, please use + ```bash # local setting git config core.symlinks true @@ -18,7 +24,8 @@ git config --global core.symlinks true ``` Please ensure that the symlinks work correctly and then run: + ```bash cd aca-host azd up -``` \ No newline at end of file +``` diff --git a/docs/azure_container_apps.md b/docs/azure_container_apps.md index 82bb756c6b..521ec5f5d7 100644 --- a/docs/azure_container_apps.md +++ b/docs/azure_container_apps.md @@ -1,15 +1,21 @@ # Deploying on Azure Container Apps + Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives in folder `aca-host` along with symbolic links to `app`,`data` and `scripts` folder. + ## For Linux/MacOS users + If you are on Linux/MacOS, it should work without any extra settings to deploy on Azure Container Apps. Please use: + ```bash cd aca-host azd up ``` + ## For Windows users -Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development), -and symlinks for git before cloning this repo. + +Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/windows/apps/get-started/enable-your-device-for-development) and symlinks for git before cloning this repo. 
To enable symlinks for git, please use + ```bash # local setting git config core.symlinks true @@ -18,7 +24,8 @@ git config --global core.symlinks true ``` Please check whether the symlink works correctly and then run: + ```bash cd aca-host azd up -``` \ No newline at end of file +``` From 625866fa5be66f4b6d51f175931c747fbf85d741 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Wed, 4 Sep 2024 10:00:22 +0000 Subject: [PATCH 12/41] Run frontend build before building docker image --- aca-host/azure.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aca-host/azure.yaml b/aca-host/azure.yaml index 7fb80e1025..d0a01aa7bb 100644 --- a/aca-host/azure.yaml +++ b/aca-host/azure.yaml @@ -9,7 +9,7 @@ services: language: py host: containerapp hooks: - prepackage: + prebuild: windows: shell: pwsh run: cd ../frontend;npm install;npm run build From 40287f2977a168009b2be383388a89b9f905959a Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Thu, 5 Sep 2024 09:58:12 +0000 Subject: [PATCH 13/41] remove symlinks and update scripts with paths relative to its own folder instead of cwd --- aca-host/app | 1 - aca-host/azure.yaml | 8 ++++---- aca-host/data | 1 - aca-host/scripts | 1 - scripts/adlsgen2setup.ps1 | 4 ++-- scripts/adlsgen2setup.sh | 9 +++++++-- scripts/auth_init.ps1 | 7 ++++--- scripts/auth_init.sh | 11 ++++++++--- scripts/auth_update.ps1 | 7 ++++--- scripts/auth_update.sh | 10 +++++++--- scripts/load_python_env.ps1 | 3 ++- scripts/load_python_env.sh | 8 ++++++-- scripts/loadenv.ps1 | 6 ++++-- scripts/loadenv.sh | 8 ++++++-- scripts/manageacl.ps1 | 4 +++- scripts/manageacl.sh | 8 ++++++-- scripts/prepdocs.ps1 | 8 ++++---- scripts/prepdocs.sh | 10 ++++++++-- 18 files changed, 75 insertions(+), 39 deletions(-) delete mode 120000 aca-host/app delete mode 120000 aca-host/data delete mode 120000 aca-host/scripts diff --git a/aca-host/app b/aca-host/app deleted file mode 120000 index 5df94d993a..0000000000 --- a/aca-host/app 
+++ /dev/null @@ -1 +0,0 @@ -../app \ No newline at end of file diff --git a/aca-host/azure.yaml b/aca-host/azure.yaml index d0a01aa7bb..7b56fc21b8 100644 --- a/aca-host/azure.yaml +++ b/aca-host/azure.yaml @@ -97,22 +97,22 @@ hooks: preprovision: windows: shell: pwsh - run: ./scripts/auth_init.ps1 + run: ../scripts/auth_init.ps1 interactive: true continueOnError: false posix: shell: sh - run: ./scripts/auth_init.sh + run: ../scripts/auth_init.sh interactive: true continueOnError: false postprovision: windows: shell: pwsh - run: ./scripts/auth_update.ps1;./scripts/prepdocs.ps1 + run: ../scripts/auth_update.ps1; ../scripts/prepdocs.ps1 interactive: true continueOnError: false posix: shell: sh - run: ./scripts/auth_update.sh;./scripts/prepdocs.sh + run: ../scripts/auth_update.sh; ../scripts/prepdocs.sh interactive: true continueOnError: false diff --git a/aca-host/data b/aca-host/data deleted file mode 120000 index 4909e06efb..0000000000 --- a/aca-host/data +++ /dev/null @@ -1 +0,0 @@ -../data \ No newline at end of file diff --git a/aca-host/scripts b/aca-host/scripts deleted file mode 120000 index a339954dff..0000000000 --- a/aca-host/scripts +++ /dev/null @@ -1 +0,0 @@ -../scripts \ No newline at end of file diff --git a/scripts/adlsgen2setup.ps1 b/scripts/adlsgen2setup.ps1 index e6b80c0d46..b0a567f775 100644 --- a/scripts/adlsgen2setup.ps1 +++ b/scripts/adlsgen2setup.ps1 @@ -1,6 +1,7 @@ ## Set the preference to stop on the first error $ErrorActionPreference = "Stop" +$projectRoot = Split-Path -Parent $PSScriptRoot & $PSScriptRoot\loadenv.ps1 $venvPythonPath = "./.venv/scripts/python.exe" @@ -15,5 +16,4 @@ if ([string]::IsNullOrEmpty($env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT)) { } Write-Host 'Running "adlsgen2setup.py"' -$cwd = (Get-Location) -Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/adlsgen2setup.py `"$cwd/data`" --data-access-control ./scripts/sampleacls.json --storage-account $env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT -v" -Wait -NoNewWindow 
+Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/adlsgen2setup.py `"$projectRoot/data`" --data-access-control $projectRoot/scripts/sampleacls.json --storage-account $env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT -v" -Wait -NoNewWindow diff --git a/scripts/adlsgen2setup.sh b/scripts/adlsgen2setup.sh index 6118a5e8e6..d40722846c 100755 --- a/scripts/adlsgen2setup.sh +++ b/scripts/adlsgen2setup.sh @@ -1,6 +1,11 @@ #!/bin/sh -. ./scripts/loadenv.sh +# Get the project root of the current script +project_root="$(cd "$(dirname $(dirname $0))" && pwd)" +script_dir="$project_root/scripts" +data_dir="$project_root/data" + +. $script_dir/loadenv.sh if [ -n "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" ]; then echo 'AZURE_ADLS_GEN2_STORAGE_ACCOUNT must be set to continue' @@ -9,4 +14,4 @@ fi echo 'Running "adlsgen2setup.py"' -./.venv/bin/python ./scripts/adlsgen2setup.py './data/*' --data-access-control './scripts/sampleacls.json' --storage-account "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" -v +./.venv/bin/python $script_dir/adlsgen2setup.py "$data_dir/*" --data-access-control "$script_dir/sampleacls.json" --storage-account "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" -v diff --git a/scripts/auth_init.ps1 b/scripts/auth_init.ps1 index 872cf5ab6e..8f725b1c1d 100755 --- a/scripts/auth_init.ps1 +++ b/scripts/auth_init.ps1 @@ -1,10 +1,11 @@ -. ./scripts/load_azd_env.ps1 +$projectRoot = Split-Path -Parent $PSScriptRoot +. $projectRoot/scripts/load_azd_env.ps1 if (-not $env:AZURE_USE_AUTHENTICATION) { Exit 0 } -. ./scripts/load_python_env.ps1 +. 
$projectRoot/scripts/load_python_env.ps1 $venvPythonPath = "./.venv/scripts/python.exe" if (Test-Path -Path "/usr") { @@ -12,4 +13,4 @@ if (Test-Path -Path "/usr") { $venvPythonPath = "./.venv/bin/python" } -Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/auth_init.py" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/auth_init.py" -Wait -NoNewWindow diff --git a/scripts/auth_init.sh b/scripts/auth_init.sh index bd7cfff552..0066731793 100755 --- a/scripts/auth_init.sh +++ b/scripts/auth_init.sh @@ -1,8 +1,13 @@ #!/bin/sh +# Get the project root of the current script +project_root="$(cd "$(dirname $(dirname $0))" && pwd)" +script_dir="$project_root/scripts" +data_dir="$project_root/data" + echo "Checking if authentication should be setup..." -. ./scripts/load_azd_env.sh +. $script_dir/load_azd_env.sh if [ -z "$AZURE_USE_AUTHENTICATION" ]; then echo "AZURE_USE_AUTHENTICATION is not set, skipping authentication setup." @@ -11,6 +16,6 @@ fi echo "AZURE_USE_AUTHENTICATION is set, proceeding with authentication setup..." -. ./scripts/load_python_env.sh +. $script_dir/load_python_env.sh -./.venv/bin/python ./scripts/auth_init.py +./.venv/bin/python $script_dir/auth_init.py diff --git a/scripts/auth_update.ps1 b/scripts/auth_update.ps1 index 1dbf7efee2..f13e78d2b6 100644 --- a/scripts/auth_update.ps1 +++ b/scripts/auth_update.ps1 @@ -1,10 +1,11 @@ -. ./scripts/load_azd_env.ps1 +$projectRoot = Split-Path -Parent $PSScriptRoot +. $projectRoot/scripts/load_azd_env.ps1 if (-not $env:AZURE_USE_AUTHENTICATION) { Exit 0 } -. ./scripts/load_python_env.ps1 +. 
$projectRoot/scripts/load_python_env.ps1 $venvPythonPath = "./.venv/scripts/python.exe" if (Test-Path -Path "/usr") { @@ -12,4 +13,4 @@ if (Test-Path -Path "/usr") { $venvPythonPath = "./.venv/bin/python" } -Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/auth_update.py" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/auth_update.py" -Wait -NoNewWindow diff --git a/scripts/auth_update.sh b/scripts/auth_update.sh index 7b64995f75..1c71c9a4d4 100755 --- a/scripts/auth_update.sh +++ b/scripts/auth_update.sh @@ -1,11 +1,15 @@ #!/bin/sh -. ./scripts/load_azd_env.sh +# Get the project root of the current script +project_root="$(cd "$(dirname $(dirname $0))" && pwd)" +script_dir="$project_root/scripts" + +. $script_dir/load_azd_env.sh if [ -z "$AZURE_USE_AUTHENTICATION" ]; then exit 0 fi -. ./scripts/load_python_env.sh +. $script_dir/load_python_env.sh -./.venv/bin/python ./scripts/auth_update.py +./.venv/bin/python $script_dir/auth_update.py diff --git a/scripts/load_python_env.ps1 b/scripts/load_python_env.ps1 index d13af9328d..4b2ae12d36 100644 --- a/scripts/load_python_env.ps1 +++ b/scripts/load_python_env.ps1 @@ -1,3 +1,4 @@ +$projectRoot = Split-Path -Parent $PSScriptRoot $pythonCmd = Get-Command python -ErrorAction SilentlyContinue if (-not $pythonCmd) { # fallback to python3 if python not found @@ -14,4 +15,4 @@ if (Test-Path -Path "/usr") { } Write-Host 'Installing dependencies from "requirements.txt" into virtual environment' -Start-Process -FilePath $venvPythonPath -ArgumentList "-m pip install -r app/backend/requirements.txt" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "-m pip install -r $projectRoot/app/backend/requirements.txt" -Wait -NoNewWindow diff --git a/scripts/load_python_env.sh b/scripts/load_python_env.sh index 49622244d2..bd7fb679cf 100755 --- a/scripts/load_python_env.sh +++ b/scripts/load_python_env.sh @@ -1,7 +1,11 @@ - #!/bin/sh +#!/bin/sh + +# Get 
the project root of the current script +project_root="$(cd "$(dirname $(dirname $0))" && pwd)" +app_dir="$project_root/app" echo 'Creating Python virtual environment "app/backend/.venv"...' python3 -m venv .venv echo 'Installing dependencies from "requirements.txt" into virtual environment (in quiet mode)...' -.venv/bin/python -m pip --quiet --disable-pip-version-check install -r app/backend/requirements.txt +.venv/bin/python -m pip --quiet --disable-pip-version-check install -r $app_dir/backend/requirements.txt diff --git a/scripts/loadenv.ps1 b/scripts/loadenv.ps1 index 7b285a0f95..26a62fe86f 100644 --- a/scripts/loadenv.ps1 +++ b/scripts/loadenv.ps1 @@ -1,3 +1,5 @@ -./scripts/load_azd_env.ps1 +$projectRoot = Split-Path -Parent $PSScriptRoot -./scripts/load_python_env.ps1 +& $projectRoot/scripts/load_azd_env.ps1 + +& $projectRoot/scripts/load_python_env.ps1 diff --git a/scripts/loadenv.sh b/scripts/loadenv.sh index 152687e2c6..e667889bf7 100755 --- a/scripts/loadenv.sh +++ b/scripts/loadenv.sh @@ -1,5 +1,9 @@ #!/bin/sh -. ./scripts/load_azd_env.sh +# Get the project root of the current script +project_root="$(cd "$(dirname $(dirname $0))" && pwd)" +script_dir="$project_root/scripts" -. ./scripts/load_python_env.sh +. $script_dir/load_azd_env.sh + +. $script_dir/load_python_env.sh diff --git a/scripts/manageacl.ps1 b/scripts/manageacl.ps1 index ca963e5189..7ce96ef0dd 100644 --- a/scripts/manageacl.ps1 +++ b/scripts/manageacl.ps1 @@ -9,5 +9,7 @@ if (Test-Path -Path "/usr") { $venvPythonPath = "./.venv/bin/python" } +$projectRoot = Split-Path -Parent $PSScriptRoot + Write-Host "Running manageacl.py. 
Arguments to script: $args" -Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/manageacl.py --search-service $env:AZURE_SEARCH_SERVICE --index $env:AZURE_SEARCH_INDEX $args" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/manageacl.py --search-service $env:AZURE_SEARCH_SERVICE --index $env:AZURE_SEARCH_INDEX $args" -Wait -NoNewWindow diff --git a/scripts/manageacl.sh b/scripts/manageacl.sh index 556dd35eeb..74ac943677 100755 --- a/scripts/manageacl.sh +++ b/scripts/manageacl.sh @@ -1,6 +1,10 @@ #!/bin/sh -. ./scripts/loadenv.sh +# Get the project root of the current script +project_root="$(cd "$(dirname $(dirname $0))" && pwd)" +script_dir="$project_root/scripts" + +. $script_dir/loadenv.sh echo "Running manageacl.py. Arguments to script: $@" - ./.venv/bin/python ./scripts/manageacl.py --search-service "$AZURE_SEARCH_SERVICE" --index "$AZURE_SEARCH_INDEX" $@ + ./.venv/bin/python $script_dir/manageacl.py --search-service "$AZURE_SEARCH_SERVICE" --index "$AZURE_SEARCH_INDEX" $@ diff --git a/scripts/prepdocs.ps1 b/scripts/prepdocs.ps1 index 0d6e4aed5f..4985a1bbda 100755 --- a/scripts/prepdocs.ps1 +++ b/scripts/prepdocs.ps1 @@ -1,4 +1,5 @@ -./scripts/loadenv.ps1 +$projectRoot = Split-Path -Parent $PSScriptRoot +& $projectRoot/scripts/loadenv.ps1 $venvPythonPath = "./.venv/scripts/python.exe" if (Test-Path -Path "/usr") { @@ -71,14 +72,13 @@ if ($env:AZURE_OPENAI_API_KEY_OVERRIDE) { $openaiApiKeyArg = "--openaikey $env:OPENAI_API_KEY" } -$cwd = (Get-Location) -$dataArg = "`"$cwd/data/*`"" +$dataArg = "`"$projectRoot/data/*`"" $additionalArgs = "" if ($args) { $additionalArgs = "$args" } -$argumentList = "./app/backend/prepdocs.py $dataArg --verbose " + ` +$argumentList = "$projectRoot/app/backend/prepdocs.py $dataArg --verbose " + ` "--subscriptionid $env:AZURE_SUBSCRIPTION_ID " + ` "--storageaccount $env:AZURE_STORAGE_ACCOUNT --container $env:AZURE_STORAGE_CONTAINER --storageresourcegroup 
$env:AZURE_STORAGE_RESOURCE_GROUP " + ` "--searchservice $env:AZURE_SEARCH_SERVICE --index $env:AZURE_SEARCH_INDEX " + ` diff --git a/scripts/prepdocs.sh b/scripts/prepdocs.sh index c750eb1bfc..d0ee76fbbf 100755 --- a/scripts/prepdocs.sh +++ b/scripts/prepdocs.sh @@ -1,6 +1,11 @@ #!/bin/sh -. ./scripts/loadenv.sh +# Get the project root of the current script +project_root="$(cd "$(dirname $(dirname $0))" && pwd)" +script_dir="$project_root/scripts" +data_dir="$project_root/data" + +. $script_dir/loadenv.sh echo 'Running "prepdocs.py"' @@ -74,7 +79,8 @@ if [ $# -gt 0 ]; then additionalArgs="$@" fi -./.venv/bin/python ./app/backend/prepdocs.py './data/*' --verbose \ + +./.venv/bin/python $project_root/app/backend/prepdocs.py "$data_dir/*" --verbose \ --subscriptionid $AZURE_SUBSCRIPTION_ID \ --storageaccount "$AZURE_STORAGE_ACCOUNT" --container "$AZURE_STORAGE_CONTAINER" --storageresourcegroup $AZURE_STORAGE_RESOURCE_GROUP \ --searchservice "$AZURE_SEARCH_SERVICE" --index "$AZURE_SEARCH_INDEX" \ From a99a6c532c19040dabbde7bdeab37036509f2b6b Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Fri, 6 Sep 2024 10:04:04 +0000 Subject: [PATCH 14/41] Merge with main.bicep --- aca-host/azure.yaml | 1 - infra/main-aca.bicep | 902 --------------------------------- infra/main-aca.parameters.json | 243 --------- infra/main.bicep | 161 +++++- infra/main.parameters.json | 9 + infra/network-isolation.bicep | 5 +- 6 files changed, 155 insertions(+), 1166 deletions(-) delete mode 100644 infra/main-aca.bicep delete mode 100644 infra/main-aca.parameters.json diff --git a/aca-host/azure.yaml b/aca-host/azure.yaml index 7b56fc21b8..0fe5abb3be 100644 --- a/aca-host/azure.yaml +++ b/aca-host/azure.yaml @@ -23,7 +23,6 @@ services: infra: provider: bicep path: ../infra - module: main-aca pipeline: variables: diff --git a/infra/main-aca.bicep b/infra/main-aca.bicep deleted file mode 100644 index 382f00449f..0000000000 --- a/infra/main-aca.bicep +++ /dev/null @@ -1,902 +0,0 @@ 
-targetScope = 'subscription' - -@minLength(1) -@maxLength(64) -@description('Name of the the environment which is used to generate a short unique hash used in all resources.') -param environmentName string - -@minLength(1) -@description('Primary location for all resources') -param location string - -param backendServiceName string = '' // Set in main.parameters.json -param resourceGroupName string = '' // Set in main.parameters.json - -// ACA parametors -param containerAppsEnvironmentName string -param webAppExists bool -param identityName string= '${environmentName}-aca-identity' - -param applicationInsightsDashboardName string = '' // Set in main.parameters.json -param applicationInsightsName string = '' // Set in main.parameters.json -param logAnalyticsName string = '' // Set in main.parameters.json - -param searchServiceName string = '' // Set in main.parameters.json -param searchServiceResourceGroupName string = '' // Set in main.parameters.json -param searchServiceLocation string = '' // Set in main.parameters.json -// The free tier does not support managed identity (required) or semantic search (optional) -@allowed([ 'free', 'basic', 'standard', 'standard2', 'standard3', 'storage_optimized_l1', 'storage_optimized_l2' ]) -param searchServiceSkuName string // Set in main.parameters.json -param searchIndexName string // Set in main.parameters.json -param searchQueryLanguage string // Set in main.parameters.json -param searchQuerySpeller string // Set in main.parameters.json -param searchServiceSemanticRankerLevel string // Set in main.parameters.json -var actualSearchServiceSemanticRankerLevel = (searchServiceSkuName == 'free') ? 
'disabled' : searchServiceSemanticRankerLevel - -param storageAccountName string = '' // Set in main.parameters.json -param storageResourceGroupName string = '' // Set in main.parameters.json -param storageResourceGroupLocation string = location -param storageContainerName string = 'content' -param storageSkuName string // Set in main.parameters.json - -param userStorageAccountName string = '' -param userStorageContainerName string = 'user-content' - -param appServiceSkuName string // Set in main.parameters.json - -@allowed([ 'azure', 'openai', 'azure_custom' ]) -param openAiHost string // Set in main.parameters.json -param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure') -param deployAzureOpenAi bool = openAiHost == 'azure' -param azureOpenAiCustomUrl string = '' -param azureOpenAiApiVersion string = '' -@secure() -param azureOpenAiApiKey string = '' -param openAiServiceName string = '' -param openAiResourceGroupName string = '' - -param speechServiceResourceGroupName string = '' -param speechServiceLocation string = '' -param speechServiceName string = '' -param speechServiceSkuName string // Set in main.parameters.json -param useGPT4V bool = false - -@allowed(['Consumption', 'D4', 'D8', 'D16', 'D32', 'E4', 'E8', 'E16', 'E32', 'NC24-A100', 'NC48-A100', 'NC96-A100']) -param azureContainerAppsWorkloadProfile string = 'Consumption' - -@description('Location for the OpenAI resource group') -@allowed([ 'canadaeast', 'eastus', 'eastus2', 'francecentral', 'switzerlandnorth', 'uksouth', 'japaneast', 'northcentralus', 'australiaeast', 'swedencentral' ]) -@metadata({ - azd: { - type: 'location' - } -}) -param openAiResourceGroupLocation string - -param openAiSkuName string = 'S0' - -@secure() -param openAiApiKey string = '' -param openAiApiOrganization string = '' - -param documentIntelligenceServiceName string = '' // Set in main.parameters.json -param documentIntelligenceResourceGroupName string = '' // Set in main.parameters.json - -// Limited regions for new 
version: -// https://learn.microsoft.com/azure/ai-services/document-intelligence/concept-layout -@description('Location for the Document Intelligence resource group') -@allowed([ 'eastus', 'westus2', 'westeurope' ]) -@metadata({ - azd: { - type: 'location' - } -}) -param documentIntelligenceResourceGroupLocation string - -param documentIntelligenceSkuName string // Set in main.parameters.json - -param computerVisionServiceName string = '' // Set in main.parameters.json -param computerVisionResourceGroupName string = '' // Set in main.parameters.json -param computerVisionResourceGroupLocation string = '' // Set in main.parameters.json -param computerVisionSkuName string // Set in main.parameters.json - -param chatGptModelName string = '' -param chatGptDeploymentName string = '' -param chatGptDeploymentVersion string = '' -param chatGptDeploymentCapacity int = 0 -var chatGpt = { - modelName: !empty(chatGptModelName) ? chatGptModelName : startsWith(openAiHost, 'azure') ? 'gpt-35-turbo' : 'gpt-3.5-turbo' - deploymentName: !empty(chatGptDeploymentName) ? chatGptDeploymentName : 'chat' - deploymentVersion: !empty(chatGptDeploymentVersion) ? chatGptDeploymentVersion : '0613' - deploymentCapacity: chatGptDeploymentCapacity != 0 ? chatGptDeploymentCapacity : 30 -} - -param embeddingModelName string = '' -param embeddingDeploymentName string = '' -param embeddingDeploymentVersion string = '' -param embeddingDeploymentCapacity int = 0 -param embeddingDimensions int = 0 -var embedding = { - modelName: !empty(embeddingModelName) ? embeddingModelName : 'text-embedding-ada-002' - deploymentName: !empty(embeddingDeploymentName) ? embeddingDeploymentName : 'embedding' - deploymentVersion: !empty(embeddingDeploymentVersion) ? embeddingDeploymentVersion : '2' - deploymentCapacity: embeddingDeploymentCapacity != 0 ? embeddingDeploymentCapacity : 30 - dimensions: embeddingDimensions != 0 ? 
embeddingDimensions : 1536 -} - -param gpt4vModelName string = 'gpt-4o' -param gpt4vDeploymentName string = 'gpt-4o' -param gpt4vModelVersion string = '2024-05-13' -param gpt4vDeploymentCapacity int = 10 - -param tenantId string = tenant().tenantId -param authTenantId string = '' - -// Used for the optional login and document level access control system -param useAuthentication bool = false -param enforceAccessControl bool = false -param enableGlobalDocuments bool = false -param enableUnauthenticatedAccess bool = false -param serverAppId string = '' -@secure() -param serverAppSecret string = '' -param clientAppId string = '' -@secure() -param clientAppSecret string = '' - -// Used for optional CORS support for alternate frontends -param allowedOrigin string = '' // should start with https://, shouldn't end with a / - -@allowed([ 'None', 'AzureServices' ]) -@description('If allowedIp is set, whether azure services are allowed to bypass the storage and AI services firewall.') -param bypass string = 'AzureServices' - -@description('Public network access value for all deployed resources') -@allowed([ 'Enabled', 'Disabled' ]) -param publicNetworkAccess string = 'Enabled' - -@description('Add a private endpoints for network connectivity') -param usePrivateEndpoint bool = false - -@description('Id of the user or app to assign application roles') -param principalId string = '' - -@description('Use Application Insights for monitoring and performance tracing') -param useApplicationInsights bool = false - -@description('Use speech recognition feature in browser') -param useSpeechInputBrowser bool = false -@description('Use speech synthesis in browser') -param useSpeechOutputBrowser bool = false -@description('Use Azure speech service for reading out text') -param useSpeechOutputAzure bool = false -@description('Show options to use vector embeddings for searching in the app UI') -param useVectors bool = false -@description('Use Built-in integrated Vectorization feature of AI 
Search to vectorize and ingest documents') -param useIntegratedVectorization bool = false - -@description('Enable user document upload feature') -param useUserUpload bool = false -param useLocalPdfParser bool = false -param useLocalHtmlParser bool = false - -var abbrs = loadJsonContent('abbreviations.json') -var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) -var tags = { 'azd-env-name': environmentName } -param containerRegistryName string = '${replace(containerAppsEnvironmentName, '-', '')}acr' - -var tenantIdForAuth = !empty(authTenantId) ? authTenantId : tenantId -var authenticationIssuerUri = '${environment().authentication.loginEndpoint}${tenantIdForAuth}/v2.0' - -@description('Whether the deployment is running on GitHub Actions') -param runningOnGh string = '' - -@description('Whether the deployment is running on Azure DevOps Pipeline') -param runningOnAdo string = '' - -// Organize resources in a resource group -resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { - name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' - location: location - tags: tags -} - -resource openAiResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(openAiResourceGroupName)) { - name: !empty(openAiResourceGroupName) ? openAiResourceGroupName : resourceGroup.name -} - -resource documentIntelligenceResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(documentIntelligenceResourceGroupName)) { - name: !empty(documentIntelligenceResourceGroupName) ? documentIntelligenceResourceGroupName : resourceGroup.name -} - -resource computerVisionResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(computerVisionResourceGroupName)) { - name: !empty(computerVisionResourceGroupName) ? 
computerVisionResourceGroupName : resourceGroup.name -} - -resource searchServiceResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(searchServiceResourceGroupName)) { - name: !empty(searchServiceResourceGroupName) ? searchServiceResourceGroupName : resourceGroup.name -} - -resource storageResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(storageResourceGroupName)) { - name: !empty(storageResourceGroupName) ? storageResourceGroupName : resourceGroup.name -} - -resource speechResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(speechServiceResourceGroupName)) { - name: !empty(speechServiceResourceGroupName) ? speechServiceResourceGroupName : resourceGroup.name -} - -// Monitor application with Azure Monitor -module monitoring 'core/monitor/monitoring.bicep' = if (useApplicationInsights) { - name: 'monitoring' - scope: resourceGroup - params: { - location: location - tags: tags - applicationInsightsName: !empty(applicationInsightsName) ? applicationInsightsName : '${abbrs.insightsComponents}${resourceToken}' - logAnalyticsName: !empty(logAnalyticsName) ? logAnalyticsName : '${abbrs.operationalInsightsWorkspaces}${resourceToken}' - publicNetworkAccess: publicNetworkAccess - } -} - -module applicationInsightsDashboard 'backend-dashboard.bicep' = if (useApplicationInsights) { - name: 'application-insights-dashboard' - scope: resourceGroup - params: { - name: !empty(applicationInsightsDashboardName) ? applicationInsightsDashboardName : '${abbrs.portalDashboards}${resourceToken}' - location: location - applicationInsightsName: useApplicationInsights ? monitoring.outputs.applicationInsightsName : '' - } -} - -// Create an App Service Plan to group applications under the same payment plan and SKU -/* -module appServicePlan 'core/host/appserviceplan.bicep' = { - name: 'appserviceplan' - scope: resourceGroup - params: { - name: !empty(appServicePlanName) ? 
appServicePlanName : '${abbrs.webServerFarms}${resourceToken}' - location: location - tags: tags - sku: { - name: appServiceSkuName - capacity: 1 - } - kind: 'linux' - } -} -*/ - - -module acaIdentity 'core/security/aca-identity.bicep' = { - name: identityName - scope: resourceGroup - params: { - identityName: identityName - location: location - } -} - -module containerApps 'core/host/container-apps.bicep' = { - name: 'container-apps' - scope: resourceGroup - params: { - name: 'app' - location: location - workloadProfile: azureContainerAppsWorkloadProfile - containerAppsEnvironmentName: containerAppsEnvironmentName - containerRegistryName: '${containerRegistryName}${resourceToken}' - logAnalyticsWorkspaceResourceId: monitoring.outputs.logAnalyticsWorkspaceId - // virtualNetworkSubnetId: virtualNetwork.outputs.subnetResourceIds[1] - } -} - -// The application frontend -module backend 'core/host/container-app-upsert.bicep' = { - name: 'aca-web' - scope: resourceGroup - dependsOn: [ - containerApps - acaIdentity - ] - params: { - name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesContainerApps}backend-${resourceToken}' - location: location - identityName: acaIdentity.name - exists: webAppExists - workloadProfile: azureContainerAppsWorkloadProfile - containerRegistryName: containerApps.outputs.registryName - containerAppsEnvironmentName: containerApps.outputs.environmentName - identityType: 'UserAssigned' - tags: union(tags, { 'azd-service-name': 'backend' }) - targetPort: 8000 - containerCpuCoreCount: '2.0' - containerMemory: '4Gi' - allowedOrigins: [ allowedOrigin ] - env: { - AZURE_STORAGE_ACCOUNT: storage.outputs.name - AZURE_STORAGE_CONTAINER: storageContainerName - AZURE_SEARCH_INDEX: searchIndexName - AZURE_SEARCH_SERVICE: searchService.outputs.name - AZURE_SEARCH_SEMANTIC_RANKER: actualSearchServiceSemanticRankerLevel - AZURE_VISION_ENDPOINT: useGPT4V ? 
computerVision.outputs.endpoint : '' - AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage - AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller - APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' - AZURE_SPEECH_SERVICE_ID: useSpeechOutputAzure ? speech.outputs.resourceId : '' - AZURE_SPEECH_SERVICE_LOCATION: useSpeechOutputAzure ? speech.outputs.location : '' - USE_SPEECH_INPUT_BROWSER: useSpeechInputBrowser - USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser - USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure - // Shared by all OpenAI deployments - OPENAI_HOST: openAiHost - AZURE_OPENAI_EMB_MODEL_NAME: embedding.modelName - AZURE_OPENAI_EMB_DIMENSIONS: embedding.dimensions - AZURE_OPENAI_CHATGPT_MODEL: chatGpt.modelName - AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName - // Specific to Azure OpenAI - AZURE_OPENAI_SERVICE: isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' - AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName - AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName - AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? 
gpt4vDeploymentName : '' - AZURE_OPENAI_API_VERSION: azureOpenAiApiVersion - AZURE_OPENAI_API_KEY_OVERRIDE: azureOpenAiApiKey - AZURE_OPENAI_CUSTOM_URL: azureOpenAiCustomUrl - // Used only with non-Azure OpenAI deployments - OPENAI_API_KEY: openAiApiKey - OPENAI_ORGANIZATION: openAiApiOrganization - // Optional login and document level access control system - AZURE_USE_AUTHENTICATION: useAuthentication - AZURE_ENFORCE_ACCESS_CONTROL: enforceAccessControl - AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: enableGlobalDocuments - AZURE_ENABLE_UNAUTHENTICATED_ACCESS: enableUnauthenticatedAccess - AZURE_SERVER_APP_ID: serverAppId - AZURE_SERVER_APP_SECRET: serverAppSecret - AZURE_CLIENT_APP_ID: clientAppId - AZURE_CLIENT_APP_SECRET: clientAppSecret - AZURE_TENANT_ID: tenantId - AZURE_AUTH_TENANT_ID: tenantIdForAuth - AZURE_AUTHENTICATION_ISSUER_URI: authenticationIssuerUri - // CORS support, for frontends on other hosts - ALLOWED_ORIGIN: allowedOrigin - USE_VECTORS: useVectors - USE_GPT4V: useGPT4V - USE_USER_UPLOAD: useUserUpload - AZURE_USERSTORAGE_ACCOUNT: useUserUpload ? userStorage.outputs.name : '' - AZURE_USERSTORAGE_CONTAINER: useUserUpload ? userStorageContainerName : '' - AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name - USE_LOCAL_PDF_PARSER: useLocalPdfParser - USE_LOCAL_HTML_PARSER: useLocalHtmlParser - // For using managed identity to access Azure resources. 
See https://github.com/microsoft/azure-container-apps/issues/442 - AZURE_CLIENT_ID: acaIdentity.outputs.clientId - } - } -} - -var defaultOpenAiDeployments = [ - { - name: chatGpt.deploymentName - model: { - format: 'OpenAI' - name: chatGpt.modelName - version: chatGpt.deploymentVersion - } - sku: { - name: 'Standard' - capacity: chatGpt.deploymentCapacity - } - } - { - name: embedding.deploymentName - model: { - format: 'OpenAI' - name: embedding.modelName - version: embedding.deploymentVersion - } - sku: { - name: 'Standard' - capacity: embedding.deploymentCapacity - } - } -] - -var openAiDeployments = concat(defaultOpenAiDeployments, useGPT4V ? [ - { - name: gpt4vDeploymentName - model: { - format: 'OpenAI' - name: gpt4vModelName - version: gpt4vModelVersion - } - sku: { - name: 'Standard' - capacity: gpt4vDeploymentCapacity - } - } - ] : []) - -module openAi 'br/public:avm/res/cognitive-services/account:0.5.4' = if (isAzureOpenAiHost && deployAzureOpenAi) { - name: 'openai' - scope: openAiResourceGroup - params: { - name: !empty(openAiServiceName) ? openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' - location: openAiResourceGroupLocation - tags: tags - kind: 'OpenAI' - customSubDomainName: !empty(openAiServiceName) ? openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' - publicNetworkAccess: publicNetworkAccess - networkAcls: { - defaultAction: 'Allow' - bypass: bypass - } - sku: openAiSkuName - deployments: openAiDeployments - disableLocalAuth: true - } -} - -// Formerly known as Form Recognizer -// Does not support bypass -module documentIntelligence 'br/public:avm/res/cognitive-services/account:0.5.4' = { - name: 'documentintelligence' - scope: documentIntelligenceResourceGroup - params: { - name: !empty(documentIntelligenceServiceName) ? 
documentIntelligenceServiceName : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' - kind: 'FormRecognizer' - customSubDomainName: !empty(documentIntelligenceServiceName) ? documentIntelligenceServiceName : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' - publicNetworkAccess: publicNetworkAccess - networkAcls: { - defaultAction: 'Allow' - } - location: documentIntelligenceResourceGroupLocation - disableLocalAuth: true - tags: tags - sku: documentIntelligenceSkuName - } -} - -module computerVision 'br/public:avm/res/cognitive-services/account:0.5.4' = if (useGPT4V) { - name: 'computerVision' - scope: computerVisionResourceGroup - params: { - name: !empty(computerVisionServiceName) - ? computerVisionServiceName - : '${abbrs.cognitiveServicesComputerVision}${resourceToken}' - kind: 'ComputerVision' - networkAcls: { - defaultAction: 'Allow' - } - customSubDomainName: !empty(computerVisionServiceName) - ? computerVisionServiceName - : '${abbrs.cognitiveServicesComputerVision}${resourceToken}' - location: computerVisionResourceGroupLocation - tags: tags - sku: computerVisionSkuName - } -} - -module speech 'br/public:avm/res/cognitive-services/account:0.5.4' = if (useSpeechOutputAzure) { - name: 'speech-service' - scope: speechResourceGroup - params: { - name: !empty(speechServiceName) ? speechServiceName : '${abbrs.cognitiveServicesSpeech}${resourceToken}' - kind: 'SpeechServices' - networkAcls: { - defaultAction: 'Allow' - } - customSubDomainName: !empty(speechServiceName) ? speechServiceName : '${abbrs.cognitiveServicesSpeech}${resourceToken}' - location: !empty(speechServiceLocation) ? speechServiceLocation : location - tags: tags - sku: speechServiceSkuName - } -} -module searchService 'core/search/search-services.bicep' = { - name: 'search-service' - scope: searchServiceResourceGroup - params: { - name: !empty(searchServiceName) ? searchServiceName : 'gptkb-${resourceToken}' - location: !empty(searchServiceLocation) ? 
searchServiceLocation : location - tags: tags - disableLocalAuth: true - sku: { - name: searchServiceSkuName - } - semanticSearch: actualSearchServiceSemanticRankerLevel - publicNetworkAccess: publicNetworkAccess == 'Enabled' ? 'enabled' : (publicNetworkAccess == 'Disabled' ? 'disabled' : null) - sharedPrivateLinkStorageAccounts: usePrivateEndpoint ? [ storage.outputs.id ] : [] - } -} - -module searchDiagnostics 'core/search/search-diagnostics.bicep' = if (useApplicationInsights) { - name: 'search-diagnostics' - scope: searchServiceResourceGroup - params: { - searchServiceName: searchService.outputs.name - workspaceId: useApplicationInsights ? monitoring.outputs.logAnalyticsWorkspaceId : '' - } -} - -module storage 'core/storage/storage-account.bicep' = { - name: 'storage' - scope: storageResourceGroup - params: { - name: !empty(storageAccountName) ? storageAccountName : '${abbrs.storageStorageAccounts}${resourceToken}' - location: storageResourceGroupLocation - tags: tags - publicNetworkAccess: publicNetworkAccess - bypass: bypass - allowBlobPublicAccess: false - allowSharedKeyAccess: false - sku: { - name: storageSkuName - } - deleteRetentionPolicy: { - enabled: true - days: 2 - } - containers: [ - { - name: storageContainerName - publicAccess: 'None' - } - ] - } -} - -module userStorage 'core/storage/storage-account.bicep' = if (useUserUpload) { - name: 'user-storage' - scope: storageResourceGroup - params: { - name: !empty(userStorageAccountName) ? userStorageAccountName : 'user${abbrs.storageStorageAccounts}${resourceToken}' - location: storageResourceGroupLocation - tags: tags - publicNetworkAccess: publicNetworkAccess - bypass: bypass - allowBlobPublicAccess: false - allowSharedKeyAccess: false - isHnsEnabled: true - sku: { - name: storageSkuName - } - containers: [ - { - name: userStorageContainerName - publicAccess: 'None' - } - ] - } -} - -// USER ROLES -var principalType = empty(runningOnGh) && empty(runningOnAdo) ? 
'User' : 'ServicePrincipal' -// var principalType = 'ServicePrincipal' - -module openAiRoleUser 'core/security/role.bicep' = if (isAzureOpenAiHost && deployAzureOpenAi) { - scope: openAiResourceGroup - name: 'openai-role-user' - params: { - principalId: principalId - roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' - principalType: principalType - } -} - -// For both document intelligence and computer vision -module cognitiveServicesRoleUser 'core/security/role.bicep' = { - scope: resourceGroup - name: 'cognitiveservices-role-user' - params: { - principalId: principalId - roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' - principalType: principalType - } -} - -module speechRoleUser 'core/security/role.bicep' = { - scope: speechResourceGroup - name: 'speech-role-user' - params: { - principalId: principalId - roleDefinitionId: 'f2dc8367-1007-4938-bd23-fe263f013447' - principalType: principalType - } -} - -module storageRoleUser 'core/security/role.bicep' = { - scope: storageResourceGroup - name: 'storage-role-user' - params: { - principalId: principalId - roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' - principalType: principalType - } -} - -module storageContribRoleUser 'core/security/role.bicep' = { - scope: storageResourceGroup - name: 'storage-contrib-role-user' - params: { - principalId: principalId - roleDefinitionId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' - principalType: principalType - } -} - -module storageOwnerRoleUser 'core/security/role.bicep' = if (useUserUpload) { - scope: storageResourceGroup - name: 'storage-owner-role-user' - params: { - principalId: principalId - roleDefinitionId: 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' - principalType: principalType - } -} - -module searchRoleUser 'core/security/role.bicep' = { - scope: searchServiceResourceGroup - name: 'search-role-user' - params: { - principalId: principalId - roleDefinitionId: '1407120a-92aa-4202-b7e9-c0e197c71c8f' - principalType: principalType - } -} - -module 
searchContribRoleUser 'core/security/role.bicep' = { - scope: searchServiceResourceGroup - name: 'search-contrib-role-user' - params: { - principalId: principalId - roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' - principalType: principalType - } -} - -module searchSvcContribRoleUser 'core/security/role.bicep' = { - scope: searchServiceResourceGroup - name: 'search-svccontrib-role-user' - params: { - principalId: principalId - roleDefinitionId: '7ca78c08-252a-4471-8644-bb5ff32d4ba0' - principalType: principalType - } -} - -// SYSTEM IDENTITIES -module openAiRoleBackend 'core/security/role.bicep' = if (isAzureOpenAiHost && deployAzureOpenAi) { - scope: openAiResourceGroup - name: 'openai-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' - principalType: 'ServicePrincipal' - } -} - -module openAiRoleSearchService 'core/security/role.bicep' = if (isAzureOpenAiHost && deployAzureOpenAi && useIntegratedVectorization) { - scope: openAiResourceGroup - name: 'openai-role-searchservice' - params: { - principalId: searchService.outputs.principalId - roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' - principalType: 'ServicePrincipal' - } -} - -module storageRoleBackend 'core/security/role.bicep' = { - scope: storageResourceGroup - name: 'storage-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' - principalType: 'ServicePrincipal' - } -} - -module storageOwnerRoleBackend 'core/security/role.bicep' = if (useUserUpload) { - scope: storageResourceGroup - name: 'storage-owner-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' - principalType: 'ServicePrincipal' - } -} - -module storageRoleSearchService 'core/security/role.bicep' = if (useIntegratedVectorization) { - scope: storageResourceGroup - name: 
'storage-role-searchservice' - params: { - principalId: searchService.outputs.principalId - roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' - principalType: 'ServicePrincipal' - } -} - -// Used to issue search queries -// https://learn.microsoft.com/azure/search/search-security-rbac -module searchRoleBackend 'core/security/role.bicep' = { - scope: searchServiceResourceGroup - name: 'search-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: '1407120a-92aa-4202-b7e9-c0e197c71c8f' - principalType: 'ServicePrincipal' - } -} - -module speechRoleBackend 'core/security/role.bicep' = { - scope: speechResourceGroup - name: 'speech-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: 'f2dc8367-1007-4938-bd23-fe263f013447' - principalType: 'ServicePrincipal' - } -} - -/* -module isolation 'network-isolation.bicep' = { - name: 'networks' - scope: resourceGroup - params: { - location: location - tags: tags - vnetName: '${abbrs.virtualNetworks}${resourceToken}' - appServicePlanName: appServicePlan.outputs.name - usePrivateEndpoint: usePrivateEndpoint - } -} - */ - -var environmentData = environment() - -var openAiPrivateEndpointConnection = (isAzureOpenAiHost && deployAzureOpenAi) ? [{ - groupId: 'account' - dnsZoneName: 'privatelink.openai.azure.com' - resourceIds: concat( - [ openAi.outputs.resourceId ], - useGPT4V ? [ computerVision.outputs.resourceId ] : [], - !useLocalPdfParser ? [ documentIntelligence.outputs.resourceId ] : [] - ) -}] : [] -var otherPrivateEndpointConnections = usePrivateEndpoint ? [ - { - groupId: 'blob' - dnsZoneName: 'privatelink.blob.${environmentData.suffixes.storage}' - resourceIds: concat( - [ storage.outputs.id ], - useUserUpload ? 
[ userStorage.outputs.id ] : [] - ) - } - { - groupId: 'searchService' - dnsZoneName: 'privatelink.search.windows.net' - resourceIds: [ searchService.outputs.id ] - } - { - groupId: 'sites' - dnsZoneName: 'privatelink.azurewebsites.net' - resourceIds: [ backend.outputs.id ] - } -] : [] - - -/* -var privateEndpointConnections = concat(otherPrivateEndpointConnections, openAiPrivateEndpointConnection) - -module privateEndpoints 'private-endpoints.bicep' = if (usePrivateEndpoint) { - name: 'privateEndpoints' - scope: resourceGroup - params: { - location: location - tags: tags - resourceToken: resourceToken - privateEndpointConnections: privateEndpointConnections - applicationInsightsId: useApplicationInsights ? monitoring.outputs.applicationInsightsId : '' - logAnalyticsWorkspaceId: useApplicationInsights ? monitoring.outputs.logAnalyticsWorkspaceId : '' - vnetName: isolation.outputs.vnetName - vnetPeSubnetName: isolation.outputs.backendSubnetId - } -} - */ - -// Used to read index definitions (required when using authentication) -// https://learn.microsoft.com/azure/search/search-security-rbac -module searchReaderRoleBackend 'core/security/role.bicep' = if (useAuthentication) { - scope: searchServiceResourceGroup - name: 'search-reader-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' - principalType: 'ServicePrincipal' - } -} - -// Used to add/remove documents from index (required for user upload feature) -module searchContribRoleBackend 'core/security/role.bicep' = if (useUserUpload) { - scope: searchServiceResourceGroup - name: 'search-contrib-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' - principalType: 'ServicePrincipal' - } -} - -// For computer vision access by the backend -module computerVisionRoleBackend 'core/security/role.bicep' = if (useGPT4V) { - scope: computerVisionResourceGroup 
- name: 'computervision-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' - principalType: 'ServicePrincipal' - } -} - -// For document intelligence access by the backend -module documentIntelligenceRoleBackend 'core/security/role.bicep' = if (useUserUpload) { - scope: documentIntelligenceResourceGroup - name: 'documentintelligence-role-backend' - params: { - principalId: backend.outputs.identityPrincipalId - roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' - principalType: 'ServicePrincipal' - } -} - -output AZURE_LOCATION string = location -output AZURE_TENANT_ID string = tenantId -output AZURE_AUTH_TENANT_ID string = authTenantId -output AZURE_RESOURCE_GROUP string = resourceGroup.name - -// Shared by all OpenAI deployments -output OPENAI_HOST string = openAiHost -output AZURE_OPENAI_EMB_MODEL_NAME string = embedding.modelName -output AZURE_OPENAI_CHATGPT_MODEL string = chatGpt.modelName -output AZURE_OPENAI_GPT4V_MODEL string = gpt4vModelName - -// Specific to Azure OpenAI -output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' -output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : '' -output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : '' -output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : '' -output AZURE_OPENAI_EMB_DEPLOYMENT string = isAzureOpenAiHost ? embedding.deploymentName : '' -output AZURE_OPENAI_GPT4V_DEPLOYMENT string = isAzureOpenAiHost ? gpt4vDeploymentName : '' - -output AZURE_SPEECH_SERVICE_ID string = useSpeechOutputAzure ? speech.outputs.resourceId : '' -output AZURE_SPEECH_SERVICE_LOCATION string = useSpeechOutputAzure ? speech.outputs.location : '' - -output AZURE_VISION_ENDPOINT string = useGPT4V ? 
computerVision.outputs.endpoint : '' - -output AZURE_DOCUMENTINTELLIGENCE_SERVICE string = documentIntelligence.outputs.name -output AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP string = documentIntelligenceResourceGroup.name - -output AZURE_SEARCH_INDEX string = searchIndexName -output AZURE_SEARCH_SERVICE string = searchService.outputs.name -output AZURE_SEARCH_SERVICE_RESOURCE_GROUP string = searchServiceResourceGroup.name -output AZURE_SEARCH_SEMANTIC_RANKER string = actualSearchServiceSemanticRankerLevel -output AZURE_SEARCH_SERVICE_ASSIGNED_USERID string = searchService.outputs.principalId - -output AZURE_STORAGE_ACCOUNT string = storage.outputs.name -output AZURE_STORAGE_CONTAINER string = storageContainerName -output AZURE_STORAGE_RESOURCE_GROUP string = storageResourceGroup.name - -output AZURE_USERSTORAGE_ACCOUNT string = useUserUpload ? userStorage.outputs.name : '' -output AZURE_USERSTORAGE_CONTAINER string = userStorageContainerName -output AZURE_USERSTORAGE_RESOURCE_GROUP string = storageResourceGroup.name - -output AZURE_USE_AUTHENTICATION bool = useAuthentication - -output BACKEND_URI string = backend.outputs.uri - -output AZURE_CONTAINER_ENVIRONMENT_NAME string = containerApps.outputs.environmentName -output AZURE_CONTAINER_REGISTRY_ENDPOINT string = containerApps.outputs.registryLoginServer -output AZURE_CONTAINER_REGISTRY_NAME string = containerApps.outputs.registryName - - -output SERVICE_WEB_IDENTITY_PRINCIPAL_ID string = backend.outputs.identityPrincipalId -output SERVICE_WEB_NAME string = backend.outputs.name -output SERVICE_WEB_URI string = backend.outputs.uri -output SERVICE_WEB_IMAGE_NAME string = backend.outputs.imageName - diff --git a/infra/main-aca.parameters.json b/infra/main-aca.parameters.json deleted file mode 100644 index 8b0a9a1567..0000000000 --- a/infra/main-aca.parameters.json +++ /dev/null @@ -1,243 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", - "contentVersion": 
"1.0.0.0", - "parameters": { - "environmentName": { - "value": "${AZURE_ENV_NAME}" - }, - "resourceGroupName": { - "value": "${AZURE_RESOURCE_GROUP}" - }, - "location": { - "value": "${AZURE_LOCATION}" - }, - "principalId": { - "value": "${AZURE_PRINCIPAL_ID}" - }, - "openAiServiceName": { - "value": "${AZURE_OPENAI_SERVICE}" - }, - "openAiResourceGroupName": { - "value": "${AZURE_OPENAI_RESOURCE_GROUP}" - }, - "openAiSkuName": { - "value": "S0" - }, - "computerVisionServiceName": { - "value": "${AZURE_COMPUTER_VISION_SERVICE}" - }, - "computerVisionResourceGroupName": { - "value": "${AZURE_COMPUTER_VISION_RESOURCE_GROUP}" - }, - "computerVisionResourceGroupLocation": { - "value": "${AZURE_COMPUTER_VISION_LOCATION=eastus}" - }, - "computerVisionSkuName": { - "value": "${AZURE_COMPUTER_VISION_SKU=S1}" - }, - "documentIntelligenceServiceName": { - "value": "${AZURE_DOCUMENTINTELLIGENCE_SERVICE}" - }, - "documentIntelligenceResourceGroupName": { - "value": "${AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP}" - }, - "documentIntelligenceSkuName": { - "value": "${AZURE_DOCUMENTINTELLIGENCE_SKU=S0}" - }, - "documentIntelligenceResourceGroupLocation": { - "value": "${AZURE_DOCUMENTINTELLIGENCE_LOCATION}" - }, - "searchIndexName": { - "value": "${AZURE_SEARCH_INDEX=gptkbindex}" - }, - "searchServiceName": { - "value": "${AZURE_SEARCH_SERVICE}" - }, - "searchServiceResourceGroupName": { - "value": "${AZURE_SEARCH_SERVICE_RESOURCE_GROUP}" - }, - "searchServiceLocation": { - "value": "${AZURE_SEARCH_SERVICE_LOCATION}" - }, - "searchServiceSkuName": { - "value": "${AZURE_SEARCH_SERVICE_SKU=standard}" - }, - "searchQueryLanguage": { - "value": "${AZURE_SEARCH_QUERY_LANGUAGE=en-us}" - }, - "searchQuerySpeller": { - "value": "${AZURE_SEARCH_QUERY_SPELLER=lexicon}" - }, - "searchServiceSemanticRankerLevel": { - "value": "${AZURE_SEARCH_SEMANTIC_RANKER=free}" - }, - "storageAccountName": { - "value": "${AZURE_STORAGE_ACCOUNT}" - }, - "storageResourceGroupName": { - "value": 
"${AZURE_STORAGE_RESOURCE_GROUP}" - }, - "storageSkuName": { - "value": "${AZURE_STORAGE_SKU=Standard_LRS}" - }, - "appServicePlanName": { - "value": "${AZURE_APP_SERVICE_PLAN}" - }, - "appServiceSkuName": { - "value": "${AZURE_APP_SERVICE_SKU=B1}" - }, - "backendServiceName": { - "value": "${AZURE_APP_SERVICE}" - }, - "chatGptModelName":{ - "value": "${AZURE_OPENAI_CHATGPT_MODEL}" - }, - "chatGptDeploymentName": { - "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT}" - }, - "chatGptDeploymentVersion":{ - "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION}" - }, - "chatGptDeploymentCapacity":{ - "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY}" - }, - "embeddingModelName":{ - "value": "${AZURE_OPENAI_EMB_MODEL_NAME}" - }, - "embeddingDeploymentName": { - "value": "${AZURE_OPENAI_EMB_DEPLOYMENT}" - }, - "embeddingDeploymentVersion":{ - "value": "${AZURE_OPENAI_EMB_DEPLOYMENT_VERSION}" - }, - "embeddingDeploymentCapacity":{ - "value": "${AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY}" - }, - "embeddingDimensions": { - "value": "${AZURE_OPENAI_EMB_DIMENSIONS}" - }, - "openAiHost": { - "value": "${OPENAI_HOST=azure}" - }, - "azureOpenAiCustomUrl":{ - "value": "${AZURE_OPENAI_CUSTOM_URL}" - }, - "azureOpenAiApiVersion":{ - "value": "${AZURE_OPENAI_API_VERSION}" - }, - "azureOpenAiApiKey":{ - "value": "${AZURE_OPENAI_API_KEY_OVERRIDE}" - }, - "openAiApiKey": { - "value": "${OPENAI_API_KEY}" - }, - "openAiApiOrganization": { - "value": "${OPENAI_ORGANIZATION}" - }, - "useApplicationInsights": { - "value": "${AZURE_USE_APPLICATION_INSIGHTS=true}" - }, - "applicationInsightsName": { - "value": "${AZURE_APPLICATION_INSIGHTS}" - }, - "applicationInsightsDashboardName": { - "value": "${AZURE_APPLICATION_INSIGHTS_DASHBOARD}" - }, - "logAnalyticsName": { - "value": "${AZURE_LOG_ANALYTICS}" - }, - "useVectors": { - "value": "${USE_VECTORS=true}" - }, - "useGPT4V": { - "value": "${USE_GPT4V=false}" - }, - "useSpeechInputBrowser": { - "value": "${USE_SPEECH_INPUT_BROWSER=false}" - }, - 
"useSpeechOutputBrowser": { - "value": "${USE_SPEECH_OUTPUT_BROWSER=false}" - }, - "useSpeechOutputAzure": { - "value": "${USE_SPEECH_OUTPUT_AZURE=false}" - }, - "speechServiceName": { - "value": "${AZURE_SPEECH_SERVICE}" - }, - "speechServiceSkuName": { - "value": "${AZURE_SPEECH_SERVICE_SKU=S0}" - }, - "speechServiceResourceGroupName": { - "value": "${AZURE_SPEECH_SERVICE_RESOURCE_GROUP}" - }, - "speechServiceLocation": { - "value": "${AZURE_SPEECH_SERVICE_LOCATION}" - }, - "useAuthentication": { - "value": "${AZURE_USE_AUTHENTICATION=false}" - }, - "enforceAccessControl": { - "value": "${AZURE_ENFORCE_ACCESS_CONTROL=false}" - }, - "enableGlobalDocuments": { - "value": "${AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS=false}" - }, - "enableUnauthenticatedAccess": { - "value": "${AZURE_ENABLE_UNAUTHENTICATED_ACCESS=false}" - }, - "tenantId": { - "value": "${AZURE_TENANT_ID}" - }, - "authTenantId": { - "value": "${AZURE_AUTH_TENANT_ID}" - }, - "serverAppId": { - "value": "${AZURE_SERVER_APP_ID}" - }, - "serverAppSecret": { - "value": "${AZURE_SERVER_APP_SECRET}" - }, - "clientAppId": { - "value": "${AZURE_CLIENT_APP_ID}" - }, - "clientAppSecret": { - "value": "${AZURE_CLIENT_APP_SECRET}" - }, - "allowedOrigin": { - "value": "${ALLOWED_ORIGIN}" - }, - "publicNetworkAccess": { - "value": "${AZURE_PUBLIC_NETWORK_ACCESS=Enabled}" - }, - "usePrivateEndpoint": { - "value": "${AZURE_USE_PRIVATE_ENDPOINT=false}" - }, - "bypass": { - "value": "${AZURE_NETWORK_BYPASS=AzureServices}" - }, - "useIntegratedVectorization": { - "value": "${USE_FEATURE_INT_VECTORIZATION}" - }, - "useUserUpload": { - "value": "${USE_USER_UPLOAD}" - }, - "useLocalPdfParser": { - "value": "${USE_LOCAL_PDF_PARSER}" - }, - "useLocalHtmlParser": { - "value": "${USE_LOCAL_HTML_PARSER}" - }, - "runningOnGh": { - "value": "${GITHUB_ACTIONS}" - }, - "runningOnAdo": { - "value": "${TF_BUILD}" - }, - "webAppExists": { - "value": "${SERVICE_WEB_RESOURCE_EXISTS=false}" - }, - "azureContainerAppsWorkloadProfile": { - 
"value": "${AZURE_CONTAINER_APPS_WORKLOAD_PROFILE=Consumption}" - } - } -} diff --git a/infra/main.bicep b/infra/main.bicep index f64c80321e..a63b2cc61f 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -191,6 +191,18 @@ param runningOnGh string = '' @description('Whether the deployment is running on Azure DevOps Pipeline') param runningOnAdo string = '' +@description('Used by azd for containerapps deployment') +param webAppExists bool + +@allowed(['Consumption', 'D4', 'D8', 'D16', 'D32', 'E4', 'E8', 'E16', 'E32', 'NC24-A100', 'NC48-A100', 'NC96-A100']) +param azureContainerAppsWorkloadProfile string + +@allowed(['appservice', 'containerapps']) +param deploymentTarget string = 'appservice' +param acaIdentityName string = deploymentTarget == 'containerapps' ? '${environmentName}-aca-identity' : '' +param acaManagedEnvironmentName string = deploymentTarget == 'containerapps' ? '${environmentName}-aca-env' : '' +param containerRegistryName string = deploymentTarget == 'containerapps' ? '${replace(environmentName, '-', '')}acr' : '' + // Organize resources in a resource group resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { name: !empty(resourceGroupName) ? 
resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' @@ -246,7 +258,7 @@ module applicationInsightsDashboard 'backend-dashboard.bicep' = if (useApplicati } // Create an App Service Plan to group applications under the same payment plan and SKU -module appServicePlan 'core/host/appserviceplan.bicep' = { +module appServicePlan 'core/host/appserviceplan.bicep' = if (deploymentTarget == 'appservice') { name: 'appserviceplan' scope: resourceGroup params: { @@ -262,14 +274,15 @@ module appServicePlan 'core/host/appserviceplan.bicep' = { } // The application frontend -module backend 'core/host/appservice.bicep' = { +module backend 'core/host/appservice.bicep' = if (deploymentTarget == 'appservice') { name: 'web' scope: resourceGroup params: { name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesAppService}backend-${resourceToken}' location: location tags: union(tags, { 'azd-service-name': 'backend' }) - appServicePlanId: appServicePlan.outputs.id + // Need to check deploymentTarget again due to https://github.com/Azure/bicep/issues/3990 + appServicePlanId: deploymentTarget == 'appservice' ? 
appServicePlan.outputs.id : '' runtimeName: 'python' runtimeVersion: '3.11' appCommandLine: 'python3 -m gunicorn main:app' @@ -345,6 +358,114 @@ module backend 'core/host/appservice.bicep' = { } } +// Azure container apps resources + +// identity for pulling images from ACR +module acaIdentity 'core/security/aca-identity.bicep' = if (deploymentTarget == 'containerapps') { + name: acaIdentityName + scope: resourceGroup + params: { + identityName: acaIdentityName + location: location + } +} + +module containerApps 'core/host/container-apps.bicep' = if (deploymentTarget == 'containerapps') { + name: 'container-apps' + scope: resourceGroup + params: { + name: 'app' + location: location + workloadProfile: azureContainerAppsWorkloadProfile + containerAppsEnvironmentName: acaManagedEnvironmentName + containerRegistryName: '${containerRegistryName}${resourceToken}' + logAnalyticsWorkspaceResourceId: monitoring.outputs.logAnalyticsWorkspaceId + } +} + +// The application frontend +module acaBackend 'core/host/container-app-upsert.bicep' = if (deploymentTarget == 'containerapps') { + name: 'aca-web' + scope: resourceGroup + dependsOn: [ + containerApps + acaIdentity + ] + params: { + name: !empty(backendServiceName) ? 
backendServiceName : '${abbrs.webSitesContainerApps}backend-${resourceToken}' + location: location + identityName: acaIdentity.name + exists: webAppExists + workloadProfile: azureContainerAppsWorkloadProfile + containerRegistryName: containerApps.outputs.registryName + containerAppsEnvironmentName: containerApps.outputs.environmentName + identityType: 'UserAssigned' + tags: union(tags, { 'azd-service-name': 'backend' }) + targetPort: 8000 + containerCpuCoreCount: '2.0' + containerMemory: '4Gi' + allowedOrigins: [ allowedOrigin ] + env: { + AZURE_STORAGE_ACCOUNT: storage.outputs.name + AZURE_STORAGE_CONTAINER: storageContainerName + AZURE_SEARCH_INDEX: searchIndexName + AZURE_SEARCH_SERVICE: searchService.outputs.name + AZURE_SEARCH_SEMANTIC_RANKER: actualSearchServiceSemanticRankerLevel + AZURE_VISION_ENDPOINT: useGPT4V ? computerVision.outputs.endpoint : '' + AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage + AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller + APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' + AZURE_SPEECH_SERVICE_ID: useSpeechOutputAzure ? speech.outputs.resourceId : '' + AZURE_SPEECH_SERVICE_LOCATION: useSpeechOutputAzure ? speech.outputs.location : '' + USE_SPEECH_INPUT_BROWSER: useSpeechInputBrowser + USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser + USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure + // Shared by all OpenAI deployments + OPENAI_HOST: openAiHost + AZURE_OPENAI_EMB_MODEL_NAME: embedding.modelName + AZURE_OPENAI_EMB_DIMENSIONS: embedding.dimensions + AZURE_OPENAI_CHATGPT_MODEL: chatGpt.modelName + AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName + // Specific to Azure OpenAI + AZURE_OPENAI_SERVICE: isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' + AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName + AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName + AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? 
gpt4vDeploymentName : '' + AZURE_OPENAI_API_VERSION: azureOpenAiApiVersion + AZURE_OPENAI_API_KEY_OVERRIDE: azureOpenAiApiKey + AZURE_OPENAI_CUSTOM_URL: azureOpenAiCustomUrl + // Used only with non-Azure OpenAI deployments + OPENAI_API_KEY: openAiApiKey + OPENAI_ORGANIZATION: openAiApiOrganization + // Optional login and document level access control system + AZURE_USE_AUTHENTICATION: useAuthentication + AZURE_ENFORCE_ACCESS_CONTROL: enforceAccessControl + AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: enableGlobalDocuments + AZURE_ENABLE_UNAUTHENTICATED_ACCESS: enableUnauthenticatedAccess + AZURE_SERVER_APP_ID: serverAppId + AZURE_SERVER_APP_SECRET: serverAppSecret + AZURE_CLIENT_APP_ID: clientAppId + AZURE_CLIENT_APP_SECRET: clientAppSecret + AZURE_TENANT_ID: tenantId + AZURE_AUTH_TENANT_ID: tenantIdForAuth + AZURE_AUTHENTICATION_ISSUER_URI: authenticationIssuerUri + // CORS support, for frontends on other hosts + ALLOWED_ORIGIN: allowedOrigin + USE_VECTORS: useVectors + USE_GPT4V: useGPT4V + USE_USER_UPLOAD: useUserUpload + AZURE_USERSTORAGE_ACCOUNT: useUserUpload ? userStorage.outputs.name : '' + AZURE_USERSTORAGE_CONTAINER: useUserUpload ? userStorageContainerName : '' + AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name + USE_LOCAL_PDF_PARSER: useLocalPdfParser + USE_LOCAL_HTML_PARSER: useLocalHtmlParser + // For using managed identity to access Azure resources. See https://github.com/microsoft/azure-container-apps/issues/442 + AZURE_CLIENT_ID: acaIdentity.outputs.clientId + } + } +} + + var defaultOpenAiDeployments = [ { name: chatGpt.deploymentName @@ -638,7 +759,7 @@ module openAiRoleBackend 'core/security/role.bicep' = if (isAzureOpenAiHost && d scope: openAiResourceGroup name: 'openai-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? 
backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' principalType: 'ServicePrincipal' } @@ -648,7 +769,7 @@ module openAiRoleSearchService 'core/security/role.bicep' = if (isAzureOpenAiHos scope: openAiResourceGroup name: 'openai-role-searchservice' params: { - principalId: searchService.outputs.principalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' principalType: 'ServicePrincipal' } @@ -658,7 +779,7 @@ module storageRoleBackend 'core/security/role.bicep' = { scope: storageResourceGroup name: 'storage-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' principalType: 'ServicePrincipal' } @@ -668,7 +789,7 @@ module storageOwnerRoleBackend 'core/security/role.bicep' = if (useUserUpload) { scope: storageResourceGroup name: 'storage-owner-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' principalType: 'ServicePrincipal' } @@ -678,7 +799,7 @@ module storageRoleSearchService 'core/security/role.bicep' = if (useIntegratedVe scope: storageResourceGroup name: 'storage-role-searchservice' params: { - principalId: searchService.outputs.principalId + principalId: (deploymentTarget == 'appservice') ? 
backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' principalType: 'ServicePrincipal' } @@ -690,7 +811,7 @@ module searchRoleBackend 'core/security/role.bicep' = { scope: searchServiceResourceGroup name: 'search-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: '1407120a-92aa-4202-b7e9-c0e197c71c8f' principalType: 'ServicePrincipal' } @@ -700,7 +821,7 @@ module speechRoleBackend 'core/security/role.bicep' = { scope: speechResourceGroup name: 'speech-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'f2dc8367-1007-4938-bd23-fe263f013447' principalType: 'ServicePrincipal' } @@ -710,17 +831,19 @@ module isolation 'network-isolation.bicep' = { name: 'networks' scope: resourceGroup params: { + deploymentTarget: deploymentTarget location: location tags: tags vnetName: '${abbrs.virtualNetworks}${resourceToken}' - appServicePlanName: appServicePlan.outputs.name + // Need to check deploymentTarget due to https://github.com/Azure/bicep/issues/3990 + appServicePlanName: deploymentTarget == 'appservice' ? appServicePlan.outputs.name : '' usePrivateEndpoint: usePrivateEndpoint } } var environmentData = environment() -var openAiPrivateEndpointConnection = (isAzureOpenAiHost && deployAzureOpenAi) ? [{ +var openAiPrivateEndpointConnection = (isAzureOpenAiHost && deployAzureOpenAi && deploymentTarget == 'appservice') ? [{ groupId: 'account' dnsZoneName: 'privatelink.openai.azure.com' resourceIds: concat( @@ -729,7 +852,7 @@ var openAiPrivateEndpointConnection = (isAzureOpenAiHost && deployAzureOpenAi) ? !useLocalPdfParser ? 
[ documentIntelligence.outputs.resourceId ] : [] ) }] : [] -var otherPrivateEndpointConnections = usePrivateEndpoint ? [ +var otherPrivateEndpointConnections = (usePrivateEndpoint && deploymentTarget == 'appservice') ? [ { groupId: 'blob' dnsZoneName: 'privatelink.blob.${environmentData.suffixes.storage}' @@ -753,7 +876,7 @@ var otherPrivateEndpointConnections = usePrivateEndpoint ? [ var privateEndpointConnections = concat(otherPrivateEndpointConnections, openAiPrivateEndpointConnection) -module privateEndpoints 'private-endpoints.bicep' = if (usePrivateEndpoint) { +module privateEndpoints 'private-endpoints.bicep' = if (usePrivateEndpoint && deploymentTarget == 'appservice') { name: 'privateEndpoints' scope: resourceGroup params: { @@ -774,7 +897,7 @@ module searchReaderRoleBackend 'core/security/role.bicep' = if (useAuthenticatio scope: searchServiceResourceGroup name: 'search-reader-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' principalType: 'ServicePrincipal' } @@ -785,7 +908,7 @@ module searchContribRoleBackend 'core/security/role.bicep' = if (useUserUpload) scope: searchServiceResourceGroup name: 'search-contrib-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' principalType: 'ServicePrincipal' } @@ -796,7 +919,7 @@ module computerVisionRoleBackend 'core/security/role.bicep' = if (useGPT4V) { scope: computerVisionResourceGroup name: 'computervision-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? 
backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' principalType: 'ServicePrincipal' } @@ -807,7 +930,7 @@ module documentIntelligenceRoleBackend 'core/security/role.bicep' = if (useUserU scope: documentIntelligenceResourceGroup name: 'documentintelligence-role-backend' params: { - principalId: backend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' principalType: 'ServicePrincipal' } @@ -856,4 +979,4 @@ output AZURE_USERSTORAGE_RESOURCE_GROUP string = storageResourceGroup.name output AZURE_USE_AUTHENTICATION bool = useAuthentication -output BACKEND_URI string = backend.outputs.uri +output BACKEND_URI string = deploymentTarget == 'appservice' ? backend.outputs.uri : acaBackend.outputs.uri diff --git a/infra/main.parameters.json b/infra/main.parameters.json index 023cea7604..dc657ddd9d 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -238,6 +238,15 @@ }, "runningOnAdo": { "value": "${TF_BUILD}" + }, + "deploymentTarget": { + "value": "${DEPLOYMENT_TARGET=appservice}" + }, + "webAppExists": { + "value": "${SERVICE_WEB_RESOURCE_EXISTS=false}" + }, + "azureContainerAppsWorkloadProfile": { + "value": "${AZURE_CONTAINER_APPS_WORKLOAD_PROFILE=Consumption}" } } } diff --git a/infra/network-isolation.bicep b/infra/network-isolation.bicep index fcc69ba5ef..4dd1e49f86 100644 --- a/infra/network-isolation.bicep +++ b/infra/network-isolation.bicep @@ -14,7 +14,10 @@ param appServicePlanName string param usePrivateEndpoint bool = false -resource appServicePlan 'Microsoft.Web/serverfarms@2022-03-01' existing = { +@allowed(['appservice', 'containerapps']) +param deploymentTarget string + +resource appServicePlan 'Microsoft.Web/serverfarms@2022-03-01' existing = if (deploymentTarget == 'appservice') { name: 
appServicePlanName } From 9dc65ca1f0fd0866369aab39c986817f308cc3f9 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Mon, 9 Sep 2024 03:02:10 +0000 Subject: [PATCH 15/41] output AZURE_CONTAINER_REGISTRY_ENDPOINT --- infra/main.bicep | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/main.bicep b/infra/main.bicep index a63b2cc61f..a439df62fd 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -980,3 +980,4 @@ output AZURE_USERSTORAGE_RESOURCE_GROUP string = storageResourceGroup.name output AZURE_USE_AUTHENTICATION bool = useAuthentication output BACKEND_URI string = deploymentTarget == 'appservice' ? backend.outputs.uri : acaBackend.outputs.uri +output AZURE_CONTAINER_REGISTRY_ENDPOINT string = deploymentTarget == 'containerapps' ? containerApps.outputs.registryLoginServer : '' From 7f523a0473c00e7a98a95a9c2ca8af7352ebcb0c Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Mon, 9 Sep 2024 05:31:08 +0000 Subject: [PATCH 16/41] Fix deployment with app service --- infra/main.bicep | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/infra/main.bicep b/infra/main.bicep index a439df62fd..eb91e16cb1 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -362,7 +362,7 @@ module backend 'core/host/appservice.bicep' = if (deploymentTarget == 'appservic // identity for pulling images from ACR module acaIdentity 'core/security/aca-identity.bicep' = if (deploymentTarget == 'containerapps') { - name: acaIdentityName + name: 'aca-identity' scope: resourceGroup params: { identityName: acaIdentityName @@ -394,11 +394,11 @@ module acaBackend 'core/host/container-app-upsert.bicep' = if (deploymentTarget params: { name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesContainerApps}backend-${resourceToken}' location: location - identityName: acaIdentity.name + identityName: (deploymentTarget == 'containerapps') ? 
acaIdentity.name : '' exists: webAppExists workloadProfile: azureContainerAppsWorkloadProfile - containerRegistryName: containerApps.outputs.registryName - containerAppsEnvironmentName: containerApps.outputs.environmentName + containerRegistryName: (deploymentTarget == 'containerapps') ? containerApps.outputs.registryName : '' + containerAppsEnvironmentName: (deploymentTarget == 'containerapps') ? containerApps.outputs.environmentName : '' identityType: 'UserAssigned' tags: union(tags, { 'azd-service-name': 'backend' }) targetPort: 8000 @@ -460,7 +460,7 @@ module acaBackend 'core/host/container-app-upsert.bicep' = if (deploymentTarget USE_LOCAL_PDF_PARSER: useLocalPdfParser USE_LOCAL_HTML_PARSER: useLocalHtmlParser // For using managed identity to access Azure resources. See https://github.com/microsoft/azure-container-apps/issues/442 - AZURE_CLIENT_ID: acaIdentity.outputs.clientId + AZURE_CLIENT_ID: (deploymentTarget == 'containerapps') ? acaIdentity.outputs.clientId : '' } } } From 9e6e145e395595e90dbb194eb30afe90e3cc5251 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Mon, 9 Sep 2024 06:25:35 +0000 Subject: [PATCH 17/41] Improve naming and README --- README.md | 12 ++++++++++ aca-host/README.md | 31 -------------------------- containerapps/README.md | 22 ++++++++++++++++++ {aca-host => containerapps}/azure.yaml | 0 4 files changed, 34 insertions(+), 31 deletions(-) delete mode 100644 aca-host/README.md create mode 100644 containerapps/README.md rename {aca-host => containerapps}/azure.yaml (100%) diff --git a/README.md b/README.md index 411361f280..3009299c83 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,18 @@ It will look like the following: > NOTE: It may take 5-10 minutes after you see 'SUCCESS' for the application to be fully deployed. If you see a "Python Developer" welcome screen or an error page, then wait a bit and refresh the page. See [guide on debugging App Service deployments](docs/appservice.md). 
+### Deploying to Azure Container Apps + +By default, this project is deployed to Azure App Service. If you want to deploy to Azure Container Apps, please run: + +```bash +cd containerapps +azd env new +azd env set DEPLOYMENT_TARGET containerapps +azd up +``` +The `.azure/{env name}/.env` file can be found in the `containerapps` folder. + ### Deploying again If you've only changed the backend/frontend code in the `app` folder, then you don't need to re-provision the Azure resources. You can just run: diff --git a/aca-host/README.md b/aca-host/README.md deleted file mode 100644 index b1084417cc..0000000000 --- a/aca-host/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Deploying on Azure Container Apps - -Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives here along with symbolic links to `app`,`data` and `scripts` folder. - -## For Linux/MacOS users - -If you are on Linux/MacOS, it should work without any extra settings to deploy to Azure Container Apps. Please use: - -```bash -cd aca-host -azd up -``` - -## For Windows users - -Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/windows/apps/get-started/enable-your-device-for-development) and symlinks for git before cloning this repo. 
-To enable symlinks for git, please use - -```bash -# local setting -git config core.symlinks true -# Alternatively, enable symlinks globally -git config --global core.symlinks true -``` - -Please ensure that the symlinks work correctly and then run: - -```bash -cd aca-host -azd up -``` diff --git a/containerapps/README.md b/containerapps/README.md new file mode 100644 index 0000000000..9c178ba1fd --- /dev/null +++ b/containerapps/README.md @@ -0,0 +1,22 @@ +# Deploying on Azure Container Apps + +Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives here. +To deploy to azure container apps, please run from project root folder: + +```bash +cd containerapps +azd env new +azd env set DEPLOYMENT_TARGET containerapps +azd up +``` + +## Customizing Workload Profile + +The default workload profile is Consumption. If you want to use a dedicated workload profile like D4, please run: + +```bash +azd env AZURE_CONTAINER_APPS_WORKLOAD_PROFILE D4 +``` + +For a full list of workload profiles, please check [here](https://learn.microsoft.com/azure/container-apps/workload-profiles-overview#profile-types). +Please note dedicated workload profiles have a different billing model than Consumption plan. Please check [here](https://learn.microsoft.com/azure/container-apps/billing) for details. 
diff --git a/aca-host/azure.yaml b/containerapps/azure.yaml similarity index 100% rename from aca-host/azure.yaml rename to containerapps/azure.yaml From 4ec32f7d3d2c0898c415f41e5164d27d4ed3e5f1 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Mon, 9 Sep 2024 08:23:41 +0000 Subject: [PATCH 18/41] Fix identity name and cost esitmation for aca --- README.md | 1 + infra/main.bicep | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3009299c83..fd549ca35b 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Pricing varies per region and usage, so it isn't possible to predict exact costs However, you can try the [Azure pricing calculator](https://azure.com/e/a87a169b256e43c089015fda8182ca87) for the resources below. - Azure App Service: Basic Tier with 1 CPU core, 1.75 GB RAM. Pricing per hour. [Pricing](https://azure.microsoft.com/pricing/details/app-service/linux/) +- Azure Container Apps: Consumption plan with 1 CPU core, 2.0 GB RAM. Pricing with Pay-as-You-Go. [Pricing](https://azure.microsoft.com/pricing/details/container-apps/) - Azure OpenAI: Standard tier, GPT and Ada models. Pricing per 1K tokens used, and at least 1K tokens are used per question. [Pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/) - Azure AI Document Intelligence: SO (Standard) tier using pre-built layout. Pricing per document page, sample documents have 261 pages total. [Pricing](https://azure.microsoft.com/pricing/details/form-recognizer/) - Azure AI Search: Basic tier, 1 replica, free level of semantic search. Pricing per hour. [Pricing](https://azure.microsoft.com/pricing/details/search/) diff --git a/infra/main.bicep b/infra/main.bicep index eb91e16cb1..cdc1db2ec4 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -394,7 +394,7 @@ module acaBackend 'core/host/container-app-upsert.bicep' = if (deploymentTarget params: { name: !empty(backendServiceName) ? 
backendServiceName : '${abbrs.webSitesContainerApps}backend-${resourceToken}' location: location - identityName: (deploymentTarget == 'containerapps') ? acaIdentity.name : '' + identityName: (deploymentTarget == 'containerapps') ? acaIdentityName : '' exists: webAppExists workloadProfile: azureContainerAppsWorkloadProfile containerRegistryName: (deploymentTarget == 'containerapps') ? containerApps.outputs.registryName : '' @@ -402,8 +402,8 @@ module acaBackend 'core/host/container-app-upsert.bicep' = if (deploymentTarget identityType: 'UserAssigned' tags: union(tags, { 'azd-service-name': 'backend' }) targetPort: 8000 - containerCpuCoreCount: '2.0' - containerMemory: '4Gi' + containerCpuCoreCount: '1.0' + containerMemory: '2Gi' allowedOrigins: [ allowedOrigin ] env: { AZURE_STORAGE_ACCOUNT: storage.outputs.name From 4174fd37c68b7aaa068777d9c183eca9b5ba6323 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Tue, 10 Sep 2024 06:42:19 +0000 Subject: [PATCH 19/41] Share env vars in bicep and update docs --- CONTRIBUTING.md | 9 + README.md | 4 +- containerapps/README.md | 23 +- containerapps/azure.yaml | 2 + docs/azure_container_apps.md | 29 +-- infra/main.bicep | 395 ++++++++++++++++++----------------- 6 files changed, 231 insertions(+), 231 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index efffe0b9a8..bbf6de3aba 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,6 +22,7 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio - [Running unit tests](#running-unit-tests) - [Running E2E tests](#running-e2e-tests) - [Code Style](#code-style) +- [Adding new azd environment variables](#add-new-azd-environment-variables) ## Code of Conduct @@ -160,3 +161,11 @@ python -m black ``` If you followed the steps above to install the pre-commit hooks, then you can just wait for those hooks to run `ruff` and `black` for you. 
+ +## Adding new azd environment variables + +When adding new azd environment variables, please remember to update: +1. App Service's [azure.yaml](./azure.yaml) +2. Azure Container Apps' [azure.yaml](./containerapps/azure.yaml) +3. [ADO pipeline](.azdo/pipelines/azure-dev.yml). +4. [Github workflows](.github/workflows/azure-dev.yml) diff --git a/README.md b/README.md index fd549ca35b..13eccaa3a7 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Pricing varies per region and usage, so it isn't possible to predict exact costs However, you can try the [Azure pricing calculator](https://azure.com/e/a87a169b256e43c089015fda8182ca87) for the resources below. - Azure App Service: Basic Tier with 1 CPU core, 1.75 GB RAM. Pricing per hour. [Pricing](https://azure.microsoft.com/pricing/details/app-service/linux/) -- Azure Container Apps: Consumption plan with 1 CPU core, 2.0 GB RAM. Pricing with Pay-as-You-Go. [Pricing](https://azure.microsoft.com/pricing/details/container-apps/) +- Azure Container Apps: Only provisioned if you deploy to Azure Container Apps following instructions [here](docs/azure_container_apps.md). Consumption plan with 1 CPU core, 2.0 GB RAM. Pricing with Pay-as-You-Go. [Pricing](https://azure.microsoft.com/pricing/details/container-apps/) - Azure OpenAI: Standard tier, GPT and Ada models. Pricing per 1K tokens used, and at least 1K tokens are used per question. [Pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/) - Azure AI Document Intelligence: SO (Standard) tier using pre-built layout. Pricing per document page, sample documents have 261 pages total. [Pricing](https://azure.microsoft.com/pricing/details/form-recognizer/) - Azure AI Search: Basic tier, 1 replica, free level of semantic search. Pricing per hour. 
[Pricing](https://azure.microsoft.com/pricing/details/search/) @@ -127,7 +127,7 @@ A related option is VS Code Dev Containers, which will open the project in your ## Deploying -Follow these steps to provision Azure resources and deploy the application code: +The steps below will provision Azure resources and deploy the application code to Azure App Service. To deploy to Azure Container Apps instead, follow [the container apps deployment guide](docs/azure_container_apps.md). 1. Login to your Azure account: diff --git a/containerapps/README.md b/containerapps/README.md index 9c178ba1fd..64e665e113 100644 --- a/containerapps/README.md +++ b/containerapps/README.md @@ -1,22 +1,3 @@ -# Deploying on Azure Container Apps +# Deploying to Azure Container Apps -Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives here. -To deploy to azure container apps, please run from project root folder: - -```bash -cd containerapps -azd env new -azd env set DEPLOYMENT_TARGET containerapps -azd up -``` - -## Customizing Workload Profile - -The default workload profile is Consumption. If you want to use a dedicated workload profile like D4, please run: - -```bash -azd env AZURE_CONTAINER_APPS_WORKLOAD_PROFILE D4 -``` - -For a full list of workload profiles, please check [here](https://learn.microsoft.com/azure/container-apps/workload-profiles-overview#profile-types). -Please note dedicated workload profiles have a different billing model than Consumption plan. Please check [here](https://learn.microsoft.com/azure/container-apps/billing) for details. +Please see the [guide](../docs/azure_container_apps.md) for deploying to Azure Container Apps. 
diff --git a/containerapps/azure.yaml b/containerapps/azure.yaml index 0fe5abb3be..542cf58315 100644 --- a/containerapps/azure.yaml +++ b/containerapps/azure.yaml @@ -89,6 +89,8 @@ pipeline: - AZURE_ADLS_GEN2_STORAGE_ACCOUNT - AZURE_ADLS_GEN2_FILESYSTEM_PATH - AZURE_ADLS_GEN2_FILESYSTEM + - DEPLOYMENT_TARGET + - AZURE_CONTAINER_APPS_WORKLOAD_PROFILE secrets: - AZURE_SERVER_APP_SECRET - AZURE_CLIENT_APP_SECRET diff --git a/docs/azure_container_apps.md b/docs/azure_container_apps.md index 521ec5f5d7..9c178ba1fd 100644 --- a/docs/azure_container_apps.md +++ b/docs/azure_container_apps.md @@ -1,31 +1,22 @@ # Deploying on Azure Container Apps -Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives in folder `aca-host` along with symbolic links to `app`,`data` and `scripts` folder. - -## For Linux/MacOS users - -If you are on Linux/MacOS, it should work without any extra settings to deploy on Azure Container Apps. Please use: +Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives here. +To deploy to azure container apps, please run from project root folder: ```bash -cd aca-host +cd containerapps +azd env new +azd env set DEPLOYMENT_TARGET containerapps azd up ``` -## For Windows users +## Customizing Workload Profile -Because Windows [doesn't enable symbolic links by default](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows), you may need to enable [Developer Mode](https://learn.microsoft.com/windows/apps/get-started/enable-your-device-for-development) and symlinks for git before cloning this repo. -To enable symlinks for git, please use +The default workload profile is Consumption. 
If you want to use a dedicated workload profile like D4, please run: ```bash -# local setting -git config core.symlinks true -# Alternatively, enable symlinks globally -git config --global core.symlinks true +azd env AZURE_CONTAINER_APPS_WORKLOAD_PROFILE D4 ``` -Please check whether the symlink works correctly and then run: - -```bash -cd aca-host -azd up -``` +For a full list of workload profiles, please check [here](https://learn.microsoft.com/azure/container-apps/workload-profiles-overview#profile-types). +Please note dedicated workload profiles have a different billing model than Consumption plan. Please check [here](https://learn.microsoft.com/azure/container-apps/billing) for details. diff --git a/infra/main.bicep b/infra/main.bicep index cdc1db2ec4..2c79b829b6 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -21,13 +21,15 @@ param searchServiceName string = '' // Set in main.parameters.json param searchServiceResourceGroupName string = '' // Set in main.parameters.json param searchServiceLocation string = '' // Set in main.parameters.json // The free tier does not support managed identity (required) or semantic search (optional) -@allowed([ 'free', 'basic', 'standard', 'standard2', 'standard3', 'storage_optimized_l1', 'storage_optimized_l2' ]) +@allowed(['free', 'basic', 'standard', 'standard2', 'standard3', 'storage_optimized_l1', 'storage_optimized_l2']) param searchServiceSkuName string // Set in main.parameters.json param searchIndexName string // Set in main.parameters.json param searchQueryLanguage string // Set in main.parameters.json param searchQuerySpeller string // Set in main.parameters.json param searchServiceSemanticRankerLevel string // Set in main.parameters.json -var actualSearchServiceSemanticRankerLevel = (searchServiceSkuName == 'free') ? 'disabled' : searchServiceSemanticRankerLevel +var actualSearchServiceSemanticRankerLevel = (searchServiceSkuName == 'free') + ? 
'disabled' + : searchServiceSemanticRankerLevel param storageAccountName string = '' // Set in main.parameters.json param storageResourceGroupName string = '' // Set in main.parameters.json @@ -40,7 +42,7 @@ param userStorageContainerName string = 'user-content' param appServiceSkuName string // Set in main.parameters.json -@allowed([ 'azure', 'openai', 'azure_custom' ]) +@allowed(['azure', 'openai', 'azure_custom']) param openAiHost string // Set in main.parameters.json param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure') param deployAzureOpenAi bool = openAiHost == 'azure' @@ -58,7 +60,18 @@ param speechServiceSkuName string // Set in main.parameters.json param useGPT4V bool = false @description('Location for the OpenAI resource group') -@allowed([ 'canadaeast', 'eastus', 'eastus2', 'francecentral', 'switzerlandnorth', 'uksouth', 'japaneast', 'northcentralus', 'australiaeast', 'swedencentral' ]) +@allowed([ + 'canadaeast' + 'eastus' + 'eastus2' + 'francecentral' + 'switzerlandnorth' + 'uksouth' + 'japaneast' + 'northcentralus' + 'australiaeast' + 'swedencentral' +]) @metadata({ azd: { type: 'location' @@ -78,7 +91,7 @@ param documentIntelligenceResourceGroupName string = '' // Set in main.parameter // Limited regions for new version: // https://learn.microsoft.com/azure/ai-services/document-intelligence/concept-layout @description('Location for the Document Intelligence resource group') -@allowed([ 'eastus', 'westus2', 'westeurope' ]) +@allowed(['eastus', 'westus2', 'westeurope']) @metadata({ azd: { type: 'location' @@ -98,7 +111,9 @@ param chatGptDeploymentName string = '' param chatGptDeploymentVersion string = '' param chatGptDeploymentCapacity int = 0 var chatGpt = { - modelName: !empty(chatGptModelName) ? chatGptModelName : startsWith(openAiHost, 'azure') ? 'gpt-35-turbo' : 'gpt-3.5-turbo' + modelName: !empty(chatGptModelName) + ? chatGptModelName + : startsWith(openAiHost, 'azure') ? 
'gpt-35-turbo' : 'gpt-3.5-turbo' deploymentName: !empty(chatGptDeploymentName) ? chatGptDeploymentName : 'chat' deploymentVersion: !empty(chatGptDeploymentVersion) ? chatGptDeploymentVersion : '0613' deploymentCapacity: chatGptDeploymentCapacity != 0 ? chatGptDeploymentCapacity : 30 @@ -143,12 +158,12 @@ param clientAppSecret string = '' // Used for optional CORS support for alternate frontends param allowedOrigin string = '' // should start with https://, shouldn't end with a / -@allowed([ 'None', 'AzureServices' ]) +@allowed(['None', 'AzureServices']) @description('If allowedIp is set, whether azure services are allowed to bypass the storage and AI services firewall.') param bypass string = 'AzureServices' @description('Public network access value for all deployed resources') -@allowed([ 'Enabled', 'Disabled' ]) +@allowed(['Enabled', 'Disabled']) param publicNetworkAccess string = 'Enabled' @description('Add a private endpoints for network connectivity') @@ -201,7 +216,9 @@ param azureContainerAppsWorkloadProfile string param deploymentTarget string = 'appservice' param acaIdentityName string = deploymentTarget == 'containerapps' ? '${environmentName}-aca-identity' : '' param acaManagedEnvironmentName string = deploymentTarget == 'containerapps' ? '${environmentName}-aca-env' : '' -param containerRegistryName string = deploymentTarget == 'containerapps' ? '${replace(environmentName, '-', '')}acr' : '' +param containerRegistryName string = deploymentTarget == 'containerapps' + ? '${replace(environmentName, '-', '')}acr' + : '' // Organize resources in a resource group resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { @@ -241,8 +258,12 @@ module monitoring 'core/monitor/monitoring.bicep' = if (useApplicationInsights) params: { location: location tags: tags - applicationInsightsName: !empty(applicationInsightsName) ? applicationInsightsName : '${abbrs.insightsComponents}${resourceToken}' - logAnalyticsName: !empty(logAnalyticsName) ? 
logAnalyticsName : '${abbrs.operationalInsightsWorkspaces}${resourceToken}' + applicationInsightsName: !empty(applicationInsightsName) + ? applicationInsightsName + : '${abbrs.insightsComponents}${resourceToken}' + logAnalyticsName: !empty(logAnalyticsName) + ? logAnalyticsName + : '${abbrs.operationalInsightsWorkspaces}${resourceToken}' publicNetworkAccess: publicNetworkAccess } } @@ -251,7 +272,9 @@ module applicationInsightsDashboard 'backend-dashboard.bicep' = if (useApplicati name: 'application-insights-dashboard' scope: resourceGroup params: { - name: !empty(applicationInsightsDashboardName) ? applicationInsightsDashboardName : '${abbrs.portalDashboards}${resourceToken}' + name: !empty(applicationInsightsDashboardName) + ? applicationInsightsDashboardName + : '${abbrs.portalDashboards}${resourceToken}' location: location applicationInsightsName: useApplicationInsights ? monitoring.outputs.applicationInsightsName : '' } @@ -273,6 +296,65 @@ module appServicePlan 'core/host/appserviceplan.bicep' = if (deploymentTarget == } } +var appEnvVariables = { + AZURE_STORAGE_ACCOUNT: storage.outputs.name + AZURE_STORAGE_CONTAINER: storageContainerName + AZURE_SEARCH_INDEX: searchIndexName + AZURE_SEARCH_SERVICE: searchService.outputs.name + AZURE_SEARCH_SEMANTIC_RANKER: actualSearchServiceSemanticRankerLevel + AZURE_VISION_ENDPOINT: useGPT4V ? computerVision.outputs.endpoint : '' + AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage + AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller + APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights + ? monitoring.outputs.applicationInsightsConnectionString + : '' + AZURE_SPEECH_SERVICE_ID: useSpeechOutputAzure ? speech.outputs.resourceId : '' + AZURE_SPEECH_SERVICE_LOCATION: useSpeechOutputAzure ? 
speech.outputs.location : '' + ENABLE_LANGUAGE_PICKER: enableLanguagePicker + USE_SPEECH_INPUT_BROWSER: useSpeechInputBrowser + USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser + USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure + // Shared by all OpenAI deployments + OPENAI_HOST: openAiHost + AZURE_OPENAI_EMB_MODEL_NAME: embedding.modelName + AZURE_OPENAI_EMB_DIMENSIONS: embedding.dimensions + AZURE_OPENAI_CHATGPT_MODEL: chatGpt.modelName + AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName + // Specific to Azure OpenAI + AZURE_OPENAI_SERVICE: isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' + AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName + AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName + AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? gpt4vDeploymentName : '' + AZURE_OPENAI_API_VERSION: azureOpenAiApiVersion + AZURE_OPENAI_API_KEY_OVERRIDE: azureOpenAiApiKey + AZURE_OPENAI_CUSTOM_URL: azureOpenAiCustomUrl + // Used only with non-Azure OpenAI deployments + OPENAI_API_KEY: openAiApiKey + OPENAI_ORGANIZATION: openAiApiOrganization + // Optional login and document level access control system + AZURE_USE_AUTHENTICATION: useAuthentication + AZURE_ENFORCE_ACCESS_CONTROL: enforceAccessControl + AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: enableGlobalDocuments + AZURE_ENABLE_UNAUTHENTICATED_ACCESS: enableUnauthenticatedAccess + AZURE_SERVER_APP_ID: serverAppId + AZURE_SERVER_APP_SECRET: serverAppSecret + AZURE_CLIENT_APP_ID: clientAppId + AZURE_CLIENT_APP_SECRET: clientAppSecret + AZURE_TENANT_ID: tenantId + AZURE_AUTH_TENANT_ID: tenantIdForAuth + AZURE_AUTHENTICATION_ISSUER_URI: authenticationIssuerUri + // CORS support, for frontends on other hosts + ALLOWED_ORIGIN: allowedOrigin + USE_VECTORS: useVectors + USE_GPT4V: useGPT4V + USE_USER_UPLOAD: useUserUpload + AZURE_USERSTORAGE_ACCOUNT: useUserUpload ? userStorage.outputs.name : '' + AZURE_USERSTORAGE_CONTAINER: useUserUpload ? 
userStorageContainerName : '' + AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name + USE_LOCAL_PDF_PARSER: useLocalPdfParser + USE_LOCAL_HTML_PARSER: useLocalHtmlParser +} + // The application frontend module backend 'core/host/appservice.bicep' = if (deploymentTarget == 'appservice') { name: 'web' @@ -290,7 +372,7 @@ module backend 'core/host/appservice.bicep' = if (deploymentTarget == 'appservic managedIdentity: true virtualNetworkSubnetId: isolation.outputs.appSubnetId publicNetworkAccess: publicNetworkAccess - allowedOrigins: [ allowedOrigin ] + allowedOrigins: [allowedOrigin] clientAppId: clientAppId serverAppId: serverAppId enableUnauthenticatedAccess: enableUnauthenticatedAccess @@ -299,62 +381,7 @@ module backend 'core/host/appservice.bicep' = if (deploymentTarget == 'appservic authenticationIssuerUri: authenticationIssuerUri use32BitWorkerProcess: appServiceSkuName == 'F1' alwaysOn: appServiceSkuName != 'F1' - appSettings: { - AZURE_STORAGE_ACCOUNT: storage.outputs.name - AZURE_STORAGE_CONTAINER: storageContainerName - AZURE_SEARCH_INDEX: searchIndexName - AZURE_SEARCH_SERVICE: searchService.outputs.name - AZURE_SEARCH_SEMANTIC_RANKER: actualSearchServiceSemanticRankerLevel - AZURE_VISION_ENDPOINT: useGPT4V ? computerVision.outputs.endpoint : '' - AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage - AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller - APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' - AZURE_SPEECH_SERVICE_ID: useSpeechOutputAzure ? speech.outputs.resourceId : '' - AZURE_SPEECH_SERVICE_LOCATION: useSpeechOutputAzure ? 
speech.outputs.location : '' - ENABLE_LANGUAGE_PICKER: enableLanguagePicker - USE_SPEECH_INPUT_BROWSER: useSpeechInputBrowser - USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser - USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure - // Shared by all OpenAI deployments - OPENAI_HOST: openAiHost - AZURE_OPENAI_EMB_MODEL_NAME: embedding.modelName - AZURE_OPENAI_EMB_DIMENSIONS: embedding.dimensions - AZURE_OPENAI_CHATGPT_MODEL: chatGpt.modelName - AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName - // Specific to Azure OpenAI - AZURE_OPENAI_SERVICE: isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' - AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName - AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName - AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? gpt4vDeploymentName : '' - AZURE_OPENAI_API_VERSION: azureOpenAiApiVersion - AZURE_OPENAI_API_KEY_OVERRIDE: azureOpenAiApiKey - AZURE_OPENAI_CUSTOM_URL: azureOpenAiCustomUrl - // Used only with non-Azure OpenAI deployments - OPENAI_API_KEY: openAiApiKey - OPENAI_ORGANIZATION: openAiApiOrganization - // Optional login and document level access control system - AZURE_USE_AUTHENTICATION: useAuthentication - AZURE_ENFORCE_ACCESS_CONTROL: enforceAccessControl - AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: enableGlobalDocuments - AZURE_ENABLE_UNAUTHENTICATED_ACCESS: enableUnauthenticatedAccess - AZURE_SERVER_APP_ID: serverAppId - AZURE_SERVER_APP_SECRET: serverAppSecret - AZURE_CLIENT_APP_ID: clientAppId - AZURE_CLIENT_APP_SECRET: clientAppSecret - AZURE_TENANT_ID: tenantId - AZURE_AUTH_TENANT_ID: tenantIdForAuth - AZURE_AUTHENTICATION_ISSUER_URI: authenticationIssuerUri - // CORS support, for frontends on other hosts - ALLOWED_ORIGIN: allowedOrigin - USE_VECTORS: useVectors - USE_GPT4V: useGPT4V - USE_USER_UPLOAD: useUserUpload - AZURE_USERSTORAGE_ACCOUNT: useUserUpload ? userStorage.outputs.name : '' - AZURE_USERSTORAGE_CONTAINER: useUserUpload ? 
userStorageContainerName : '' - AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name - USE_LOCAL_PDF_PARSER: useLocalPdfParser - USE_LOCAL_HTML_PARSER: useLocalHtmlParser - } + appSettings: appEnvVariables } } @@ -375,6 +402,7 @@ module containerApps 'core/host/container-apps.bicep' = if (deploymentTarget == scope: resourceGroup params: { name: 'app' + tags: tags location: location workloadProfile: azureContainerAppsWorkloadProfile containerAppsEnvironmentName: acaManagedEnvironmentName @@ -404,68 +432,14 @@ module acaBackend 'core/host/container-app-upsert.bicep' = if (deploymentTarget targetPort: 8000 containerCpuCoreCount: '1.0' containerMemory: '2Gi' - allowedOrigins: [ allowedOrigin ] - env: { - AZURE_STORAGE_ACCOUNT: storage.outputs.name - AZURE_STORAGE_CONTAINER: storageContainerName - AZURE_SEARCH_INDEX: searchIndexName - AZURE_SEARCH_SERVICE: searchService.outputs.name - AZURE_SEARCH_SEMANTIC_RANKER: actualSearchServiceSemanticRankerLevel - AZURE_VISION_ENDPOINT: useGPT4V ? computerVision.outputs.endpoint : '' - AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage - AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller - APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' - AZURE_SPEECH_SERVICE_ID: useSpeechOutputAzure ? speech.outputs.resourceId : '' - AZURE_SPEECH_SERVICE_LOCATION: useSpeechOutputAzure ? speech.outputs.location : '' - USE_SPEECH_INPUT_BROWSER: useSpeechInputBrowser - USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser - USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure - // Shared by all OpenAI deployments - OPENAI_HOST: openAiHost - AZURE_OPENAI_EMB_MODEL_NAME: embedding.modelName - AZURE_OPENAI_EMB_DIMENSIONS: embedding.dimensions - AZURE_OPENAI_CHATGPT_MODEL: chatGpt.modelName - AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName - // Specific to Azure OpenAI - AZURE_OPENAI_SERVICE: isAzureOpenAiHost && deployAzureOpenAi ? 
openAi.outputs.name : '' - AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName - AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName - AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? gpt4vDeploymentName : '' - AZURE_OPENAI_API_VERSION: azureOpenAiApiVersion - AZURE_OPENAI_API_KEY_OVERRIDE: azureOpenAiApiKey - AZURE_OPENAI_CUSTOM_URL: azureOpenAiCustomUrl - // Used only with non-Azure OpenAI deployments - OPENAI_API_KEY: openAiApiKey - OPENAI_ORGANIZATION: openAiApiOrganization - // Optional login and document level access control system - AZURE_USE_AUTHENTICATION: useAuthentication - AZURE_ENFORCE_ACCESS_CONTROL: enforceAccessControl - AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS: enableGlobalDocuments - AZURE_ENABLE_UNAUTHENTICATED_ACCESS: enableUnauthenticatedAccess - AZURE_SERVER_APP_ID: serverAppId - AZURE_SERVER_APP_SECRET: serverAppSecret - AZURE_CLIENT_APP_ID: clientAppId - AZURE_CLIENT_APP_SECRET: clientAppSecret - AZURE_TENANT_ID: tenantId - AZURE_AUTH_TENANT_ID: tenantIdForAuth - AZURE_AUTHENTICATION_ISSUER_URI: authenticationIssuerUri - // CORS support, for frontends on other hosts - ALLOWED_ORIGIN: allowedOrigin - USE_VECTORS: useVectors - USE_GPT4V: useGPT4V - USE_USER_UPLOAD: useUserUpload - AZURE_USERSTORAGE_ACCOUNT: useUserUpload ? userStorage.outputs.name : '' - AZURE_USERSTORAGE_CONTAINER: useUserUpload ? userStorageContainerName : '' - AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name - USE_LOCAL_PDF_PARSER: useLocalPdfParser - USE_LOCAL_HTML_PARSER: useLocalHtmlParser + allowedOrigins: [allowedOrigin] + env: union(appEnvVariables, { // For using managed identity to access Azure resources. See https://github.com/microsoft/azure-container-apps/issues/442 AZURE_CLIENT_ID: (deploymentTarget == 'containerapps') ? 
acaIdentity.outputs.clientId : '' - } + }) } } - var defaultOpenAiDeployments = [ { name: chatGpt.deploymentName @@ -493,20 +467,25 @@ var defaultOpenAiDeployments = [ } ] -var openAiDeployments = concat(defaultOpenAiDeployments, useGPT4V ? [ - { - name: gpt4vDeploymentName - model: { - format: 'OpenAI' - name: gpt4vModelName - version: gpt4vModelVersion - } - sku: { - name: 'Standard' - capacity: gpt4vDeploymentCapacity - } - } - ] : []) +var openAiDeployments = concat( + defaultOpenAiDeployments, + useGPT4V + ? [ + { + name: gpt4vDeploymentName + model: { + format: 'OpenAI' + name: gpt4vModelName + version: gpt4vModelVersion + } + sku: { + name: 'Standard' + capacity: gpt4vDeploymentCapacity + } + } + ] + : [] +) module openAi 'br/public:avm/res/cognitive-services/account:0.5.4' = if (isAzureOpenAiHost && deployAzureOpenAi) { name: 'openai' @@ -516,7 +495,9 @@ module openAi 'br/public:avm/res/cognitive-services/account:0.5.4' = if (isAzure location: openAiResourceGroupLocation tags: tags kind: 'OpenAI' - customSubDomainName: !empty(openAiServiceName) ? openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' + customSubDomainName: !empty(openAiServiceName) + ? openAiServiceName + : '${abbrs.cognitiveServicesAccounts}${resourceToken}' publicNetworkAccess: publicNetworkAccess networkAcls: { defaultAction: 'Allow' @@ -534,9 +515,13 @@ module documentIntelligence 'br/public:avm/res/cognitive-services/account:0.5.4' name: 'documentintelligence' scope: documentIntelligenceResourceGroup params: { - name: !empty(documentIntelligenceServiceName) ? documentIntelligenceServiceName : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' + name: !empty(documentIntelligenceServiceName) + ? documentIntelligenceServiceName + : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' kind: 'FormRecognizer' - customSubDomainName: !empty(documentIntelligenceServiceName) ? 
documentIntelligenceServiceName : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' + customSubDomainName: !empty(documentIntelligenceServiceName) + ? documentIntelligenceServiceName + : '${abbrs.cognitiveServicesDocumentIntelligence}${resourceToken}' publicNetworkAccess: publicNetworkAccess networkAcls: { defaultAction: 'Allow' @@ -577,7 +562,9 @@ module speech 'br/public:avm/res/cognitive-services/account:0.5.4' = if (useSpee networkAcls: { defaultAction: 'Allow' } - customSubDomainName: !empty(speechServiceName) ? speechServiceName : '${abbrs.cognitiveServicesSpeech}${resourceToken}' + customSubDomainName: !empty(speechServiceName) + ? speechServiceName + : '${abbrs.cognitiveServicesSpeech}${resourceToken}' location: !empty(speechServiceLocation) ? speechServiceLocation : location tags: tags sku: speechServiceSkuName @@ -595,8 +582,10 @@ module searchService 'core/search/search-services.bicep' = { name: searchServiceSkuName } semanticSearch: actualSearchServiceSemanticRankerLevel - publicNetworkAccess: publicNetworkAccess == 'Enabled' ? 'enabled' : (publicNetworkAccess == 'Disabled' ? 'disabled' : null) - sharedPrivateLinkStorageAccounts: usePrivateEndpoint ? [ storage.outputs.id ] : [] + publicNetworkAccess: publicNetworkAccess == 'Enabled' + ? 'enabled' + : (publicNetworkAccess == 'Disabled' ? 'disabled' : null) + sharedPrivateLinkStorageAccounts: usePrivateEndpoint ? [storage.outputs.id] : [] } } @@ -640,7 +629,9 @@ module userStorage 'core/storage/storage-account.bicep' = if (useUserUpload) { name: 'user-storage' scope: storageResourceGroup params: { - name: !empty(userStorageAccountName) ? userStorageAccountName : 'user${abbrs.storageStorageAccounts}${resourceToken}' + name: !empty(userStorageAccountName) + ? 
userStorageAccountName + : 'user${abbrs.storageStorageAccounts}${resourceToken}' location: storageResourceGroupLocation tags: tags publicNetworkAccess: publicNetworkAccess @@ -759,7 +750,9 @@ module openAiRoleBackend 'core/security/role.bicep' = if (isAzureOpenAiHost && d scope: openAiResourceGroup name: 'openai-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' principalType: 'ServicePrincipal' } @@ -769,7 +762,9 @@ module openAiRoleSearchService 'core/security/role.bicep' = if (isAzureOpenAiHos scope: openAiResourceGroup name: 'openai-role-searchservice' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' principalType: 'ServicePrincipal' } @@ -779,7 +774,9 @@ module storageRoleBackend 'core/security/role.bicep' = { scope: storageResourceGroup name: 'storage-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' principalType: 'ServicePrincipal' } @@ -789,7 +786,9 @@ module storageOwnerRoleBackend 'core/security/role.bicep' = if (useUserUpload) { scope: storageResourceGroup name: 'storage-owner-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? 
backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' principalType: 'ServicePrincipal' } @@ -799,7 +798,9 @@ module storageRoleSearchService 'core/security/role.bicep' = if (useIntegratedVe scope: storageResourceGroup name: 'storage-role-searchservice' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1' principalType: 'ServicePrincipal' } @@ -811,7 +812,9 @@ module searchRoleBackend 'core/security/role.bicep' = { scope: searchServiceResourceGroup name: 'search-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: '1407120a-92aa-4202-b7e9-c0e197c71c8f' principalType: 'ServicePrincipal' } @@ -821,7 +824,9 @@ module speechRoleBackend 'core/security/role.bicep' = { scope: speechResourceGroup name: 'speech-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? 
backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'f2dc8367-1007-4938-bd23-fe263f013447' principalType: 'ServicePrincipal' } @@ -843,36 +848,38 @@ module isolation 'network-isolation.bicep' = { var environmentData = environment() -var openAiPrivateEndpointConnection = (isAzureOpenAiHost && deployAzureOpenAi && deploymentTarget == 'appservice') ? [{ - groupId: 'account' - dnsZoneName: 'privatelink.openai.azure.com' - resourceIds: concat( - [ openAi.outputs.resourceId ], - useGPT4V ? [ computerVision.outputs.resourceId ] : [], - !useLocalPdfParser ? [ documentIntelligence.outputs.resourceId ] : [] - ) -}] : [] -var otherPrivateEndpointConnections = (usePrivateEndpoint && deploymentTarget == 'appservice') ? [ - { - groupId: 'blob' - dnsZoneName: 'privatelink.blob.${environmentData.suffixes.storage}' - resourceIds: concat( - [ storage.outputs.id ], - useUserUpload ? [ userStorage.outputs.id ] : [] - ) - } - { - groupId: 'searchService' - dnsZoneName: 'privatelink.search.windows.net' - resourceIds: [ searchService.outputs.id ] - } - { - groupId: 'sites' - dnsZoneName: 'privatelink.azurewebsites.net' - resourceIds: [ backend.outputs.id ] - } -] : [] - +var openAiPrivateEndpointConnection = (isAzureOpenAiHost && deployAzureOpenAi && deploymentTarget == 'appservice') + ? [ + { + groupId: 'account' + dnsZoneName: 'privatelink.openai.azure.com' + resourceIds: concat( + [openAi.outputs.resourceId], + useGPT4V ? [computerVision.outputs.resourceId] : [], + !useLocalPdfParser ? [documentIntelligence.outputs.resourceId] : [] + ) + } + ] + : [] +var otherPrivateEndpointConnections = (usePrivateEndpoint && deploymentTarget == 'appservice') + ? [ + { + groupId: 'blob' + dnsZoneName: 'privatelink.blob.${environmentData.suffixes.storage}' + resourceIds: concat([storage.outputs.id], useUserUpload ? 
[userStorage.outputs.id] : []) + } + { + groupId: 'searchService' + dnsZoneName: 'privatelink.search.windows.net' + resourceIds: [searchService.outputs.id] + } + { + groupId: 'sites' + dnsZoneName: 'privatelink.azurewebsites.net' + resourceIds: [backend.outputs.id] + } + ] + : [] var privateEndpointConnections = concat(otherPrivateEndpointConnections, openAiPrivateEndpointConnection) @@ -897,7 +904,9 @@ module searchReaderRoleBackend 'core/security/role.bicep' = if (useAuthenticatio scope: searchServiceResourceGroup name: 'search-reader-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' principalType: 'ServicePrincipal' } @@ -908,7 +917,9 @@ module searchContribRoleBackend 'core/security/role.bicep' = if (useUserUpload) scope: searchServiceResourceGroup name: 'search-contrib-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' principalType: 'ServicePrincipal' } @@ -919,7 +930,9 @@ module computerVisionRoleBackend 'core/security/role.bicep' = if (useGPT4V) { scope: computerVisionResourceGroup name: 'computervision-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? 
backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' principalType: 'ServicePrincipal' } @@ -930,7 +943,9 @@ module documentIntelligenceRoleBackend 'core/security/role.bicep' = if (useUserU scope: documentIntelligenceResourceGroup name: 'documentintelligence-role-backend' params: { - principalId: (deploymentTarget == 'appservice') ? backend.outputs.identityPrincipalId : acaBackend.outputs.identityPrincipalId + principalId: (deploymentTarget == 'appservice') + ? backend.outputs.identityPrincipalId + : acaBackend.outputs.identityPrincipalId roleDefinitionId: 'a97b65f3-24c7-4388-baec-2e87135dc908' principalType: 'ServicePrincipal' } @@ -980,4 +995,6 @@ output AZURE_USERSTORAGE_RESOURCE_GROUP string = storageResourceGroup.name output AZURE_USE_AUTHENTICATION bool = useAuthentication output BACKEND_URI string = deploymentTarget == 'appservice' ? backend.outputs.uri : acaBackend.outputs.uri -output AZURE_CONTAINER_REGISTRY_ENDPOINT string = deploymentTarget == 'containerapps' ? containerApps.outputs.registryLoginServer : '' +output AZURE_CONTAINER_REGISTRY_ENDPOINT string = deploymentTarget == 'containerapps' + ? containerApps.outputs.registryLoginServer + : '' From 7e49c99540bbaeba7b641552a7f6d5499c9d1d66 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Fri, 13 Sep 2024 05:37:58 +0000 Subject: [PATCH 20/41] Revert "remove symlinks and update scripts with paths relative to its own folder instead of cwd" This reverts commit 40287f2977a168009b2be383388a89b9f905959a. 
--- containerapps/app | 1 + containerapps/azure.yaml | 8 ++++---- containerapps/data | 1 + containerapps/scripts | 1 + scripts/adlsgen2setup.ps1 | 4 ++-- scripts/adlsgen2setup.sh | 9 ++------- scripts/auth_init.ps1 | 7 +++---- scripts/auth_init.sh | 11 +++-------- scripts/auth_update.ps1 | 7 +++---- scripts/auth_update.sh | 10 +++------- scripts/load_python_env.ps1 | 3 +-- scripts/load_python_env.sh | 8 ++------ scripts/loadenv.ps1 | 6 ++---- scripts/loadenv.sh | 8 ++------ scripts/manageacl.ps1 | 4 +--- scripts/manageacl.sh | 8 ++------ scripts/prepdocs.ps1 | 8 ++++---- scripts/prepdocs.sh | 10 ++-------- 18 files changed, 39 insertions(+), 75 deletions(-) create mode 120000 containerapps/app create mode 120000 containerapps/data create mode 120000 containerapps/scripts diff --git a/containerapps/app b/containerapps/app new file mode 120000 index 0000000000..5df94d993a --- /dev/null +++ b/containerapps/app @@ -0,0 +1 @@ +../app \ No newline at end of file diff --git a/containerapps/azure.yaml b/containerapps/azure.yaml index 542cf58315..a2463f215c 100644 --- a/containerapps/azure.yaml +++ b/containerapps/azure.yaml @@ -98,22 +98,22 @@ hooks: preprovision: windows: shell: pwsh - run: ../scripts/auth_init.ps1 + run: ./scripts/auth_init.ps1 interactive: true continueOnError: false posix: shell: sh - run: ../scripts/auth_init.sh + run: ./scripts/auth_init.sh interactive: true continueOnError: false postprovision: windows: shell: pwsh - run: ../scripts/auth_update.ps1; ../scripts/prepdocs.ps1 + run: ./scripts/auth_update.ps1;./scripts/prepdocs.ps1 interactive: true continueOnError: false posix: shell: sh - run: ../scripts/auth_update.sh; ../scripts/prepdocs.sh + run: ./scripts/auth_update.sh;./scripts/prepdocs.sh interactive: true continueOnError: false diff --git a/containerapps/data b/containerapps/data new file mode 120000 index 0000000000..4909e06efb --- /dev/null +++ b/containerapps/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git 
a/containerapps/scripts b/containerapps/scripts new file mode 120000 index 0000000000..a339954dff --- /dev/null +++ b/containerapps/scripts @@ -0,0 +1 @@ +../scripts \ No newline at end of file diff --git a/scripts/adlsgen2setup.ps1 b/scripts/adlsgen2setup.ps1 index b0a567f775..e6b80c0d46 100644 --- a/scripts/adlsgen2setup.ps1 +++ b/scripts/adlsgen2setup.ps1 @@ -1,7 +1,6 @@ ## Set the preference to stop on the first error $ErrorActionPreference = "Stop" -$projectRoot = Split-Path -Parent $PSScriptRoot & $PSScriptRoot\loadenv.ps1 $venvPythonPath = "./.venv/scripts/python.exe" @@ -16,4 +15,5 @@ if ([string]::IsNullOrEmpty($env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT)) { } Write-Host 'Running "adlsgen2setup.py"' -Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/adlsgen2setup.py `"$projectRoot/data`" --data-access-control $projectRoot/scripts/sampleacls.json --storage-account $env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT -v" -Wait -NoNewWindow +$cwd = (Get-Location) +Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/adlsgen2setup.py `"$cwd/data`" --data-access-control ./scripts/sampleacls.json --storage-account $env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT -v" -Wait -NoNewWindow diff --git a/scripts/adlsgen2setup.sh b/scripts/adlsgen2setup.sh index d40722846c..6118a5e8e6 100755 --- a/scripts/adlsgen2setup.sh +++ b/scripts/adlsgen2setup.sh @@ -1,11 +1,6 @@ #!/bin/sh -# Get the project root of the current script -project_root="$(cd "$(dirname $(dirname $0))" && pwd)" -script_dir="$project_root/scripts" -data_dir="$project_root/data" - -. $script_dir/loadenv.sh +. 
./scripts/loadenv.sh if [ -n "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" ]; then echo 'AZURE_ADLS_GEN2_STORAGE_ACCOUNT must be set to continue' @@ -14,4 +9,4 @@ fi echo 'Running "adlsgen2setup.py"' -./.venv/bin/python $script_dir/adlsgen2setup.py "$data_dir/*" --data-access-control "$script_dir/sampleacls.json" --storage-account "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" -v +./.venv/bin/python ./scripts/adlsgen2setup.py './data/*' --data-access-control './scripts/sampleacls.json' --storage-account "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" -v diff --git a/scripts/auth_init.ps1 b/scripts/auth_init.ps1 index 8f725b1c1d..872cf5ab6e 100755 --- a/scripts/auth_init.ps1 +++ b/scripts/auth_init.ps1 @@ -1,11 +1,10 @@ -$projectRoot = Split-Path -Parent $PSScriptRoot -. $projectRoot/scripts/load_azd_env.ps1 +. ./scripts/load_azd_env.ps1 if (-not $env:AZURE_USE_AUTHENTICATION) { Exit 0 } -. $projectRoot/scripts/load_python_env.ps1 +. ./scripts/load_python_env.ps1 $venvPythonPath = "./.venv/scripts/python.exe" if (Test-Path -Path "/usr") { @@ -13,4 +12,4 @@ if (Test-Path -Path "/usr") { $venvPythonPath = "./.venv/bin/python" } -Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/auth_init.py" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/auth_init.py" -Wait -NoNewWindow diff --git a/scripts/auth_init.sh b/scripts/auth_init.sh index 0066731793..bd7cfff552 100755 --- a/scripts/auth_init.sh +++ b/scripts/auth_init.sh @@ -1,13 +1,8 @@ #!/bin/sh -# Get the project root of the current script -project_root="$(cd "$(dirname $(dirname $0))" && pwd)" -script_dir="$project_root/scripts" -data_dir="$project_root/data" - echo "Checking if authentication should be setup..." -. $script_dir/load_azd_env.sh +. ./scripts/load_azd_env.sh if [ -z "$AZURE_USE_AUTHENTICATION" ]; then echo "AZURE_USE_AUTHENTICATION is not set, skipping authentication setup." @@ -16,6 +11,6 @@ fi echo "AZURE_USE_AUTHENTICATION is set, proceeding with authentication setup..." -. 
$script_dir/load_python_env.sh +. ./scripts/load_python_env.sh -./.venv/bin/python $script_dir/auth_init.py +./.venv/bin/python ./scripts/auth_init.py diff --git a/scripts/auth_update.ps1 b/scripts/auth_update.ps1 index f13e78d2b6..1dbf7efee2 100644 --- a/scripts/auth_update.ps1 +++ b/scripts/auth_update.ps1 @@ -1,11 +1,10 @@ -$projectRoot = Split-Path -Parent $PSScriptRoot -. $projectRoot/scripts/load_azd_env.ps1 +. ./scripts/load_azd_env.ps1 if (-not $env:AZURE_USE_AUTHENTICATION) { Exit 0 } -. $projectRoot/scripts/load_python_env.ps1 +. ./scripts/load_python_env.ps1 $venvPythonPath = "./.venv/scripts/python.exe" if (Test-Path -Path "/usr") { @@ -13,4 +12,4 @@ if (Test-Path -Path "/usr") { $venvPythonPath = "./.venv/bin/python" } -Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/auth_update.py" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/auth_update.py" -Wait -NoNewWindow diff --git a/scripts/auth_update.sh b/scripts/auth_update.sh index 1c71c9a4d4..7b64995f75 100755 --- a/scripts/auth_update.sh +++ b/scripts/auth_update.sh @@ -1,15 +1,11 @@ #!/bin/sh -# Get the project root of the current script -project_root="$(cd "$(dirname $(dirname $0))" && pwd)" -script_dir="$project_root/scripts" - -. $script_dir/load_azd_env.sh +. ./scripts/load_azd_env.sh if [ -z "$AZURE_USE_AUTHENTICATION" ]; then exit 0 fi -. $script_dir/load_python_env.sh +. 
./scripts/load_python_env.sh -./.venv/bin/python $script_dir/auth_update.py +./.venv/bin/python ./scripts/auth_update.py diff --git a/scripts/load_python_env.ps1 b/scripts/load_python_env.ps1 index 4b2ae12d36..d13af9328d 100644 --- a/scripts/load_python_env.ps1 +++ b/scripts/load_python_env.ps1 @@ -1,4 +1,3 @@ -$projectRoot = Split-Path -Parent $PSScriptRoot $pythonCmd = Get-Command python -ErrorAction SilentlyContinue if (-not $pythonCmd) { # fallback to python3 if python not found @@ -15,4 +14,4 @@ if (Test-Path -Path "/usr") { } Write-Host 'Installing dependencies from "requirements.txt" into virtual environment' -Start-Process -FilePath $venvPythonPath -ArgumentList "-m pip install -r $projectRoot/app/backend/requirements.txt" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "-m pip install -r app/backend/requirements.txt" -Wait -NoNewWindow diff --git a/scripts/load_python_env.sh b/scripts/load_python_env.sh index bd7fb679cf..49622244d2 100755 --- a/scripts/load_python_env.sh +++ b/scripts/load_python_env.sh @@ -1,11 +1,7 @@ -#!/bin/sh - -# Get the project root of the current script -project_root="$(cd "$(dirname $(dirname $0))" && pwd)" -app_dir="$project_root/app" + #!/bin/sh echo 'Creating Python virtual environment "app/backend/.venv"...' python3 -m venv .venv echo 'Installing dependencies from "requirements.txt" into virtual environment (in quiet mode)...' 
-.venv/bin/python -m pip --quiet --disable-pip-version-check install -r $app_dir/backend/requirements.txt +.venv/bin/python -m pip --quiet --disable-pip-version-check install -r app/backend/requirements.txt diff --git a/scripts/loadenv.ps1 b/scripts/loadenv.ps1 index 26a62fe86f..7b285a0f95 100644 --- a/scripts/loadenv.ps1 +++ b/scripts/loadenv.ps1 @@ -1,5 +1,3 @@ -$projectRoot = Split-Path -Parent $PSScriptRoot +./scripts/load_azd_env.ps1 -& $projectRoot/scripts/load_azd_env.ps1 - -& $projectRoot/scripts/load_python_env.ps1 +./scripts/load_python_env.ps1 diff --git a/scripts/loadenv.sh b/scripts/loadenv.sh index e667889bf7..152687e2c6 100755 --- a/scripts/loadenv.sh +++ b/scripts/loadenv.sh @@ -1,9 +1,5 @@ #!/bin/sh -# Get the project root of the current script -project_root="$(cd "$(dirname $(dirname $0))" && pwd)" -script_dir="$project_root/scripts" +. ./scripts/load_azd_env.sh -. $script_dir/load_azd_env.sh - -. $script_dir/load_python_env.sh +. ./scripts/load_python_env.sh diff --git a/scripts/manageacl.ps1 b/scripts/manageacl.ps1 index 7ce96ef0dd..ca963e5189 100644 --- a/scripts/manageacl.ps1 +++ b/scripts/manageacl.ps1 @@ -9,7 +9,5 @@ if (Test-Path -Path "/usr") { $venvPythonPath = "./.venv/bin/python" } -$projectRoot = Split-Path -Parent $PSScriptRoot - Write-Host "Running manageacl.py. 
Arguments to script: $args" -Start-Process -FilePath $venvPythonPath -ArgumentList "$projectRoot/scripts/manageacl.py --search-service $env:AZURE_SEARCH_SERVICE --index $env:AZURE_SEARCH_INDEX $args" -Wait -NoNewWindow +Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/manageacl.py --search-service $env:AZURE_SEARCH_SERVICE --index $env:AZURE_SEARCH_INDEX $args" -Wait -NoNewWindow diff --git a/scripts/manageacl.sh b/scripts/manageacl.sh index 74ac943677..556dd35eeb 100755 --- a/scripts/manageacl.sh +++ b/scripts/manageacl.sh @@ -1,10 +1,6 @@ #!/bin/sh -# Get the project root of the current script -project_root="$(cd "$(dirname $(dirname $0))" && pwd)" -script_dir="$project_root/scripts" - -. $script_dir/loadenv.sh +. ./scripts/loadenv.sh echo "Running manageacl.py. Arguments to script: $@" - ./.venv/bin/python $script_dir/manageacl.py --search-service "$AZURE_SEARCH_SERVICE" --index "$AZURE_SEARCH_INDEX" $@ + ./.venv/bin/python ./scripts/manageacl.py --search-service "$AZURE_SEARCH_SERVICE" --index "$AZURE_SEARCH_INDEX" $@ diff --git a/scripts/prepdocs.ps1 b/scripts/prepdocs.ps1 index 4985a1bbda..0d6e4aed5f 100755 --- a/scripts/prepdocs.ps1 +++ b/scripts/prepdocs.ps1 @@ -1,5 +1,4 @@ -$projectRoot = Split-Path -Parent $PSScriptRoot -& $projectRoot/scripts/loadenv.ps1 +./scripts/loadenv.ps1 $venvPythonPath = "./.venv/scripts/python.exe" if (Test-Path -Path "/usr") { @@ -72,13 +71,14 @@ if ($env:AZURE_OPENAI_API_KEY_OVERRIDE) { $openaiApiKeyArg = "--openaikey $env:OPENAI_API_KEY" } -$dataArg = "`"$projectRoot/data/*`"" +$cwd = (Get-Location) +$dataArg = "`"$cwd/data/*`"" $additionalArgs = "" if ($args) { $additionalArgs = "$args" } -$argumentList = "$projectRoot/app/backend/prepdocs.py $dataArg --verbose " + ` +$argumentList = "./app/backend/prepdocs.py $dataArg --verbose " + ` "--subscriptionid $env:AZURE_SUBSCRIPTION_ID " + ` "--storageaccount $env:AZURE_STORAGE_ACCOUNT --container $env:AZURE_STORAGE_CONTAINER --storageresourcegroup 
$env:AZURE_STORAGE_RESOURCE_GROUP " + ` "--searchservice $env:AZURE_SEARCH_SERVICE --index $env:AZURE_SEARCH_INDEX " + ` diff --git a/scripts/prepdocs.sh b/scripts/prepdocs.sh index d0ee76fbbf..c750eb1bfc 100755 --- a/scripts/prepdocs.sh +++ b/scripts/prepdocs.sh @@ -1,11 +1,6 @@ #!/bin/sh -# Get the project root of the current script -project_root="$(cd "$(dirname $(dirname $0))" && pwd)" -script_dir="$project_root/scripts" -data_dir="$project_root/data" - -. $script_dir/loadenv.sh +. ./scripts/loadenv.sh echo 'Running "prepdocs.py"' @@ -79,8 +74,7 @@ if [ $# -gt 0 ]; then additionalArgs="$@" fi - -./.venv/bin/python $app_dir/backend/prepdocs.py "$data_dir/*" --verbose \ +./.venv/bin/python ./app/backend/prepdocs.py './data/*' --verbose \ --subscriptionid $AZURE_SUBSCRIPTION_ID \ --storageaccount "$AZURE_STORAGE_ACCOUNT" --container "$AZURE_STORAGE_CONTAINER" --storageresourcegroup $AZURE_STORAGE_RESOURCE_GROUP \ --searchservice "$AZURE_SEARCH_SERVICE" --index "$AZURE_SEARCH_INDEX" \ From 259e7a5ee4df8e41db4f8ce8d699b0e6c9032a0c Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Fri, 13 Sep 2024 06:10:10 +0000 Subject: [PATCH 21/41] Add containerapps as a commented out host option --- .azdo/pipelines/azure-dev.yml | 2 + .github/workflows/azure-dev.yml | 2 + CONTRIBUTING.md | 5 +- README.md | 12 ---- azure.yaml | 6 +- containerapps/README.md | 3 - containerapps/app | 1 - containerapps/azure.yaml | 119 -------------------------------- containerapps/data | 1 - containerapps/scripts | 1 - docs/azure_container_apps.md | 17 ++++- 11 files changed, 25 insertions(+), 144 deletions(-) delete mode 100644 containerapps/README.md delete mode 120000 containerapps/app delete mode 100644 containerapps/azure.yaml delete mode 120000 containerapps/data delete mode 120000 containerapps/scripts diff --git a/.azdo/pipelines/azure-dev.yml b/.azdo/pipelines/azure-dev.yml index bb5da8069f..3498c25dc7 100644 --- a/.azdo/pipelines/azure-dev.yml +++ 
b/.azdo/pipelines/azure-dev.yml @@ -109,6 +109,8 @@ steps: AZURE_ADLS_GEN2_STORAGE_ACCOUNT: $(AZURE_ADLS_GEN2_STORAGE_ACCOUNT) AZURE_ADLS_GEN2_FILESYSTEM_PATH: $(AZURE_ADLS_GEN2_FILESYSTEM_PATH) AZURE_ADLS_GEN2_FILESYSTEM: $(AZURE_ADLS_GEN2_FILESYSTEM) + DEPLOYMENT_TARGET: $(DEPLOYMENT_TARGET) + AZURE_CONTAINER_APPS_WORKLOAD_PROFILE: $(AZURE_CONTAINER_APPS_WORKLOAD_PROFILE) - task: AzureCLI@2 displayName: Deploy Application diff --git a/.github/workflows/azure-dev.yml b/.github/workflows/azure-dev.yml index 819d6cff1d..d414609eb1 100644 --- a/.github/workflows/azure-dev.yml +++ b/.github/workflows/azure-dev.yml @@ -93,6 +93,8 @@ jobs: AZURE_ADLS_GEN2_STORAGE_ACCOUNT: ${{ vars.AZURE_ADLS_GEN2_STORAGE_ACCOUNT }} AZURE_ADLS_GEN2_FILESYSTEM_PATH: ${{ vars.AZURE_ADLS_GEN2_FILESYSTEM_PATH }} AZURE_ADLS_GEN2_FILESYSTEM: ${{ vars.AZURE_ADLS_GEN2_FILESYSTEM }} + DEPLOYMENT_TARGET: ${{ vars.DEPLOYMENT_TARGET }} + AZURE_CONTAINER_APPS_WORKLOAD_PROFILE: ${{ vars.AZURE_CONTAINER_APPS_WORKLOAD_PROFILE }} steps: - name: Checkout diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bbf6de3aba..d734be650d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -166,6 +166,5 @@ If you followed the steps above to install the pre-commit hooks, then you can ju When adding new azd environment variables, please remember to update: 1. App Service's [azure.yaml](./azure.yaml) -2. Azure Container Apps' [azure.yaml](./containerapps/azure.yaml) -3. [ADO pipeline](.azdo/pipelines/azure-dev.yml). -4. [Github workflows](.github/workflows/azure-dev.yml) +1. [ADO pipeline](.azdo/pipelines/azure-dev.yml). +1. [Github workflows](.github/workflows/azure-dev.yml) diff --git a/README.md b/README.md index 13eccaa3a7..7d1ca200f8 100644 --- a/README.md +++ b/README.md @@ -154,18 +154,6 @@ It will look like the following: > NOTE: It may take 5-10 minutes after you see 'SUCCESS' for the application to be fully deployed. 
If you see a "Python Developer" welcome screen or an error page, then wait a bit and refresh the page. See [guide on debugging App Service deployments](docs/appservice.md). -### Deploying to Azure Container Apps - -By default, this project is deployed to Azure App Service. If you want to deploy to Azure Container Apps, please run: - -```bash -cd containerapps -azd env new -azd env set DEPLOYMENT_TARGET containerapps -azd up -``` -The `.azure/{env name}/.env` file can be found in the `containerapps` folder. - ### Deploying again If you've only changed the backend/frontend code in the `app` folder, then you don't need to re-provision the Azure resources. You can just run: diff --git a/azure.yaml b/azure.yaml index 4deeba5c78..f03a1d4c46 100644 --- a/azure.yaml +++ b/azure.yaml @@ -7,9 +7,11 @@ services: backend: project: ./app/backend language: py + # Please check docs/azure_container_apps.md for more information on how to deploy to azure container apps + # host: containerapp host: appservice hooks: - prepackage: + prebuild: windows: shell: pwsh run: cd ../frontend;npm install;npm run build @@ -86,6 +88,8 @@ pipeline: - AZURE_ADLS_GEN2_STORAGE_ACCOUNT - AZURE_ADLS_GEN2_FILESYSTEM_PATH - AZURE_ADLS_GEN2_FILESYSTEM + - DEPLOYMENT_TARGET + - AZURE_CONTAINER_APPS_WORKLOAD_PROFILE secrets: - AZURE_SERVER_APP_SECRET - AZURE_CLIENT_APP_SECRET diff --git a/containerapps/README.md b/containerapps/README.md deleted file mode 100644 index 64e665e113..0000000000 --- a/containerapps/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Deploying to Azure Container Apps - -Please see the [guide](../docs/azure_container_apps.md) for deploying to Azure Container Apps. 
diff --git a/containerapps/app b/containerapps/app deleted file mode 120000 index 5df94d993a..0000000000 --- a/containerapps/app +++ /dev/null @@ -1 +0,0 @@ -../app \ No newline at end of file diff --git a/containerapps/azure.yaml b/containerapps/azure.yaml deleted file mode 100644 index a2463f215c..0000000000 --- a/containerapps/azure.yaml +++ /dev/null @@ -1,119 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json - -name: azure-search-openai-demo -metadata: - template: azure-search-openai-demo@0.0.2-beta -services: - backend: - project: ../app/backend - language: py - host: containerapp - hooks: - prebuild: - windows: - shell: pwsh - run: cd ../frontend;npm install;npm run build - interactive: false - continueOnError: false - posix: - shell: sh - run: cd ../frontend;npm install;npm run build - interactive: false - continueOnError: false -infra: - provider: bicep - path: ../infra - -pipeline: - variables: - - AZURE_OPENAI_SERVICE - - AZURE_OPENAI_API_VERSION - - AZURE_OPENAI_RESOURCE_GROUP - - AZURE_DOCUMENTINTELLIGENCE_SERVICE - - AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP - - AZURE_DOCUMENTINTELLIGENCE_SKU - - AZURE_DOCUMENTINTELLIGENCE_LOCATION - - AZURE_SEARCH_INDEX - - AZURE_SEARCH_SERVICE - - AZURE_SEARCH_SERVICE_RESOURCE_GROUP - - AZURE_SEARCH_SERVICE_LOCATION - - AZURE_SEARCH_SERVICE_SKU - - AZURE_SEARCH_QUERY_LANGUAGE - - AZURE_SEARCH_QUERY_SPELLER - - AZURE_SEARCH_SEMANTIC_RANKER - - AZURE_STORAGE_ACCOUNT - - AZURE_STORAGE_RESOURCE_GROUP - - AZURE_STORAGE_SKU - - AZURE_APP_SERVICE - - AZURE_APP_SERVICE_SKU - - AZURE_APP_SERVICE_PLAN - - AZURE_OPENAI_CHATGPT_MODEL - - AZURE_OPENAI_CHATGPT_DEPLOYMENT - - AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY - - AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION - - AZURE_OPENAI_EMB_MODEL_NAME - - AZURE_OPENAI_EMB_DEPLOYMENT - - AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY - - AZURE_OPENAI_EMB_DEPLOYMENT_VERSION - - AZURE_OPENAI_EMB_DIMENSIONS - - OPENAI_HOST - - 
OPENAI_API_KEY - - OPENAI_ORGANIZATION - - AZURE_USE_APPLICATION_INSIGHTS - - AZURE_APPLICATION_INSIGHTS - - AZURE_APPLICATION_INSIGHTS_DASHBOARD - - AZURE_LOG_ANALYTICS - - USE_VECTORS - - USE_GPT4V - - AZURE_VISION_ENDPOINT - - VISION_SECRET_NAME - - AZURE_COMPUTER_VISION_SERVICE - - AZURE_COMPUTER_VISION_RESOURCE_GROUP - - AZURE_COMPUTER_VISION_LOCATION - - AZURE_COMPUTER_VISION_SKU - - USE_SPEECH_INPUT_BROWSER - - USE_SPEECH_OUTPUT_BROWSER - - USE_SPEECH_OUTPUT_AZURE - - AZURE_SPEECH_SERVICE - - AZURE_SPEECH_SERVICE_RESOURCE_GROUP - - AZURE_SPEECH_SERVICE_LOCATION - - AZURE_SPEECH_SERVICE_SKU - - AZURE_KEY_VAULT_NAME - - AZURE_USE_AUTHENTICATION - - AZURE_ENFORCE_ACCESS_CONTROL - - AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS - - AZURE_AUTH_TENANT_ID - - AZURE_SERVER_APP_ID - - AZURE_CLIENT_APP_ID - - ALLOWED_ORIGIN - - AZURE_ADLS_GEN2_STORAGE_ACCOUNT - - AZURE_ADLS_GEN2_FILESYSTEM_PATH - - AZURE_ADLS_GEN2_FILESYSTEM - - DEPLOYMENT_TARGET - - AZURE_CONTAINER_APPS_WORKLOAD_PROFILE - secrets: - - AZURE_SERVER_APP_SECRET - - AZURE_CLIENT_APP_SECRET -hooks: - preprovision: - windows: - shell: pwsh - run: ./scripts/auth_init.ps1 - interactive: true - continueOnError: false - posix: - shell: sh - run: ./scripts/auth_init.sh - interactive: true - continueOnError: false - postprovision: - windows: - shell: pwsh - run: ./scripts/auth_update.ps1;./scripts/prepdocs.ps1 - interactive: true - continueOnError: false - posix: - shell: sh - run: ./scripts/auth_update.sh;./scripts/prepdocs.sh - interactive: true - continueOnError: false diff --git a/containerapps/data b/containerapps/data deleted file mode 120000 index 4909e06efb..0000000000 --- a/containerapps/data +++ /dev/null @@ -1 +0,0 @@ -../data \ No newline at end of file diff --git a/containerapps/scripts b/containerapps/scripts deleted file mode 120000 index a339954dff..0000000000 --- a/containerapps/scripts +++ /dev/null @@ -1 +0,0 @@ -../scripts \ No newline at end of file diff --git a/docs/azure_container_apps.md 
b/docs/azure_container_apps.md index 9c178ba1fd..59f5ae1776 100644 --- a/docs/azure_container_apps.md +++ b/docs/azure_container_apps.md @@ -1,12 +1,23 @@ # Deploying on Azure Container Apps -Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, the azure.yaml file for deploying to Azure Container Apps lives here. -To deploy to azure container apps, please run from project root folder: +Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, there could be only one host option in the [azure.yaml](../azure.yaml) file. +By default, `host: appservice` is used and `host: containerapp` is commented out. + +To deploy to azure container apps, please follow the following steps: + +1. Comment out `host: appservice` and uncomment `host: containerapp` in the [azure.yaml](../azure.yaml) file. + +1. Run ```bash -cd containerapps +# Login to your azure account +azd auth login + +# Create a new env azd env new +# Set deployment target to containerapps azd env set DEPLOYMENT_TARGET containerapps + azd up ``` From 920e97947ce1cf26a69495769b944816c5db8432 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 13 Sep 2024 09:26:33 -0700 Subject: [PATCH 22/41] Update app/backend/.dockerignore --- app/backend/.dockerignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/backend/.dockerignore b/app/backend/.dockerignore index 13a9b73747..9008115fc8 100644 --- a/app/backend/.dockerignore +++ b/app/backend/.dockerignore @@ -4,4 +4,4 @@ __pycache__ *.pyo *.pyd .Python -env \ No newline at end of file +env From eb09e4614d5f8e426ec110118faa25eeae6c0bbf Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 13 Sep 2024 09:46:20 -0700 Subject: [PATCH 23/41] Apply suggestions from code review --- docs/azure_container_apps.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/azure_container_apps.md b/docs/azure_container_apps.md index 59f5ae1776..937f6a79e6 100644 --- a/docs/azure_container_apps.md +++ 
b/docs/azure_container_apps.md @@ -1,9 +1,9 @@ # Deploying on Azure Container Apps -Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of azd, there could be only one host option in the [azure.yaml](../azure.yaml) file. +Due to [a limitation](https://github.com/Azure/azure-dev/issues/2736) of the Azure Developer CLI (`azd`), there can be only one host option in the [azure.yaml](../azure.yaml) file. By default, `host: appservice` is used and `host: containerapp` is commented out. -To deploy to azure container apps, please follow the following steps: +To deploy to Azure Container Apps, please follow the following steps: 1. Comment out `host: appservice` and uncomment `host: containerapp` in the [azure.yaml](../azure.yaml) file. From 13021cbac4d42a38de3d008f99cb80afacbef83c Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 13 Sep 2024 10:24:57 -0700 Subject: [PATCH 24/41] More steps for deployment guide --- README.md | 6 +++--- docs/azure_container_apps.md | 38 ++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 4abc0af106..e7e77713e6 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Pricing varies per region and usage, so it isn't possible to predict exact costs However, you can try the [Azure pricing calculator](https://azure.com/e/a87a169b256e43c089015fda8182ca87) for the resources below. - Azure App Service: Basic Tier with 1 CPU core, 1.75 GB RAM. Pricing per hour. [Pricing](https://azure.microsoft.com/pricing/details/app-service/linux/) -- Azure Container Apps: Only provisioned if you deploy to Azure Container Apps following instructions [here](docs/azure_container_apps.md). Consumption plan with 1 CPU core, 2.0 GB RAM. Pricing with Pay-as-You-Go. 
[Pricing](https://azure.microsoft.com/pricing/details/container-apps/) +- Azure Container Apps: Only provisioned if you deploy to Azure Container Apps following [the ACA deployment guide](docs/azure_container_apps.md). Consumption plan with 1 CPU core, 2.0 GB RAM. Pricing with Pay-as-You-Go. [Pricing](https://azure.microsoft.com/pricing/details/container-apps/) - Azure OpenAI: Standard tier, GPT and Ada models. Pricing per 1K tokens used, and at least 1K tokens are used per question. [Pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/) - Azure AI Document Intelligence: SO (Standard) tier using pre-built layout. Pricing per document page, sample documents have 261 pages total. [Pricing](https://azure.microsoft.com/pricing/details/form-recognizer/) - Azure AI Search: Basic tier, 1 replica, free level of semantic search. Pricing per hour. [Pricing](https://azure.microsoft.com/pricing/details/search/) @@ -135,9 +135,9 @@ The steps below will provision Azure resources and deploy the application code t azd auth login ``` - For GitHub Codespaces users, if the previous command fails, try: + For GitHub Codespaces users, if the previous command fails, try: ```shell - azd auth login --use-device-code + azd auth login --use-device-code ``` 1. Create a new azd environment: diff --git a/docs/azure_container_apps.md b/docs/azure_container_apps.md index 937f6a79e6..8f675d280c 100644 --- a/docs/azure_container_apps.md +++ b/docs/azure_container_apps.md @@ -7,19 +7,37 @@ To deploy to Azure Container Apps, please follow the following steps: 1. Comment out `host: appservice` and uncomment `host: containerapp` in the [azure.yaml](../azure.yaml) file. -1. Run +2. Login to your Azure account: -```bash -# Login to your azure account -azd auth login + ```bash + azd auth login + ``` -# Create a new env -azd env new -# Set deployment target to containerapps -azd env set DEPLOYMENT_TARGET containerapps +3. 
Create a new `azd` environment to store the deployment parameters: -azd up -``` + ```bash + azd env new + ``` + + Enter a name that will be used for the resource group. + This will create a new folder in the `.azure` folder, and set it as the active environment for any calls to `azd` going forward. + +4. Set the deployment target to `containerapps`: + + ```bash + azd env set DEPLOYMENT_TARGET containerapps + ``` + +5. (Optional) This is the point where you can customize the deployment by setting other `azd1 environment variables, in order to [use existing resources](docs/deploy_existing.md), [enable optional features (such as auth or vision)](docs/deploy_features.md), or [deploy to free tiers](docs/deploy_lowcost.md). +6. Provision the resources and deploy the code: + + ```bash + azd up + ``` + + This will provision Azure resources and deploy this sample to those resources, including building the search index based on the files found in the `./data` folder. + + **Important**: Beware that the resources created by this command will incur immediate costs, primarily from the AI Search resource. These resources may accrue costs even if you interrupt the command before it is fully executed. You can run `azd down` or delete the resources manually to avoid unnecessary spending. 
## Customizing Workload Profile From 8b19702f716368b75ad81ba7a2a9dc32da3223d2 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 13 Sep 2024 10:30:37 -0700 Subject: [PATCH 25/41] Update azure.yaml --- azure.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure.yaml b/azure.yaml index f03a1d4c46..6a999fd140 100644 --- a/azure.yaml +++ b/azure.yaml @@ -7,7 +7,7 @@ services: backend: project: ./app/backend language: py - # Please check docs/azure_container_apps.md for more information on how to deploy to azure container apps + # Please check docs/azure_container_apps.md for more information on how to deploy to Azure Container Apps # host: containerapp host: appservice hooks: From d49f60c60c45229e33ca80f17d04b9133d959bb9 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 13 Sep 2024 11:15:14 -0700 Subject: [PATCH 26/41] Update comment --- infra/main.bicep | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/infra/main.bicep b/infra/main.bicep index 328058d744..ae63e049f7 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -385,9 +385,9 @@ module backend 'core/host/appservice.bicep' = if (deploymentTarget == 'appservic } } -// Azure container apps resources +// Azure container apps resources (Only deployed if deploymentTarget is 'containerapps') -// identity for pulling images from ACR +// User-assigned identity for pulling images from ACR module acaIdentity 'core/security/aca-identity.bicep' = if (deploymentTarget == 'containerapps') { name: 'aca-identity' scope: resourceGroup From 11837ba7c24252201c5e2702489644ead2be4760 Mon Sep 17 00:00:00 2001 From: yefuwang <1yefuwang1@gmail.com> Date: Sat, 14 Sep 2024 03:02:25 +0000 Subject: [PATCH 27/41] cleanup bicep files and improve docs --- docs/azure_container_apps.md | 4 ++++ infra/core/host/container-apps.bicep | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/azure_container_apps.md b/docs/azure_container_apps.md index 8f675d280c..9fb1854007 100644 --- 
a/docs/azure_container_apps.md +++ b/docs/azure_container_apps.md @@ -49,3 +49,7 @@ azd env AZURE_CONTAINER_APPS_WORKLOAD_PROFILE D4 For a full list of workload profiles, please check [here](https://learn.microsoft.com/azure/container-apps/workload-profiles-overview#profile-types). Please note dedicated workload profiles have a different billing model than Consumption plan. Please check [here](https://learn.microsoft.com/azure/container-apps/billing) for details. + +## Private endpoints + +Private endpoints is still in private preview for Azure Conainer Apps and not supported for now. diff --git a/infra/core/host/container-apps.bicep b/infra/core/host/container-apps.bicep index 7b64f5b101..81646daba0 100644 --- a/infra/core/host/container-apps.bicep +++ b/infra/core/host/container-apps.bicep @@ -52,7 +52,6 @@ module containerAppsEnvironment 'br/public:avm/res/app/managed-environment:0.5.2 // Non-required parameters infrastructureResourceGroupName: containerRegistryResourceGroupName infrastructureSubnetId: virtualNetworkSubnetId - // internal: true location: location tags: tags zoneRedundant: false From 560076bf8491cde1e046207dbf1425f56e34b816 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 13 Sep 2024 21:46:51 -0700 Subject: [PATCH 28/41] Update condition for running in production for credential --- app/backend/app.py | 5 ++++- docs/appservice.md | 10 ++++++---- infra/main.bicep | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index aea6a52765..9a84b91b1b 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -440,11 +440,14 @@ async def setup_clients(): USE_SPEECH_OUTPUT_BROWSER = os.getenv("USE_SPEECH_OUTPUT_BROWSER", "").lower() == "true" USE_SPEECH_OUTPUT_AZURE = os.getenv("USE_SPEECH_OUTPUT_AZURE", "").lower() == "true" + # WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep + RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or 
os.getenv("RUNNING_IN_PRODUCTION") is not None + # Use the current user identity for keyless authentication to Azure services. # This assumes you use 'azd auth login' locally, and managed identity when deployed on Azure. # The managed identity is setup in the infra/ folder. azure_credential: Union[AzureDeveloperCliCredential, ManagedIdentityCredential] - if os.getenv("WEBSITE_HOSTNAME"): # Environment variable set on Azure Web Apps + if RUNNING_ON_AZURE: current_app.logger.info("Setting up Azure credential using ManagedIdentityCredential") azure_credential = ManagedIdentityCredential() elif AZURE_TENANT_ID: diff --git a/docs/appservice.md b/docs/appservice.md index 60fffe7cb5..0fbba03279 100644 --- a/docs/appservice.md +++ b/docs/appservice.md @@ -631,15 +631,17 @@ To see any exceptions and server errors, navigate to the _Investigate -> Failure ## Configuring log levels -By default, the deployed app only logs messages with a level of `WARNING` or higher. +By default, the deployed app only logs messages from packages with a level of `WARNING` or higher, +but logs all messages from the app with a level of `INFO` or higher. 
These lines of code in `app/backend/app.py` configure the logging level: ```python +# Set root level to WARNING to avoid seeing overly verbose logs from SDKS +logging.basicConfig(level=logging.WARNING) +# Set the app logger level to INFO by default default_level = "INFO" -if os.getenv("WEBSITE_HOSTNAME"): # In production, don't log as heavily - default_level = "WARNING" -logging.basicConfig(level=os.getenv("APP_LOG_LEVEL", default_level)) +app.logger.setLevel(os.getenv("APP_LOG_LEVEL", default_level)) ``` To change the default level, either change `default_level` or set the `APP_LOG_LEVEL` environment variable diff --git a/infra/main.bicep b/infra/main.bicep index ae63e049f7..04c4832c10 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -353,6 +353,7 @@ var appEnvVariables = { AZURE_DOCUMENTINTELLIGENCE_SERVICE: documentIntelligence.outputs.name USE_LOCAL_PDF_PARSER: useLocalPdfParser USE_LOCAL_HTML_PARSER: useLocalHtmlParser + RUNNING_IN_PRODUCTION: 'true' } // App Service for the web application (Python Quart app with JS frontend) From 59e01c8da75ef8b9414915a85f2d2b572f41e413 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Mon, 23 Sep 2024 08:55:59 -0600 Subject: [PATCH 29/41] Refactors to scripts --- app/backend/app.py | 21 +- .../approaches/chatreadretrieveread.py | 2 +- app/backend/approaches/retrievethenread.py | 3 +- app/backend/load_azd_env.py | 23 ++ app/backend/main.py | 9 + app/backend/prepdocs.py | 217 ++++++------------ app/backend/prepdocslib/blobmanager.py | 2 +- app/backend/prepdocslib/embeddings.py | 2 +- app/backend/prepdocslib/filestrategy.py | 2 +- app/backend/prepdocslib/htmlparser.py | 2 +- .../integratedvectorizerstrategy.py | 2 +- app/backend/prepdocslib/listfilestrategy.py | 2 +- app/backend/prepdocslib/pdfparser.py | 2 +- app/backend/prepdocslib/searchmanager.py | 2 +- app/backend/prepdocslib/textsplitter.py | 2 +- app/start.sh | 16 -- azure.yaml | 2 + docs/deploy_features.md | 4 +- docs/login_and_acl.md | 12 +- 
samples/document-security/README.md | 100 +++++--- scripts/adlsgen2setup.ps1 | 19 -- scripts/adlsgen2setup.py | 38 +-- scripts/adlsgen2setup.sh | 12 - scripts/auth_init.ps1 | 6 +- scripts/auth_init.py | 10 +- scripts/auth_init.sh | 5 +- scripts/auth_update.ps1 | 5 +- scripts/auth_update.sh | 5 +- scripts/load_azd_env.ps1 | 8 - scripts/load_azd_env.sh | 10 - scripts/loadenv.ps1 | 3 - scripts/loadenv.sh | 5 - scripts/manageacl.ps1 | 13 -- scripts/manageacl.py | 25 +- scripts/manageacl.sh | 6 - scripts/prepdocs.ps1 | 81 +------ scripts/prepdocs.sh | 84 +------ scripts/roles.ps1 | 23 +- scripts/roles.sh | 15 +- 39 files changed, 273 insertions(+), 527 deletions(-) create mode 100644 app/backend/load_azd_env.py delete mode 100644 scripts/adlsgen2setup.ps1 delete mode 100755 scripts/adlsgen2setup.sh delete mode 100644 scripts/load_azd_env.ps1 delete mode 100755 scripts/load_azd_env.sh delete mode 100644 scripts/loadenv.ps1 delete mode 100755 scripts/loadenv.sh delete mode 100644 scripts/manageacl.ps1 delete mode 100755 scripts/manageacl.sh diff --git a/app/backend/app.py b/app/backend/app.py index 9a84b91b1b..9e4425b51a 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -395,7 +395,7 @@ async def setup_clients(): AZURE_SEARCH_INDEX = os.environ["AZURE_SEARCH_INDEX"] # Shared by all OpenAI deployments OPENAI_HOST = os.getenv("OPENAI_HOST", "azure") - OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"] + OPENAI_CHATGPT_MODEL = "phi3.5:latest" OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002") OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS", 1536)) # Used with Azure OpenAI deployments @@ -448,8 +448,16 @@ async def setup_clients(): # The managed identity is setup in the infra/ folder. 
azure_credential: Union[AzureDeveloperCliCredential, ManagedIdentityCredential] if RUNNING_ON_AZURE: - current_app.logger.info("Setting up Azure credential using ManagedIdentityCredential") - azure_credential = ManagedIdentityCredential() + if AZURE_CLIENT_ID := os.getenv("AZURE_CLIENT_ID"): + # ManagedIdentityCredential should use AZURE_CLIENT_ID if set in env, but its not working for some reason, + # so we explicitly pass it in as the client ID here. This is necessary for user-assigned managed identities. + current_app.logger.info( + "Setting up Azure credential using ManagedIdentityCredential with client_id %s", AZURE_CLIENT_ID + ) + azure_credential = ManagedIdentityCredential(client_id=AZURE_CLIENT_ID) + else: + current_app.logger.info("Setting up Azure credential using ManagedIdentityCredential") + azure_credential = ManagedIdentityCredential() elif AZURE_TENANT_ID: current_app.logger.info( "Setting up Azure credential using AzureDeveloperCliCredential with tenant_id %s", AZURE_TENANT_ID @@ -705,9 +713,10 @@ def create_app(): # Log levels should be one of https://docs.python.org/3/library/logging.html#logging-levels # Set root level to WARNING to avoid seeing overly verbose logs from SDKS logging.basicConfig(level=logging.WARNING) - # Set the app logger level to INFO by default - default_level = "INFO" - app.logger.setLevel(os.getenv("APP_LOG_LEVEL", default_level)) + # Set our own logger levels to INFO by default + app_level = os.getenv("APP_LOG_LEVEL", "INFO") + app.logger.setLevel(os.getenv("APP_LOG_LEVEL", app_level)) + logging.getLogger("ragapp").setLevel(app_level) if allowed_origin := os.getenv("ALLOWED_ORIGIN"): app.logger.info("ALLOWED_ORIGIN is set, enabling CORS for %s", allowed_origin) diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 95ca08f0f0..9d4e7bbd52 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -51,7 +51,7 
@@ def __init__( self.content_field = content_field self.query_language = query_language self.query_speller = query_speller - self.chatgpt_token_limit = get_token_limit(chatgpt_model) + self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=True) @property def system_message_chat_conversation(self): diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py index d5b05a0fbe..24ac84d3cc 100644 --- a/app/backend/approaches/retrievethenread.py +++ b/app/backend/approaches/retrievethenread.py @@ -66,7 +66,7 @@ def __init__( self.content_field = content_field self.query_language = query_language self.query_speller = query_speller - self.chatgpt_token_limit = get_token_limit(chatgpt_model) + self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=True) async def run( self, @@ -121,6 +121,7 @@ async def run( few_shots=[{"role": "user", "content": self.question}, {"role": "assistant", "content": self.answer}], new_user_content=user_content, max_tokens=self.chatgpt_token_limit - response_token_limit, + fallback_to_default=True, ) chat_completion = await self.openai_client.chat.completions.create( diff --git a/app/backend/load_azd_env.py b/app/backend/load_azd_env.py new file mode 100644 index 0000000000..3d869b0708 --- /dev/null +++ b/app/backend/load_azd_env.py @@ -0,0 +1,23 @@ +import json +import logging +import subprocess + +from dotenv import load_dotenv + +logger = logging.getLogger("ragapp") + + +def load_azd_env(): + """Get path to current azd env file and load file using python-dotenv""" + result = subprocess.run("azd env list -o json", shell=True, capture_output=True, text=True) + if result.returncode != 0: + raise Exception("Error loading azd env") + env_json = json.loads(result.stdout) + env_file_path = None + for entry in env_json: + if entry["IsDefault"]: + env_file_path = entry["DotEnvPath"] + if not env_file_path: + raise Exception("No default azd env file found") + 
logger.info(f"Loading azd env from {env_file_path}") + load_dotenv(env_file_path, override=True) diff --git a/app/backend/main.py b/app/backend/main.py index 0a23b5abbf..0f2914a483 100644 --- a/app/backend/main.py +++ b/app/backend/main.py @@ -1,3 +1,12 @@ +import os + from app import create_app +from load_azd_env import load_azd_env + +# WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep +RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None + +if not RUNNING_ON_AZURE: + load_azd_env() app = create_app() diff --git a/app/backend/prepdocs.py b/app/backend/prepdocs.py index deea428139..ba911e5fbb 100644 --- a/app/backend/prepdocs.py +++ b/app/backend/prepdocs.py @@ -1,12 +1,14 @@ import argparse import asyncio import logging +import os from typing import Optional, Union from azure.core.credentials import AzureKeyCredential from azure.core.credentials_async import AsyncTokenCredential from azure.identity.aio import AzureDeveloperCliCredential, get_bearer_token_provider +from load_azd_env import load_azd_env from prepdocslib.blobmanager import BlobManager from prepdocslib.embeddings import ( AzureOpenAIEmbeddingService, @@ -31,7 +33,7 @@ from prepdocslib.textparser import TextParser from prepdocslib.textsplitter import SentenceTextSplitter, SimpleTextSplitter -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") def clean_key_if_exists(key: Union[str, None]) -> Union[str, None]: @@ -218,111 +220,19 @@ async def main(strategy: Strategy, setup_index: bool = True): if __name__ == "__main__": parser = argparse.ArgumentParser( description="Prepare documents by extracting content from PDFs, splitting content into sections, uploading to blob storage, and indexing in a search index.", - epilog="Example: prepdocs.py '.\\data\*' --storageaccount myaccount --container mycontainer --searchservice mysearch --index myindex -v", + epilog="Example: prepdocs.py 
'.\\data\*' -v", ) parser.add_argument("files", nargs="?", help="Files to be processed") - parser.add_argument( - "--datalakestorageaccount", required=False, help="Optional. Azure Data Lake Storage Gen2 Account name" - ) - parser.add_argument( - "--datalakefilesystem", - required=False, - default="gptkbcontainer", - help="Optional. Azure Data Lake Storage Gen2 filesystem name", - ) - parser.add_argument( - "--datalakepath", - required=False, - help="Optional. Azure Data Lake Storage Gen2 filesystem path containing files to index. If omitted, index the entire filesystem", - ) - parser.add_argument( - "--datalakekey", required=False, help="Optional. Use this key when authenticating to Azure Data Lake Gen2" - ) - parser.add_argument( - "--useacls", action="store_true", help="Store ACLs from Azure Data Lake Gen2 Filesystem in the search index" - ) + parser.add_argument( "--category", help="Value for the category field in the search index for all sections indexed in this run" ) parser.add_argument( "--skipblobs", action="store_true", help="Skip uploading individual pages to Azure Blob Storage" ) - parser.add_argument("--storageaccount", help="Azure Blob Storage account name") - parser.add_argument("--container", help="Azure Blob Storage container name") - parser.add_argument("--storageresourcegroup", help="Azure blob storage resource group") - parser.add_argument( - "--storagekey", - required=False, - help="Optional. Use this Azure Blob Storage account key instead of the current user identity to login (use az login to set current user for Azure)", - ) - parser.add_argument( - "--tenantid", required=False, help="Optional. Use this to define the Azure directory where to authenticate)" - ) - parser.add_argument( - "--subscriptionid", - required=False, - help="Optional. 
Use this to define managed identity connection string in integrated vectorization", - ) - parser.add_argument( - "--searchservice", - help="Name of the Azure AI Search service where content should be indexed (must exist already)", - ) - parser.add_argument( - "--searchserviceassignedid", - required=False, - help="Search service system assigned Identity (Managed identity) (used for integrated vectorization)", - ) - parser.add_argument( - "--index", - help="Name of the Azure AI Search index where content should be indexed (will be created if it doesn't exist)", - ) - parser.add_argument( - "--searchkey", - required=False, - help="Optional. Use this Azure AI Search account key instead of the current user identity to login (use az login to set current user for Azure)", - ) - parser.add_argument( - "--searchanalyzername", - required=False, - default="en.microsoft", - help="Optional. Name of the Azure AI Search analyzer to use for the content field in the index", - ) - parser.add_argument("--openaihost", help="Host of the API used to compute embeddings ('azure' or 'openai')") - parser.add_argument("--openaiservice", help="Name of the Azure OpenAI service used to compute embeddings") - parser.add_argument( - "--openaideployment", - help="Name of the Azure OpenAI model deployment for an embedding model ('text-embedding-ada-002' recommended)", - ) - parser.add_argument( - "--openaimodelname", help="Name of the Azure OpenAI embedding model ('text-embedding-ada-002' recommended)" - ) - parser.add_argument( - "--openaidimensions", - required=False, - default=1536, - type=int, - help="Dimensions for the embedding model (defaults to 1536 for 'text-embedding-ada-002')", - ) - parser.add_argument( - "--novectors", - action="store_true", - help="Don't compute embeddings for the sections (e.g. 
don't call the OpenAI embeddings API during indexing)", - ) parser.add_argument( "--disablebatchvectors", action="store_true", help="Don't compute embeddings in batch for the sections" ) - - parser.add_argument( - "--openaicustomurl", - required=False, - help="Optional. Use this custom OpenAI URL instead of the default OpenAI URL", - ) - parser.add_argument( - "--openaikey", - required=False, - help="Optional. Use this OpenAI account key instead of the current Azure user identity to login.", - ) - parser.add_argument("--openaiorg", required=False, help="This is required only when using non-Azure endpoints.") parser.add_argument( "--remove", action="store_true", @@ -333,42 +243,32 @@ async def main(strategy: Strategy, setup_index: bool = True): action="store_true", help="Remove all blobs from blob storage and documents from the search index", ) + + # Optional key specification: parser.add_argument( - "--localpdfparser", - action="store_true", - help="Use PyPdf local PDF parser (supports only digital PDFs) instead of Azure Document Intelligence service to extract text, tables and layout from the documents", - ) - parser.add_argument( - "--localhtmlparser", - action="store_true", - help="Use Beautiful soap local HTML parser instead of Azure Document Intelligence service to extract text, tables and layout from the documents", - ) - parser.add_argument( - "--documentintelligenceservice", + "--searchkey", required=False, - help="Optional. Name of the Azure Document Intelligence service which will be used to extract text, tables and layout from the documents (must exist already)", + help="Optional. Use this Azure AI Search account key instead of the current user identity to login (use az login to set current user for Azure)", ) parser.add_argument( - "--documentintelligencekey", + "--storagekey", required=False, - help="Optional. 
Use this Azure Document Intelligence account key instead of the current user identity to login (use az login to set current user for Azure)", + help="Optional. Use this Azure Blob Storage account key instead of the current user identity to login (use az login to set current user for Azure)", ) parser.add_argument( - "--searchimages", - action="store_true", - required=False, - help="Optional. Generate image embeddings to enable each page to be searched as an image", + "--datalakekey", required=False, help="Optional. Use this key when authenticating to Azure Data Lake Gen2" ) parser.add_argument( - "--visionendpoint", + "--documentintelligencekey", required=False, - help="Optional, required if --searchimages is specified. Endpoint of Azure AI Vision service to use when embedding images.", + help="Optional. Use this Azure Document Intelligence account key instead of the current user identity to login (use az login to set current user for Azure)", ) parser.add_argument( - "--useintvectorization", + "--searchserviceassignedid", required=False, - help="Required if --useintvectorization is specified. 
Enable Integrated vectorizer indexer support which is in preview)", + help="Search service system assigned Identity (Managed identity) (used for integrated vectorization)", ) + parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") args = parser.parse_args() @@ -378,12 +278,17 @@ async def main(strategy: Strategy, setup_index: bool = True): # to avoid seeing the noisy INFO level logs from the Azure SDKs logger.setLevel(logging.INFO) - use_int_vectorization = args.useintvectorization and args.useintvectorization.lower() == "true" + load_azd_env() + + use_int_vectorization = os.getenv("USE_FEATURE_INT_VECTORIZATION", "").lower() == "true" + use_gptvision = os.getenv("USE_GPT4V", "").lower() == "true" + use_acls = os.getenv("AZURE_ADLS_GEN2_STORAGE_ACCOUNT") is not None + dont_use_vectors = os.getenv("USE_VECTORS", "").lower() == "false" - # Use the current user identity to connect to Azure services unless a key is explicitly set for any of them - if args.tenantid: - logger.info("Connecting to Azure services using the azd credential for tenant %s", args.tenantid) - azd_credential = AzureDeveloperCliCredential(tenant_id=args.tenantid, process_timeout=60) + # Use the current user identity to connect to Azure services. See infra/main.bicep for role assignments. 
+ if tenant_id := os.getenv("AZURE_TENANT_ID"): + logger.info("Connecting to Azure services using the azd credential for tenant %s", tenant_id) + azd_credential = AzureDeveloperCliCredential(tenant_id=tenant_id, process_timeout=60) else: logger.info("Connecting to Azure services using the azd credential for home tenant") azd_credential = AzureDeveloperCliCredential(process_timeout=60) @@ -400,40 +305,48 @@ async def main(strategy: Strategy, setup_index: bool = True): search_info = loop.run_until_complete( setup_search_info( - search_service=args.searchservice, - index_name=args.index, + search_service=os.environ["AZURE_SEARCH_SERVICE"], + index_name=os.environ["AZURE_SEARCH_INDEX"], azure_credential=azd_credential, search_key=clean_key_if_exists(args.searchkey), ) ) blob_manager = setup_blob_manager( azure_credential=azd_credential, - storage_account=args.storageaccount, - storage_container=args.container, - storage_resource_group=args.storageresourcegroup, - subscription_id=args.subscriptionid, - search_images=args.searchimages, + storage_account=os.getenv("AZURE_STORAGE_ACCOUNT"), + storage_container=os.getenv("AZURE_STORAGE_CONTAINER"), + storage_resource_group=os.getenv("AZURE_STORAGE_RESOURCE_GROUP"), + subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"), + search_images=use_gptvision, storage_key=clean_key_if_exists(args.storagekey), ) list_file_strategy = setup_list_file_strategy( azure_credential=azd_credential, local_files=args.files, - datalake_storage_account=args.datalakestorageaccount, - datalake_filesystem=args.datalakefilesystem, - datalake_path=args.datalakepath, + datalake_storage_account=os.getenv("AZURE_ADLS_GEN2_STORAGE_ACCOUNT"), + datalake_filesystem=os.getenv("AZURE_ADLS_GEN2_FILESYSTEM"), + datalake_path=os.getenv("AZURE_ADLS_GEN2_FILESYSTEM_PATH"), datalake_key=clean_key_if_exists(args.datalakekey), ) + + openai_host = os.environ["OPENAI_HOST"] + openai_key = None + if os.getenv("AZURE_OPENAI_API_KEY_OVERRIDE"): + openai_key = 
os.getenv("AZURE_OPENAI_API_KEY_OVERRIDE") + elif not openai_host.startswith("azure") and os.getenv("OPENAI_API_KEY"): + openai_key = os.getenv("OPENAI_API_KEY") + openai_embeddings_service = setup_embeddings_service( azure_credential=azd_credential, - openai_host=args.openaihost, - openai_model_name=args.openaimodelname, - openai_service=args.openaiservice, - openai_custom_url=args.openaicustomurl, - openai_deployment=args.openaideployment, - openai_dimensions=args.openaidimensions, - openai_key=clean_key_if_exists(args.openaikey), - openai_org=args.openaiorg, - disable_vectors=args.novectors, + openai_host=openai_host, + openai_model_name=os.getenv("AZURE_OPENAI_EMB_MODEL_NAME"), + openai_service=os.getenv("AZURE_OPENAI_SERVICE"), + openai_custom_url=os.getenv("AZURE_OPENAI_CUSTOM_URL"), + openai_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT"), + openai_dimensions=os.getenv("AZURE_OPENAI_EMB_DIMENSIONS"), + openai_key=clean_key_if_exists(openai_key), + openai_org=os.getenv("OPENAI_ORGANIZATION"), + disable_vectors=dont_use_vectors, disable_batch_vectors=args.disablebatchvectors, ) @@ -445,23 +358,25 @@ async def main(strategy: Strategy, setup_index: bool = True): blob_manager=blob_manager, document_action=document_action, embeddings=openai_embeddings_service, - subscription_id=args.subscriptionid, + subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"), search_service_user_assigned_id=args.searchserviceassignedid, - search_analyzer_name=args.searchanalyzername, - use_acls=args.useacls, + search_analyzer_name=os.getenv("AZURE_SEARCH_ANALYZER_NAME"), + use_acls=use_acls, category=args.category, ) else: file_processors = setup_file_processors( azure_credential=azd_credential, - document_intelligence_service=args.documentintelligenceservice, + document_intelligence_service=os.getenv("AZURE_DOCUMENTINTELLIGENCE_SERVICE"), document_intelligence_key=clean_key_if_exists(args.documentintelligencekey), - local_pdf_parser=args.localpdfparser, - 
local_html_parser=args.localhtmlparser, - search_images=args.searchimages, + local_pdf_parser=os.getenv("USE_LOCAL_PDF_PARSER") == "true", + local_html_parser=os.getenv("USE_LOCAL_HTML_PARSER") == "true", + search_images=use_gptvision, ) image_embeddings_service = setup_image_embeddings_service( - azure_credential=azd_credential, vision_endpoint=args.visionendpoint, search_images=args.searchimages + azure_credential=azd_credential, + vision_endpoint=os.getenv("AZURE_VISION_ENDPOINT"), + search_images=use_gptvision, ) ingestion_strategy = FileStrategy( @@ -472,8 +387,8 @@ async def main(strategy: Strategy, setup_index: bool = True): document_action=document_action, embeddings=openai_embeddings_service, image_embeddings=image_embeddings_service, - search_analyzer_name=args.searchanalyzername, - use_acls=args.useacls, + search_analyzer_name=os.getenv("AZURE_SEARCH_ANALYZER_NAME"), + use_acls=use_acls, category=args.category, ) diff --git a/app/backend/prepdocslib/blobmanager.py b/app/backend/prepdocslib/blobmanager.py index b9ada05f10..5b24a0281d 100644 --- a/app/backend/prepdocslib/blobmanager.py +++ b/app/backend/prepdocslib/blobmanager.py @@ -18,7 +18,7 @@ from .listfilestrategy import File -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") class BlobManager: diff --git a/app/backend/prepdocslib/embeddings.py b/app/backend/prepdocslib/embeddings.py index b9c5a9219f..148e2d2026 100644 --- a/app/backend/prepdocslib/embeddings.py +++ b/app/backend/prepdocslib/embeddings.py @@ -17,7 +17,7 @@ ) from typing_extensions import TypedDict -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") class EmbeddingBatch: diff --git a/app/backend/prepdocslib/filestrategy.py b/app/backend/prepdocslib/filestrategy.py index e8cab16983..8ae196e2e8 100644 --- a/app/backend/prepdocslib/filestrategy.py +++ b/app/backend/prepdocslib/filestrategy.py @@ -8,7 +8,7 @@ from .searchmanager import SearchManager, Section from .strategy import 
DocumentAction, SearchInfo, Strategy -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") async def parse_file( diff --git a/app/backend/prepdocslib/htmlparser.py b/app/backend/prepdocslib/htmlparser.py index 0acf88b050..627813d987 100644 --- a/app/backend/prepdocslib/htmlparser.py +++ b/app/backend/prepdocslib/htmlparser.py @@ -7,7 +7,7 @@ from .page import Page from .parser import Parser -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") def cleanup_data(data: str) -> str: diff --git a/app/backend/prepdocslib/integratedvectorizerstrategy.py b/app/backend/prepdocslib/integratedvectorizerstrategy.py index 0c475b9f52..7bf51e19f1 100644 --- a/app/backend/prepdocslib/integratedvectorizerstrategy.py +++ b/app/backend/prepdocslib/integratedvectorizerstrategy.py @@ -28,7 +28,7 @@ from .searchmanager import SearchManager from .strategy import DocumentAction, SearchInfo, Strategy -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") class IntegratedVectorizerStrategy(Strategy): diff --git a/app/backend/prepdocslib/listfilestrategy.py b/app/backend/prepdocslib/listfilestrategy.py index bd6a48d651..bdc53dc67f 100644 --- a/app/backend/prepdocslib/listfilestrategy.py +++ b/app/backend/prepdocslib/listfilestrategy.py @@ -13,7 +13,7 @@ DataLakeServiceClient, ) -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") class File: diff --git a/app/backend/prepdocslib/pdfparser.py b/app/backend/prepdocslib/pdfparser.py index 33335aadd6..f20dd61c2c 100644 --- a/app/backend/prepdocslib/pdfparser.py +++ b/app/backend/prepdocslib/pdfparser.py @@ -11,7 +11,7 @@ from .page import Page from .parser import Parser -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") class LocalPdfParser(Parser): diff --git a/app/backend/prepdocslib/searchmanager.py b/app/backend/prepdocslib/searchmanager.py index 496e5ca30a..37c03b265e 100644 --- a/app/backend/prepdocslib/searchmanager.py 
+++ b/app/backend/prepdocslib/searchmanager.py @@ -26,7 +26,7 @@ from .strategy import SearchInfo from .textsplitter import SplitPage -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") class Section: diff --git a/app/backend/prepdocslib/textsplitter.py b/app/backend/prepdocslib/textsplitter.py index 5d899e691e..21c1f9fcbf 100644 --- a/app/backend/prepdocslib/textsplitter.py +++ b/app/backend/prepdocslib/textsplitter.py @@ -6,7 +6,7 @@ from .page import Page, SplitPage -logger = logging.getLogger("ingester") +logger = logging.getLogger("ragapp") class TextSplitter(ABC): diff --git a/app/start.sh b/app/start.sh index ec7d64067a..d703654e69 100755 --- a/app/start.sh +++ b/app/start.sh @@ -1,21 +1,5 @@ #!/bin/sh -echo "" -echo "Loading azd .env file from current environment" -echo "" - -while IFS='=' read -r key value; do - value=$(echo "$value" | sed 's/^"//' | sed 's/"$//') - export "$key=$value" -done < +python ./scripts/manageacl.py -v --acl-action update_storage_urls --url ``` Going forward, all uploaded documents will have their `storageUrl` set in the search index. diff --git a/docs/login_and_acl.md b/docs/login_and_acl.md index 0ec1b549bc..89cf4c0204 100644 --- a/docs/login_and_acl.md +++ b/docs/login_and_acl.md @@ -156,7 +156,7 @@ print(token.token) - If any Entra apps need to be recreated, you can avoid redeploying the app by [changing the app settings in the portal](https://learn.microsoft.com/azure/app-service/configure-common?tabs=portal#configure-app-settings). Any of the [required environment variables](#environment-variables-reference) can be changed. Once the environment variables have been changed, restart the web app. - It's possible a consent dialog will not appear when you log into the app for the first time. If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. 
A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](../app/backend/core/authentication.py) - It's possible that your tenant admin has placed a restriction on consent to apps with [unverified publishers](https://learn.microsoft.com/entra/identity-platform/publisher-verification-overview). In this case, only admins may consent to the client and server apps, and normal user accounts are unable to use the login system until the admin consents on behalf of the entire organization. -- It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/entra/identity/enterprise-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. +- It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/entra/identity/enterprise-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. See this guide for [granting consent to an app](https://learn.microsoft.com/entra/identity/enterprise-apps/grant-admin-consent?pivots=portal). ## Adding data with document level access control @@ -215,11 +215,17 @@ The script supports the following commands. Note that the syntax is the same reg ### Azure Data Lake Storage Gen2 Setup -[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. The [adlsgen2setup.ps1](../scripts/adlsgen2setup.ps1) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. 
The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. +[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. The [adlsgen2setup.py](../scripts/adlsgen2setup.py) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. In order to use this script, an existing Data Lake Storage Gen2 storage account is required. Run `azd env set AZURE_ADLS_GEN2_STORAGE_ACCOUNT ` prior to running the script. -To run the script, run the following command: `/scripts/adlsgen2setup.ps1`. The script performs the following steps: +Then run the script inside your Python environment: + +```shell +python /scripts/adlsgen2setup.py './data/*' --data-access-control './scripts/sampleacls.json' -v +``` + +The script performs the following steps: - Creates example [groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) listed in the [sampleacls.json](../scripts/sampleacls.json) file. - Creates a filesystem / container `gptkbcontainer` in the storage account. 
diff --git a/samples/document-security/README.md b/samples/document-security/README.md index c4c4a62b70..a98959df0c 100644 --- a/samples/document-security/README.md +++ b/samples/document-security/README.md @@ -45,32 +45,33 @@ This guide demonstrates how to add an optional login and document level access c **IMPORTANT:** In order to add optional login and document level access control, you'll need the following in addition to the normal sample requirements -- **Azure account permissions**: Your Azure account must have [permission to manage applications in Entra ID](https://learn.microsoft.com/azure/active-directory/roles/permissions-reference#cloud-application-administrator). +- **Azure account permissions**: Your Azure account must have [permission to manage applications in Microsoft Entra](https://learn.microsoft.com/entra/identity/role-based-access-control/permissions-reference#cloud-application-administrator). -## Setting up Entra ID Apps +## Setting up Microsoft Entra applications -Two Entra ID apps must be registered in order to make the optional login and document level access control system work correctly. One app is for the client UI. The client UI is implemented as a [single page application](https://learn.microsoft.com/azure/active-directory/develop/scenario-spa-app-registration). The other app is for the API server. The API server uses a [confidential client](https://learn.microsoft.com/azure/active-directory/develop/msal-client-applications) to call the [Microsoft Graph API](https://learn.microsoft.com/graph/use-the-api). +Two Microsoft Entra applications must be registered in order to make the optional login and document level access control system work correctly. One app is for the client UI. The client UI is implemented as a [single page application](https://learn.microsoft.com/entra/identity-platform/scenario-spa-app-registration). The other app is for the API server. 
The API server uses a [confidential client](https://learn.microsoft.com/entra/identity-platform/msal-client-applications) to call the [Microsoft Graph API](https://learn.microsoft.com/graph/use-the-api). ### Automatic Setup The easiest way to setup the two apps is to use the `azd` CLI. We've written scripts that will automatically create the two apps and configure them for use with the sample. To trigger the automatic setup, run the following commands: -1. Run `azd env set AZURE_USE_AUTHENTICATION true` to enable the login UI and App Service authentication. +1. Run `azd env set AZURE_USE_AUTHENTICATION true` to enable the login UI and use App Service authentication by default. 1. Ensure access control is enabled on your search index. If your index doesn't exist yet, run prepdocs with `AZURE_USE_AUTHENTICATION` set to `true`. If your index already exists, run `pwsh ./scripts/manageacl.ps1 --acl-action enable_acls`. 1. (Optional) To require access control when using the app, run `azd env set AZURE_ENFORCE_ACCESS_CONTROL true`. Authentication is always required to search on documents with access control assigned, regardless of if unauthenticated access is enabled or not. 1. (Optional) To allow authenticated users to search on documents that have no access controls assigned, even when access control is required, run `azd env set AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS true`. 1. (Optional) To allow unauthenticated users to use the app, even when access control is enforced, run `azd env set AZURE_ENABLE_UNAUTHENTICATED_ACCESS true`. `AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS` should also be set to true if you want unauthenticated users to be able to search on documents with no access control. 1. Run `azd env set AZURE_AUTH_TENANT_ID ` to set the tenant ID associated with authentication. +1. If your auth tenant ID is different from your currently logged in tenant ID, run `azd auth login --tenant-id ` to login to the authentication tenant simultaneously. 1. 
Run `azd up` to deploy the app. ### Manual Setup The following instructions explain how to setup the two apps using the Azure Portal. -#### Setting up the Server App +#### Server App - Sign in to the [Azure portal](https://portal.azure.com/). -- Select the Entra ID Service. +- Select the Microsoft Entra ID service. - In the left hand menu, select **Application Registrations**. - Select **New Registration**. - In the **Name** section, enter a meaningful application name. This name will be displayed to users of the app, for example `Azure Search OpenAI Chat API`. @@ -78,7 +79,9 @@ The following instructions explain how to setup the two apps using the Azure Por - Select **Register** to create the application - In the app's registration screen, find the **Application (client) ID**. - Run the following `azd` command to save this ID: `azd env set AZURE_SERVER_APP_ID `. -- Entra ID supports three types of credentials to authenticate an app using the [client credentials](https://learn.microsoft.com/azure/active-directory/develop/v2-oauth2-client-creds-grant-flow): passwords (app secrets), certificates, and federated identity credentials. For a higher level of security, either [certificates](https://learn.microsoft.com/azure/active-directory/develop/howto-create-self-signed-certificate) or federated identity credentials are recommended. This sample currently uses an app secret for ease of provisioning. + +- Microsoft Entra supports three types of credentials to authenticate an app using the [client credentials](https://learn.microsoft.com/entra/identity-platform/v2-oauth2-client-creds-grant-flow): passwords (app secrets), certificates, and federated identity credentials. For a higher level of security, either [certificates](https://learn.microsoft.com/entra/identity-platform/howto-create-self-signed-certificate) or federated identity credentials are recommended. This sample currently uses an app secret for ease of provisioning. 
+ - Select **Certificates & secrets** in the left hand menu. - In the **Client secrets** section, select **New client secret**. - Type a description, for example `Azure Search OpenAI Chat Key`. @@ -90,7 +93,7 @@ The following instructions explain how to setup the two apps using the Azure Por - Select **Delegated permissions**. - Search for and and select `User.Read`. - Select **Add permissions**. -- Select **Expose an API** in the left hand menu. The server app works by using the [On Behalf Of Flow](https://learn.microsoft.com/azure/active-directory/develop/v2-oauth2-on-behalf-of-flow#protocol-diagram), which requires the server app to expose at least 1 API. +- Select **Expose an API** in the left hand menu. The server app works by using the [On Behalf Of Flow](https://learn.microsoft.com/entra/identity-platform/v2-oauth2-on-behalf-of-flow#protocol-diagram), which requires the server app to expose at least 1 API. - The application must define a URI to expose APIs. Select **Add** next to **Application ID URI**. - By default, the Application ID URI is set to `api://`. Accept the default by selecting **Save**. - Under **Scopes defined by this API**, select **Add a scope**. @@ -103,16 +106,16 @@ The following instructions explain how to setup the two apps using the Azure Por - For **User consent description**, type **Allow the app to access Azure Search OpenAI Chat API on your behalf**. - Leave **State** set to **Enabled**. - Select **Add scope** at the bottom to save the scope. -- (Optional) Enable group claims. Include which Entra ID groups the user is part of as part of the login in the [optional claims](https://learn.microsoft.com/azure/active-directory/develop/optional-claims). The groups are used for [optional security filtering](https://learn.microsoft.com/azure/search/search-security-trimming-for-azure-search) in the search results. +- (Optional) Enable group claims. 
Include which Microsoft Entra groups the user is part of as part of the login in the [optional claims](https://learn.microsoft.com/entra/identity-platform/optional-claims). The groups are used for [optional security filtering](https://learn.microsoft.com/azure/search/search-security-trimming-for-azure-search) in the search results. - In the left hand menu, select **Token configuration** - Under **Optional claims**, select **Add groups claim** - - Select which [group types](https://learn.microsoft.com/azure/active-directory/hybrid/connect/how-to-connect-fed-group-claims) to include in the claim. Note that a [overage claim](https://learn.microsoft.com/azure/active-directory/develop/access-token-claims-reference#groups-overage-claim) will be emitted if the user is part of too many groups. In this case, the API server will use the [Microsoft Graph](https://learn.microsoft.com/graph/api/user-list-memberof?view=graph-rest-*0&tabs=http) to list the groups the user is part of instead of relying on the groups in the claim. + - Select which [group types](https://learn.microsoft.com/entra/identity/hybrid/connect/how-to-connect-fed-group-claims) to include in the claim. Note that a [overage claim](https://learn.microsoft.com/entra/identity-platform/access-token-claims-reference#groups-overage-claim) will be emitted if the user is part of too many groups. In this case, the API server will use the [Microsoft Graph](https://learn.microsoft.com/graph/api/user-list-memberof?view=graph-rest-*0&tabs=http) to list the groups the user is part of instead of relying on the groups in the claim. - Select **Add** to save your changes #### Client App - Sign in to the [Azure portal](https://portal.azure.com/). -- Select the Entra ID Service. +- Select the Microsoft Entra ID service. - In the left hand menu, select **Application Registrations**. - Select **New Registration**. - In the **Name** section, enter a meaningful application name. 
This name will be displayed to users of the app, for example `Azure Search OpenAI Chat Web App`. @@ -127,7 +130,7 @@ The following instructions explain how to setup the two apps using the Azure Por - In the left hand menu, select **Authentication**. - Under **Implicit grant and hybrid flows**, select **ID Tokens (used for implicit and hybrid flows)** - Select **Save** -- In the left hand menu, select **API permissions**. You will add permission to access the **access_as_user** API on the server app. This permission is required for the [On Behalf Of Flow](https://learn.microsoft.com/azure/active-directory/develop/v2-oauth2-on-behalf-of-flow#protocol-diagram) to work. +- In the left hand menu, select **API permissions**. You will add permission to access the **access_as_user** API on the server app. This permission is required for the [On Behalf Of Flow](https://learn.microsoft.com/entra/identity-platform/v2-oauth2-on-behalf-of-flow#protocol-diagram) to work. - Select **Add a permission**, and then **My APIs**. - In the list of applications, select your server application **Azure Search OpenAI Chat API** - Ensure **Delegated permissions** is selected. @@ -136,7 +139,7 @@ The following instructions explain how to setup the two apps using the Azure Por #### Configure Server App Known Client Applications -Consent from the user must be obtained for use of the client and server app. The client app can prompt the user for consent through a dialog when they log in. The server app has no ability to show a dialog for consent. Client apps can be [added to the list of known clients](https://learn.microsoft.com/azure/active-directory/develop/v2-oauth2-on-behalf-of-flow#gaining-consent-for-the-middle-tier-application) to access the server app, so a consent dialog is shown for the server app. +Consent from the user must be obtained for use of the client and server app. The client app can prompt the user for consent through a dialog when they log in. 
The server app has no ability to show a dialog for consent. Client apps can be [added to the list of known clients](https://learn.microsoft.com/entra/identity-platform/v2-oauth2-on-behalf-of-flow#gaining-consent-for-the-middle-tier-application) to access the server app, so a consent dialog is shown for the server app. - Navigate to the server app registration - In the left hand menu, select **Manifest** @@ -145,18 +148,34 @@ Consent from the user must be obtained for use of the client and server app. The #### Testing -If you are running setup for the first time, ensure you have run `azd env set AZURE_ADLS_GEN2_STORAGE_ACCOUNT ` before running `azd up`. If you do not set this environment variable, your index will not be initialized with access control support when `prepdocs` is run for the first time. To manually enable access control in your index, use the [manual setup script](#using-the-add-documents-api). +If you are running setup for the first time, ensure you have run `azd env set AZURE_ADLS_GEN2_STORAGE_ACCOUNT ` before running `azd up`. If you do not set this environment variable, your index will not be initialized with access control support when `prepdocs` is run for the first time. To manually enable access control in your index, use the [manual setup script](#azure-data-lake-storage-gen2-setup). -Ensure you run `azd env set AZURE_USE_AUTHENTICATION` to enable the login UI once you have setup the two Entra ID apps before you deploy or run the application. The login UI will not appear unless all [required environment variables](#environment-variables-reference) have been setup. +Ensure you run `azd env set AZURE_USE_AUTHENTICATION` to enable the login UI once you have setup the two Microsoft Entra apps before you deploy or run the application. The login UI will not appear unless all [required environment variables](#environment-variables-reference) have been setup. 
In both the chat and ask a question modes, under **Developer settings** optional **Use oid security filter** and **Use groups security filter** checkboxes will appear. The oid (User ID) filter maps to the `oids` field in the search index and the groups (Group ID) filter maps to the `groups` field in the search index. If `AZURE_ENFORCE_ACCESS_CONTROL` has been set, then both the **Use oid security filter** and **Use groups security filter** options are always enabled and cannot be disabled. -### Troubleshooting Entra ID Setup +#### Programmatic Access with Authentication + +If you want to use the chat endpoint without the UI and still use authentication, you must disable [App Service built-in authentication](https://learn.microsoft.com/azure/app-service/overview-authentication-authorization) and use only the app's MSAL-based authentication flow. Ensure the `AZURE_DISABLE_APP_SERVICES_AUTHENTICATION` environment variable is set before deploying. + +Get an access token that can be used for calling the chat API using the following code: + +```python +from azure.identity import DefaultAzureCredential +import os -- If any Entra ID apps need to be recreated, you can avoid redeploying the app by [changing the app settings in the portal](https://learn.microsoft.com/azure/app-service/configure-common?tabs=portal#configure-app-settings). Any of the [required environment variables](#environment-variables-reference) can be changed. Once the environment variables have been changed, restart the web app. -- It's possible a consent dialog will not appear when you log into the app for the first time. If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. 
A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](../../app/backend/core/authentication.py) -- It's possible that your tenant admin has placed a restriction on consent to apps with [unverified publishers](https://learn.microsoft.com/azure/active-directory/develop/publisher-verification-overview). In this case, only admins may consent to the client and server apps, and normal user accounts are unable to use the login system until the admin consents on behalf of the entire organization. -- It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/azure/active-directory/manage-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. +token = DefaultAzureCredential().get_token(f"api://{os.environ['AZURE_SERVER_APP_ID']}/access_as_user", tenant_id=os.getenv('AZURE_AUTH_TENANT_ID', os.getenv('AZURE_TENANT_ID'))) + +print(token.token) +``` + +### Troubleshooting + +- If your primary tenant restricts the ability to create Entra applications, you'll need to use a separate tenant to create the Entra applications. You can create a new tenant by following [these instructions](https://learn.microsoft.com/entra/identity-platform/quickstart-create-new-tenant). Then run `azd env set AZURE_AUTH_TENANT_ID ` before running `azd up`. +- If any Entra apps need to be recreated, you can avoid redeploying the app by [changing the app settings in the portal](https://learn.microsoft.com/azure/app-service/configure-common?tabs=portal#configure-app-settings). Any of the [required environment variables](#environment-variables-reference) can be changed. Once the environment variables have been changed, restart the web app. +- It's possible a consent dialog will not appear when you log into the app for the first time. 
If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](../app/backend/core/authentication.py) +- It's possible that your tenant admin has placed a restriction on consent to apps with [unverified publishers](https://learn.microsoft.com/entra/identity-platform/publisher-verification-overview). In this case, only admins may consent to the client and server apps, and normal user accounts are unable to use the login system until the admin consents on behalf of the entire organization. +- It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/entra/identity/enterprise-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. ## Adding data with document level access control @@ -167,11 +186,11 @@ The sample supports 2 main strategies for adding data with document level access ### Using the Add Documents API -Manually enable document level access control on a search index and manually set access control values using the [manageacl.ps1](../../scripts/manageacl.ps1) script. +Manually enable document level access control on a search index and manually set access control values using the [manageacl.ps1](../scripts/manageacl.ps1) script. Run `azd up` or use `azd env set` to manually set `AZURE_SEARCH_SERVICE` and `AZURE_SEARCH_INDEX` environment variables prior to running the script. -The script supports the following commands. Note that the syntax is the same regardless of whether [manageacl.ps1](../../scripts/manageacl.ps1) or [manageacl.sh](../../scripts/manageacl.sh) is used. All commands support `-v` for verbose logging. +The script supports the following commands. 
Note that the syntax is the same regardless of whether [manageacl.ps1](../scripts/manageacl.ps1) or [manageacl.sh](../scripts/manageacl.sh) is used. All commands support `-v` for verbose logging. - `./scripts/manageacl.ps1 --acl-action enable_acls`: Creates the required `oids` (User ID) and `groups` (Group IDs) [security filter](https://learn.microsoft.com/azure/search/search-security-trimming-for-azure-search) fields for document level access control on your index, as well as the `storageUrl` field for storing the Blob storage URL. Does nothing if these fields already exist. @@ -215,25 +234,31 @@ The script supports the following commands. Note that the syntax is the same reg ### Azure Data Lake Storage Gen2 Setup -[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. The [adlsgen2setup.ps1](../../scripts/adlsgen2setup.ps1) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. +[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. The [adlsgen2setup.py](../scripts/adlsgen2setup.py) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. 
The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. In order to use this script, an existing Data Lake Storage Gen2 storage account is required. Run `azd env set AZURE_ADLS_GEN2_STORAGE_ACCOUNT ` prior to running the script. -To run the script, run the following command: `/scripts/adlsgen2setup.ps1`. The script performs the following steps: +Then run the script inside your Python environment: + +```shell +python /scripts/adlsgen2setup.py './data/*' --data-access-control './scripts/sampleacls.json' -v +``` + +The script performs the following steps: -- Creates example [groups](https://learn.microsoft.com/azure/active-directory/fundamentals/how-to-manage-groups) listed in the [sampleacls.json](../../scripts/sampleacls.json) file. +- Creates example [groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) listed in the [sampleacls.json](../scripts/sampleacls.json) file. - Creates a filesystem / container `gptkbcontainer` in the storage account. -- Creates the directories listed in the [sampleacls.json](../../scripts/sampleacls.json) file. -- Uploads the sample PDFs referenced in the [sampleacls.json](../../scripts/sampleacls.json) file into the appropriate directories. -- [Recursively sets Access Control Lists (ACLs)](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-acl-cli) using the information from the [sampleacls.json](../../scripts/sampleacls.json) file. +- Creates the directories listed in the [sampleacls.json](../scripts/sampleacls.json) file. +- Uploads the sample PDFs referenced in the [sampleacls.json](../scripts/sampleacls.json) file into the appropriate directories. +- [Recursively sets Access Control Lists (ACLs)](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-acl-cli) using the information from the [sampleacls.json](../scripts/sampleacls.json) file. 
-In order to use the sample access control, you need to join these groups in your Entra ID tenant. +In order to use the sample access control, you need to join these groups in your Microsoft Entra tenant. -Note that this optional script may not work in Codespaces if your administrator has applied a [Conditional Access policy](https://learn.microsoft.com/azure/active-directory/conditional-access/overview) to your tenant. +Note that this optional script may not work in Codespaces if your administrator has applied a [Conditional Access policy](https://learn.microsoft.com/entra/identity/conditional-access/overview) to your tenant. #### Azure Data Lake Storage Gen2 Prep Docs -Once a Data Lake Storage Gen2 storage account has been setup with sample data and access control lists, [prepdocs.py](../../app/backend/prepdocs.py) can be used to automatically process PDFs in the storage account and store them with their [access control lists in the search index](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). +Once a Data Lake Storage Gen2 storage account has been setup with sample data and access control lists, [prepdocs.py](../app/backend/prepdocs.py) can be used to automatically process PDFs in the storage account and store them with their [access control lists in the search index](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). To run this script with a Data Lake Storage Gen2 account, first set the following environment variables: @@ -243,18 +268,19 @@ To run this script with a Data Lake Storage Gen2 account, first set the followin Once the environment variables are set, run the script using the following command: `/scripts/prepdocs.ps1` or `/scripts/prepdocs.sh`. 
-## Environment Variables Reference +## Environment variables reference The following environment variables are used to setup the optional login and document level access control: -- `AZURE_USE_AUTHENTICATION`: Enables Entra ID based optional login and document level access control. Set to true before running `azd up`. +- `AZURE_USE_AUTHENTICATION`: Enables Entra ID login and document level access control. Set to true before running `azd up`. - `AZURE_ENFORCE_ACCESS_CONTROL`: Enforces Entra ID based login and document level access control on documents with access control assigned. Set to true before running `azd up`. If `AZURE_ENFORCE_ACCESS_CONTROL` is enabled and `AZURE_ENABLE_UNAUTHENTICATED_ACCESS` is not enabled, then authentication is required to use the app. - `AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS`: Allows users to search on documents that have no access controls assigned - `AZURE_ENABLE_UNAUTHENTICATED_ACCESS`: Allows unauthenticated users to access the chat app, even when `AZURE_ENFORCE_ACCESS_CONTROL` is enabled. `AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS` should be set to true to allow unauthenticated users to search on documents that have no access control assigned. Unauthenticated users cannot search on documents with access control assigned. -- `AZURE_SERVER_APP_ID`: (Required) Application ID of the Entra ID app for the API server. -- `AZURE_SERVER_APP_SECRET`: [Client secret](https://learn.microsoft.com/azure/active-directory/develop/v2-oauth2-client-creds-grant-flow) used by the API server to authenticate using the Entra ID API server app. -- `AZURE_CLIENT_APP_ID`: Application ID of the Entra ID app for the client UI. -- `AZURE_AUTH_TENANT_ID`: [Tenant ID](https://learn.microsoft.com/azure/active-directory/fundamentals/how-to-find-tenant) associated with the Entra ID used for login and document level access control. Defaults to `AZURE_TENANT_ID` if not defined. 
+- `AZURE_DISABLE_APP_SERVICES_AUTHENTICATION`: Disables [use of built-in authentication for App Services](https://learn.microsoft.com/azure/app-service/overview-authentication-authorization). An authentication flow based on the MSAL SDKs is used instead. Useful when you want to provide programmatic access to the chat endpoints with authentication. +- `AZURE_SERVER_APP_ID`: (Required) Application ID of the Microsoft Entra app for the API server. +- `AZURE_SERVER_APP_SECRET`: [Client secret](https://learn.microsoft.com/entra/identity-platform/v2-oauth2-client-creds-grant-flow) used by the API server to authenticate using the Microsoft Entra server app. +- `AZURE_CLIENT_APP_ID`: Application ID of the Microsoft Entra app for the client UI. +- `AZURE_AUTH_TENANT_ID`: [Tenant ID](https://learn.microsoft.com/entra/fundamentals/how-to-find-tenant) associated with the Microsoft Entra tenant used for login and document level access control. Defaults to `AZURE_TENANT_ID` if not defined. - `AZURE_ADLS_GEN2_STORAGE_ACCOUNT`: (Optional) Name of existing [Data Lake Storage Gen2 storage account](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) for storing sample data with [access control lists](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). Only used with the optional Data Lake Storage Gen2 [setup](#azure-data-lake-storage-gen2-setup) and [prep docs](#azure-data-lake-storage-gen2-prep-docs) scripts. - `AZURE_ADLS_GEN2_FILESYSTEM`: (Optional) Name of existing [Data Lake Storage Gen2 filesystem](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) for storing sample data with [access control lists](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). Only used with the optional Data Lake Storage Gen2 [setup](#azure-data-lake-storage-gen2-setup) and [prep docs](#azure-data-lake-storage-gen2-prep-docs) scripts. 
- `AZURE_ADLS_GEN2_FILESYSTEM_PATH`: (Optional) Name of existing path in a [Data Lake Storage Gen2 filesystem](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) for storing sample data with [access control lists](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). Only used with the optional Data Lake Storage Gen2 [prep docs](#azure-data-lake-storage-gen2-prep-docs) script. diff --git a/scripts/adlsgen2setup.ps1 b/scripts/adlsgen2setup.ps1 deleted file mode 100644 index e6b80c0d46..0000000000 --- a/scripts/adlsgen2setup.ps1 +++ /dev/null @@ -1,19 +0,0 @@ -## Set the preference to stop on the first error -$ErrorActionPreference = "Stop" - -& $PSScriptRoot\loadenv.ps1 - -$venvPythonPath = "./.venv/scripts/python.exe" -if (Test-Path -Path "/usr") { - # fallback to Linux venv path - $venvPythonPath = "./.venv/bin/python" -} - -if ([string]::IsNullOrEmpty($env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT)) { - Write-Error "AZURE_ADLS_GEN2_STORAGE_ACCOUNT must be set in order to continue" - exit 1 -} - -Write-Host 'Running "adlsgen2setup.py"' -$cwd = (Get-Location) -Start-Process -FilePath $venvPythonPath -ArgumentList "./scripts/adlsgen2setup.py `"$cwd/data`" --data-access-control ./scripts/sampleacls.json --storage-account $env:AZURE_ADLS_GEN2_STORAGE_ACCOUNT -v" -Wait -NoNewWindow diff --git a/scripts/adlsgen2setup.py b/scripts/adlsgen2setup.py index 02cf860a85..fdff6963fc 100644 --- a/scripts/adlsgen2setup.py +++ b/scripts/adlsgen2setup.py @@ -13,6 +13,10 @@ DataLakeServiceClient, ) +from load_azd_env import load_azd_env + +logger = logging.getLogger("ragapp") + class AdlsGen2Setup: """ @@ -54,18 +58,18 @@ def __init__( async def run(self): async with self.create_service_client() as service_client: - logging.info(f"Ensuring {self.filesystem_name} exists...") + logger.info(f"Ensuring {self.filesystem_name} exists...") async with service_client.get_file_system_client(self.filesystem_name) as filesystem_client: if not 
await filesystem_client.exists(): await filesystem_client.create_file_system() - logging.info("Creating groups...") + logger.info("Creating groups...") groups: dict[str, str] = {} for group in self.data_access_control_format["groups"]: group_id = await self.create_or_get_group(group) groups[group] = group_id - logging.info("Ensuring directories exist...") + logger.info("Ensuring directories exist...") directories: dict[str, DataLakeDirectoryClient] = {} try: for directory in self.data_access_control_format["directories"].keys(): @@ -76,23 +80,23 @@ async def run(self): ) directories[directory] = directory_client - logging.info("Uploading files...") + logger.info("Uploading files...") for file, file_info in self.data_access_control_format["files"].items(): directory = file_info["directory"] if directory not in directories: - logging.error(f"File {file} has unknown directory {directory}, exiting...") + logger.error(f"File {file} has unknown directory {directory}, exiting...") return await self.upload_file( directory_client=directories[directory], file_path=os.path.join(self.data_directory, file) ) - logging.info("Setting access control...") + logger.info("Setting access control...") for directory, access_control in self.data_access_control_format["directories"].items(): directory_client = directories[directory] if "groups" in access_control: for group_name in access_control["groups"]: if group_name not in groups: - logging.error( + logger.error( f"Directory {directory} has unknown group {group_name} in access control list, exiting" ) return @@ -122,7 +126,7 @@ async def create_or_get_group(self, group_name: str): token_result = await self.credentials.get_token("https://graph.microsoft.com/.default") self.graph_headers = {"Authorization": f"Bearer {token_result.token}"} async with aiohttp.ClientSession(headers=self.graph_headers) as session: - logging.info(f"Searching for group {group_name}...") + logger.info(f"Searching for group {group_name}...") async with 
session.get( f"https://graph.microsoft.com/v1.0/groups?$select=id&$top=1&$filter=displayName eq '{group_name}'" ) as response: @@ -132,7 +136,7 @@ async def create_or_get_group(self, group_name: str): if len(content["value"]) == 1: group_id = content["value"][0]["id"] if not group_id: - logging.info(f"Could not find group {group_name}, creating...") + logger.info(f"Could not find group {group_name}, creating...") group = { "displayName": group_name, "groupTypes": ["Unified"], @@ -143,17 +147,22 @@ async def create_or_get_group(self, group_name: str): if response.status != 201: raise Exception(content) group_id = content["id"] - logging.info(f"Group {group_name} ID {group_id}") + logger.info(f"Group {group_name} ID {group_id}") return group_id async def main(args: Any): + load_azd_env() + + if not os.getenv("AZURE_ADLS_GEN2_STORAGE_ACCOUNT"): + raise Exception("AZURE_ADLS_GEN2_STORAGE_ACCOUNT must be set to continue") + async with AzureDeveloperCliCredential() as credentials: with open(args.data_access_control) as f: data_access_control_format = json.load(f) command = AdlsGen2Setup( data_directory=args.data_directory, - storage_account_name=args.storage_account, + storage_account_name=os.getenv("AZURE_ADLS_GEN2_STORAGE_ACCOUNT"), filesystem_name="gptkbcontainer", security_enabled_groups=args.create_security_enabled_groups, credentials=credentials, @@ -165,14 +174,9 @@ async def main(args: Any): if __name__ == "__main__": parser = argparse.ArgumentParser( description="Upload sample data to a Data Lake Storage Gen2 account and associate sample access control lists with it using sample groups", - epilog="Example: ./scripts/adlsgen2setup.py ./data --data-access-control ./scripts/sampleacls.json --storage-account --create-security-enabled-groups ", + epilog="Example: ./scripts/adlsgen2setup.py ./data --data-access-control ./scripts/sampleacls.json --create-security-enabled-groups ", ) parser.add_argument("data_directory", help="Data directory that contains sample PDFs") 
- parser.add_argument( - "--storage-account", - required=True, - help="Name of the Data Lake Storage Gen2 account to upload the sample data to", - ) parser.add_argument( "--create-security-enabled-groups", required=False, diff --git a/scripts/adlsgen2setup.sh b/scripts/adlsgen2setup.sh deleted file mode 100755 index 6118a5e8e6..0000000000 --- a/scripts/adlsgen2setup.sh +++ /dev/null @@ -1,12 +0,0 @@ - #!/bin/sh - -. ./scripts/loadenv.sh - -if [ -n "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" ]; then - echo 'AZURE_ADLS_GEN2_STORAGE_ACCOUNT must be set to continue' - exit 1 -fi - -echo 'Running "adlsgen2setup.py"' - -./.venv/bin/python ./scripts/adlsgen2setup.py './data/*' --data-access-control './scripts/sampleacls.json' --storage-account "$AZURE_ADLS_GEN2_STORAGE_ACCOUNT" -v diff --git a/scripts/auth_init.ps1 b/scripts/auth_init.ps1 index 872cf5ab6e..1b1467e60b 100755 --- a/scripts/auth_init.ps1 +++ b/scripts/auth_init.ps1 @@ -1,6 +1,8 @@ -. ./scripts/load_azd_env.ps1 +Write-Host "Checking if authentication should be setup..." -if (-not $env:AZURE_USE_AUTHENTICATION) { +$AZURE_USE_AUTHENTICATION = (azd env get-value AZURE_USE_AUTHENTICATION) +if (-not $?) { + Write-Host "AZURE_USE_AUTHENTICATION is not set, skipping authentication setup." 
Exit 0 } diff --git a/scripts/auth_init.py b/scripts/auth_init.py index e638f40f73..372df96220 100644 --- a/scripts/auth_init.py +++ b/scripts/auth_init.py @@ -22,6 +22,7 @@ from msgraph.generated.models.web_application import WebApplication from auth_common import get_application, test_authentication_enabled +from load_azd_env import load_azd_env async def create_application(graph_client: GraphServiceClient, request_app: Application) -> Tuple[str, str]: @@ -165,11 +166,18 @@ def server_app_known_client_application(client_app_id: str) -> Application: async def main(): + load_azd_env() + if not test_authentication_enabled(): print("Not setting up authentication.") exit(0) - auth_tenant = os.getenv("AZURE_AUTH_TENANT_ID", os.environ["AZURE_TENANT_ID"]) + if not os.getenv("AZURE_AUTH_TENANT_ID") and not os.getenv("AZURE_TENANT_ID"): + print( + "Error: No tenant ID set for authentication. Run `azd env set AZURE_AUTH_TENANT_ID tenant-id` to set the tenant ID." + ) + exit(1) + auth_tenant = os.getenv("AZURE_AUTH_TENANT_ID", os.getenv("AZURE_TENANT_ID")) print("Setting up authentication for tenant", auth_tenant) credential = AzureDeveloperCliCredential(tenant_id=auth_tenant) diff --git a/scripts/auth_init.sh b/scripts/auth_init.sh index bd7cfff552..0b8cbdb037 100755 --- a/scripts/auth_init.sh +++ b/scripts/auth_init.sh @@ -2,9 +2,8 @@ echo "Checking if authentication should be setup..." -. ./scripts/load_azd_env.sh - -if [ -z "$AZURE_USE_AUTHENTICATION" ]; then +AZURE_USE_AUTHENTICATION=$(azd env get-value AZURE_USE_AUTHENTICATION) +if [ $? -ne 0 ]; then echo "AZURE_USE_AUTHENTICATION is not set, skipping authentication setup." exit 0 fi diff --git a/scripts/auth_update.ps1 b/scripts/auth_update.ps1 index 1dbf7efee2..ab6d620e6d 100644 --- a/scripts/auth_update.ps1 +++ b/scripts/auth_update.ps1 @@ -1,6 +1,5 @@ -. ./scripts/load_azd_env.ps1 - -if (-not $env:AZURE_USE_AUTHENTICATION) { +$AZURE_USE_AUTHENTICATION = (azd env get-value AZURE_USE_AUTHENTICATION) +if (-not $?) 
{ Exit 0 } diff --git a/scripts/auth_update.sh b/scripts/auth_update.sh index 7b64995f75..02e1e5f703 100755 --- a/scripts/auth_update.sh +++ b/scripts/auth_update.sh @@ -1,8 +1,7 @@ #!/bin/sh -. ./scripts/load_azd_env.sh - -if [ -z "$AZURE_USE_AUTHENTICATION" ]; then +AZURE_USE_AUTHENTICATION=$(azd env get-value AZURE_USE_AUTHENTICATION) +if [ $? -ne 0 ]; then exit 0 fi diff --git a/scripts/load_azd_env.ps1 b/scripts/load_azd_env.ps1 deleted file mode 100644 index 9f59bc7b07..0000000000 --- a/scripts/load_azd_env.ps1 +++ /dev/null @@ -1,8 +0,0 @@ -Write-Host "Loading azd .env file from current environment" -foreach ($line in (& azd env get-values)) { - if ($line -match "([^=]+)=(.*)") { - $key = $matches[1] - $value = $matches[2] -replace '^"|"$' - [Environment]::SetEnvironmentVariable($key, $value) - } -} diff --git a/scripts/load_azd_env.sh b/scripts/load_azd_env.sh deleted file mode 100755 index 02926243a0..0000000000 --- a/scripts/load_azd_env.sh +++ /dev/null @@ -1,10 +0,0 @@ - #!/bin/sh - -echo "Loading azd .env file from current environment..." 
- -while IFS='=' read -r key value; do - value=$(echo "$value" | sed 's/^"//' | sed 's/"$//') - export "$key=$value" -done < Date: Mon, 23 Sep 2024 09:04:14 -0600 Subject: [PATCH 30/41] Remove phi changes --- app/backend/app.py | 4 ++-- app/backend/approaches/chatreadretrieveread.py | 2 +- app/backend/approaches/retrievethenread.py | 3 +-- app/backend/load_azd_env.py | 2 +- app/backend/prepdocs.py | 2 +- app/backend/prepdocslib/blobmanager.py | 2 +- app/backend/prepdocslib/embeddings.py | 2 +- app/backend/prepdocslib/filestrategy.py | 2 +- app/backend/prepdocslib/htmlparser.py | 2 +- app/backend/prepdocslib/integratedvectorizerstrategy.py | 2 +- app/backend/prepdocslib/listfilestrategy.py | 2 +- app/backend/prepdocslib/pdfparser.py | 2 +- app/backend/prepdocslib/searchmanager.py | 2 +- app/backend/prepdocslib/textsplitter.py | 2 +- scripts/adlsgen2setup.py | 2 +- scripts/manageacl.py | 2 +- 16 files changed, 17 insertions(+), 18 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index fefbe5105c..386ce6881a 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -395,7 +395,7 @@ async def setup_clients(): AZURE_SEARCH_INDEX = os.environ["AZURE_SEARCH_INDEX"] # Shared by all OpenAI deployments OPENAI_HOST = os.getenv("OPENAI_HOST", "azure") - OPENAI_CHATGPT_MODEL = "phi3.5:latest" + OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"] OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002") OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS", 1536)) # Used with Azure OpenAI deployments @@ -717,7 +717,7 @@ def create_app(): # Set our own logger levels to INFO by default app_level = os.getenv("APP_LOG_LEVEL", "INFO") app.logger.setLevel(os.getenv("APP_LOG_LEVEL", app_level)) - logging.getLogger("ragapp").setLevel(app_level) + logging.getLogger("scripts").setLevel(app_level) if allowed_origin := os.getenv("ALLOWED_ORIGIN"): app.logger.info("ALLOWED_ORIGIN is set, enabling CORS for %s", 
allowed_origin) diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 9d4e7bbd52..95ca08f0f0 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -51,7 +51,7 @@ def __init__( self.content_field = content_field self.query_language = query_language self.query_speller = query_speller - self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=True) + self.chatgpt_token_limit = get_token_limit(chatgpt_model) @property def system_message_chat_conversation(self): diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py index 24ac84d3cc..d5b05a0fbe 100644 --- a/app/backend/approaches/retrievethenread.py +++ b/app/backend/approaches/retrievethenread.py @@ -66,7 +66,7 @@ def __init__( self.content_field = content_field self.query_language = query_language self.query_speller = query_speller - self.chatgpt_token_limit = get_token_limit(chatgpt_model, default_to_minimum=True) + self.chatgpt_token_limit = get_token_limit(chatgpt_model) async def run( self, @@ -121,7 +121,6 @@ async def run( few_shots=[{"role": "user", "content": self.question}, {"role": "assistant", "content": self.answer}], new_user_content=user_content, max_tokens=self.chatgpt_token_limit - response_token_limit, - fallback_to_default=True, ) chat_completion = await self.openai_client.chat.completions.create( diff --git a/app/backend/load_azd_env.py b/app/backend/load_azd_env.py index 3d869b0708..5a6334ab6f 100644 --- a/app/backend/load_azd_env.py +++ b/app/backend/load_azd_env.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") def load_azd_env(): diff --git a/app/backend/prepdocs.py b/app/backend/prepdocs.py index ba911e5fbb..007a6c7f37 100644 --- a/app/backend/prepdocs.py +++ b/app/backend/prepdocs.py @@ -33,7 +33,7 @@ from prepdocslib.textparser 
import TextParser from prepdocslib.textsplitter import SentenceTextSplitter, SimpleTextSplitter -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") def clean_key_if_exists(key: Union[str, None]) -> Union[str, None]: diff --git a/app/backend/prepdocslib/blobmanager.py b/app/backend/prepdocslib/blobmanager.py index 5b24a0281d..e9f18e795a 100644 --- a/app/backend/prepdocslib/blobmanager.py +++ b/app/backend/prepdocslib/blobmanager.py @@ -18,7 +18,7 @@ from .listfilestrategy import File -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class BlobManager: diff --git a/app/backend/prepdocslib/embeddings.py b/app/backend/prepdocslib/embeddings.py index 148e2d2026..c538952e72 100644 --- a/app/backend/prepdocslib/embeddings.py +++ b/app/backend/prepdocslib/embeddings.py @@ -17,7 +17,7 @@ ) from typing_extensions import TypedDict -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class EmbeddingBatch: diff --git a/app/backend/prepdocslib/filestrategy.py b/app/backend/prepdocslib/filestrategy.py index 8ae196e2e8..26745e744d 100644 --- a/app/backend/prepdocslib/filestrategy.py +++ b/app/backend/prepdocslib/filestrategy.py @@ -8,7 +8,7 @@ from .searchmanager import SearchManager, Section from .strategy import DocumentAction, SearchInfo, Strategy -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") async def parse_file( diff --git a/app/backend/prepdocslib/htmlparser.py b/app/backend/prepdocslib/htmlparser.py index 627813d987..a42579f640 100644 --- a/app/backend/prepdocslib/htmlparser.py +++ b/app/backend/prepdocslib/htmlparser.py @@ -7,7 +7,7 @@ from .page import Page from .parser import Parser -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") def cleanup_data(data: str) -> str: diff --git a/app/backend/prepdocslib/integratedvectorizerstrategy.py b/app/backend/prepdocslib/integratedvectorizerstrategy.py index 7bf51e19f1..58b84a1689 100644 --- 
a/app/backend/prepdocslib/integratedvectorizerstrategy.py +++ b/app/backend/prepdocslib/integratedvectorizerstrategy.py @@ -28,7 +28,7 @@ from .searchmanager import SearchManager from .strategy import DocumentAction, SearchInfo, Strategy -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class IntegratedVectorizerStrategy(Strategy): diff --git a/app/backend/prepdocslib/listfilestrategy.py b/app/backend/prepdocslib/listfilestrategy.py index bdc53dc67f..3c8fcd27b0 100644 --- a/app/backend/prepdocslib/listfilestrategy.py +++ b/app/backend/prepdocslib/listfilestrategy.py @@ -13,7 +13,7 @@ DataLakeServiceClient, ) -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class File: diff --git a/app/backend/prepdocslib/pdfparser.py b/app/backend/prepdocslib/pdfparser.py index f20dd61c2c..6604110020 100644 --- a/app/backend/prepdocslib/pdfparser.py +++ b/app/backend/prepdocslib/pdfparser.py @@ -11,7 +11,7 @@ from .page import Page from .parser import Parser -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class LocalPdfParser(Parser): diff --git a/app/backend/prepdocslib/searchmanager.py b/app/backend/prepdocslib/searchmanager.py index 37c03b265e..8757926000 100644 --- a/app/backend/prepdocslib/searchmanager.py +++ b/app/backend/prepdocslib/searchmanager.py @@ -26,7 +26,7 @@ from .strategy import SearchInfo from .textsplitter import SplitPage -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class Section: diff --git a/app/backend/prepdocslib/textsplitter.py b/app/backend/prepdocslib/textsplitter.py index 21c1f9fcbf..30b0c1ad77 100644 --- a/app/backend/prepdocslib/textsplitter.py +++ b/app/backend/prepdocslib/textsplitter.py @@ -6,7 +6,7 @@ from .page import Page, SplitPage -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class TextSplitter(ABC): diff --git a/scripts/adlsgen2setup.py b/scripts/adlsgen2setup.py index 9b2ee2eebb..387b1b94d8 100644 
--- a/scripts/adlsgen2setup.py +++ b/scripts/adlsgen2setup.py @@ -15,7 +15,7 @@ from load_azd_env import load_azd_env -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class AdlsGen2Setup: diff --git a/scripts/manageacl.py b/scripts/manageacl.py index 117e5d025d..00232d2d34 100644 --- a/scripts/manageacl.py +++ b/scripts/manageacl.py @@ -18,7 +18,7 @@ from load_azd_env import load_azd_env -logger = logging.getLogger("ragapp") +logger = logging.getLogger("scripts") class ManageAcl: From 457224da0b770df4779b98847a2f8dfa3b811110 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Mon, 23 Sep 2024 09:48:27 -0600 Subject: [PATCH 31/41] Make mypy happy --- app/backend/prepdocs.py | 17 ++++++++++------- scripts/adlsgen2setup.py | 2 +- scripts/auth_init.py | 4 ++-- scripts/load_azd_env.py | 23 +++++++++++++++++++++++ 4 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 scripts/load_azd_env.py diff --git a/app/backend/prepdocs.py b/app/backend/prepdocs.py index 007a6c7f37..2a0048f7e9 100644 --- a/app/backend/prepdocs.py +++ b/app/backend/prepdocs.py @@ -313,10 +313,10 @@ async def main(strategy: Strategy, setup_index: bool = True): ) blob_manager = setup_blob_manager( azure_credential=azd_credential, - storage_account=os.getenv("AZURE_STORAGE_ACCOUNT"), - storage_container=os.getenv("AZURE_STORAGE_CONTAINER"), - storage_resource_group=os.getenv("AZURE_STORAGE_RESOURCE_GROUP"), - subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"), + storage_account=os.environ["AZURE_STORAGE_ACCOUNT"], + storage_container=os.environ["AZURE_STORAGE_CONTAINER"], + storage_resource_group=os.environ["AZURE_STORAGE_RESOURCE_GROUP"], + subscription_id=os.environ["AZURE_SUBSCRIPTION_ID"], search_images=use_gptvision, storage_key=clean_key_if_exists(args.storagekey), ) @@ -336,14 +336,17 @@ async def main(strategy: Strategy, setup_index: bool = True): elif not openai_host.startswith("azure") and os.getenv("OPENAI_API_KEY"): openai_key = 
os.getenv("OPENAI_API_KEY") + openai_dimensions = 1536 + if os.getenv("AZURE_OPENAI_EMB_DIMENSIONS"): + openai_dimensions = int(os.environ["AZURE_OPENAI_EMB_DIMENSIONS"]) openai_embeddings_service = setup_embeddings_service( azure_credential=azd_credential, openai_host=openai_host, - openai_model_name=os.getenv("AZURE_OPENAI_EMB_MODEL_NAME"), + openai_model_name=os.environ["AZURE_OPENAI_EMB_MODEL_NAME"], openai_service=os.getenv("AZURE_OPENAI_SERVICE"), openai_custom_url=os.getenv("AZURE_OPENAI_CUSTOM_URL"), openai_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT"), - openai_dimensions=os.getenv("AZURE_OPENAI_EMB_DIMENSIONS"), + openai_dimensions=openai_dimensions, openai_key=clean_key_if_exists(openai_key), openai_org=os.getenv("OPENAI_ORGANIZATION"), disable_vectors=dont_use_vectors, @@ -358,7 +361,7 @@ async def main(strategy: Strategy, setup_index: bool = True): blob_manager=blob_manager, document_action=document_action, embeddings=openai_embeddings_service, - subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"), + subscription_id=os.environ["AZURE_SUBSCRIPTION_ID"], search_service_user_assigned_id=args.searchserviceassignedid, search_analyzer_name=os.getenv("AZURE_SEARCH_ANALYZER_NAME"), use_acls=use_acls, diff --git a/scripts/adlsgen2setup.py b/scripts/adlsgen2setup.py index 387b1b94d8..1deccdf199 100644 --- a/scripts/adlsgen2setup.py +++ b/scripts/adlsgen2setup.py @@ -165,7 +165,7 @@ async def main(args: Any): data_access_control_format = json.load(f) command = AdlsGen2Setup( data_directory=args.data_directory, - storage_account_name=os.getenv("AZURE_ADLS_GEN2_STORAGE_ACCOUNT"), + storage_account_name=os.environ["AZURE_ADLS_GEN2_STORAGE_ACCOUNT"], filesystem_name="gptkbcontainer", security_enabled_groups=args.create_security_enabled_groups, credentials=credentials, diff --git a/scripts/auth_init.py b/scripts/auth_init.py index 372df96220..f024b70751 100644 --- a/scripts/auth_init.py +++ b/scripts/auth_init.py @@ -172,12 +172,12 @@ async def main(): print("Not 
setting up authentication.") exit(0) - if not os.getenv("AZURE_AUTH_TENANT_ID") and not os.getenv("AZURE_TENANT_ID"): + auth_tenant = os.getenv("AZURE_AUTH_TENANT_ID", os.getenv("AZURE_TENANT_ID")) + if not auth_tenant: print( "Error: No tenant ID set for authentication. Run `azd env set AZURE_AUTH_TENANT_ID tenant-id` to set the tenant ID." ) exit(1) - auth_tenant = os.getenv("AZURE_AUTH_TENANT_ID", os.getenv("AZURE_TENANT_ID")) print("Setting up authentication for tenant", auth_tenant) credential = AzureDeveloperCliCredential(tenant_id=auth_tenant) diff --git a/scripts/load_azd_env.py b/scripts/load_azd_env.py new file mode 100644 index 0000000000..5a6334ab6f --- /dev/null +++ b/scripts/load_azd_env.py @@ -0,0 +1,23 @@ +import json +import logging +import subprocess + +from dotenv import load_dotenv + +logger = logging.getLogger("scripts") + + +def load_azd_env(): + """Get path to current azd env file and load file using python-dotenv""" + result = subprocess.run("azd env list -o json", shell=True, capture_output=True, text=True) + if result.returncode != 0: + raise Exception("Error loading azd env") + env_json = json.loads(result.stdout) + env_file_path = None + for entry in env_json: + if entry["IsDefault"]: + env_file_path = entry["DotEnvPath"] + if not env_file_path: + raise Exception("No default azd env file found") + logger.info(f"Loading azd env from {env_file_path}") + load_dotenv(env_file_path, override=True) From 7c385c8cf50ac837175d1a97fdac4d3801223fa7 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Mon, 23 Sep 2024 09:59:10 -0600 Subject: [PATCH 32/41] Add dotenv requirement --- app/backend/requirements.in | 1 + app/backend/requirements.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/app/backend/requirements.in b/app/backend/requirements.in index ba7aed8fb0..be5dd02754 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -29,3 +29,4 @@ beautifulsoup4 types-beautifulsoup4 msgraph-sdk==1.1.0 openai-messages-token-helper 
+python-dotenv diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 2234f99278..8bcb466edf 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -348,6 +348,8 @@ python-dateutil==2.9.0.post0 # microsoft-kiota-serialization-text # pendulum # time-machine +python-dotenv==1.0.1 + # via -r requirements.in quart==0.19.6 # via # -r requirements.in From 02c280cf6ff41b068f4b8060b7aafeb4334ae207 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Tue, 24 Sep 2024 05:52:20 -0600 Subject: [PATCH 33/41] Env var tweaks --- infra/main.parameters.json | 3 +++ scripts/auth_init.ps1 | 2 +- scripts/auth_init.sh | 2 +- scripts/prepdocs.sh | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/infra/main.parameters.json b/infra/main.parameters.json index 3575cd8d5b..a807b9abde 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -116,6 +116,9 @@ "embeddingDimensions": { "value": "${AZURE_OPENAI_EMB_DIMENSIONS}" }, + "gpt4vDeploymentCapacity":{ + "value": "${AZURE_OPENAI_GPT4V_DEPLOYMENT_CAPACITY=10}" + }, "openAiHost": { "value": "${OPENAI_HOST=azure}" }, diff --git a/scripts/auth_init.ps1 b/scripts/auth_init.ps1 index 1b1467e60b..c16cb5201b 100755 --- a/scripts/auth_init.ps1 +++ b/scripts/auth_init.ps1 @@ -1,7 +1,7 @@ Write-Host "Checking if authentication should be setup..." $AZURE_USE_AUTHENTICATION = (azd env get-value AZURE_USE_AUTHENTICATION) -if (-not $?) { +if ($AZURE_USE_AUTHENTICATION -ne "true") { Write-Host "AZURE_USE_AUTHENTICATION is not set, skipping authentication setup." Exit 0 } diff --git a/scripts/auth_init.sh b/scripts/auth_init.sh index 0b8cbdb037..dfe0efe620 100755 --- a/scripts/auth_init.sh +++ b/scripts/auth_init.sh @@ -3,7 +3,7 @@ echo "Checking if authentication should be setup..." AZURE_USE_AUTHENTICATION=$(azd env get-value AZURE_USE_AUTHENTICATION) -if [ $? 
-ne 0 ]; then +if [ "$AZURE_USE_AUTHENTICATION" != "true" ]; then echo "AZURE_USE_AUTHENTICATION is not set, skipping authentication setup." exit 0 fi diff --git a/scripts/prepdocs.sh b/scripts/prepdocs.sh index db24b2c11b..b75a5efc96 100755 --- a/scripts/prepdocs.sh +++ b/scripts/prepdocs.sh @@ -7,7 +7,7 @@ echo 'Running "prepdocs.py"' AZURE_USE_AUTHENTICATION=$(azd env get-value AZURE_PUBLIC_NETWORK_ACCESS) AZURE_PUBLIC_NETWORK_ACCESS=$(azd env get-value AZURE_PUBLIC_NETWORK_ACCESS) -if [ -n $AZURE_USE_AUTHENTICATION ] && [ $AZURE_PUBLIC_NETWORK_ACCESS = "Disabled" ]; then +if [ -n "$AZURE_USE_AUTHENTICATION" ] && [ "$AZURE_PUBLIC_NETWORK_ACCESS" = "Disabled" ]; then echo "AZURE_PUBLIC_NETWORK_ACCESS is set to Disabled. Exiting." exit 0 fi From a4a4f11406c57118496b363116f2d0937daf296d Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 25 Sep 2024 08:35:57 -0600 Subject: [PATCH 34/41] Fix error handling --- app/frontend/src/pages/chat/Chat.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 001a9b8712..1129e0f590 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -131,7 +131,7 @@ const Chat = () => { if (event["context"] && event["context"]["data_points"]) { event["message"] = event["delta"]; askResponse = event as ChatAppResponse; - } else if (event["delta"]["content"]) { + } else if (event["delta"] && event["delta"]["content"]) { setIsLoading(false); await updateState(event["delta"]["content"]); } else if (event["context"]) { From e4a7abfd5bcd54388e229c204afd0e7d4b686300 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 25 Sep 2024 09:03:38 -0600 Subject: [PATCH 35/41] Update manageacl.py commands --- docs/login_and_acl.md | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/docs/login_and_acl.md b/docs/login_and_acl.md index 89cf4c0204..d6d1c2363a 100644 --- 
a/docs/login_and_acl.md +++ b/docs/login_and_acl.md @@ -37,7 +37,7 @@ Two Microsoft Entra applications must be registered in order to make the optiona The easiest way to setup the two apps is to use the `azd` CLI. We've written scripts that will automatically create the two apps and configure them for use with the sample. To trigger the automatic setup, run the following commands: 1. Run `azd env set AZURE_USE_AUTHENTICATION true` to enable the login UI and use App Service authentication by default. -1. Ensure access control is enabled on your search index. If your index doesn't exist yet, run prepdocs with `AZURE_USE_AUTHENTICATION` set to `true`. If your index already exists, run `pwsh ./scripts/manageacl.ps1 --acl-action enable_acls`. +1. Ensure access control is enabled on your search index. If your index doesn't exist yet, run prepdocs with `AZURE_USE_AUTHENTICATION` set to `true`. If your index already exists, run `python ./scripts/manageacl.py --acl-action enable_acls`. 1. (Optional) To require access control when using the app, run `azd env set AZURE_ENFORCE_ACCESS_CONTROL true`. Authentication is always required to search on documents with access control assigned, regardless of if unauthenticated access is enabled or not. 1. (Optional) To allow authenticated users to search on documents that have no access controls assigned, even when access control is required, run `azd env set AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS true`. 1. (Optional) To allow unauthenticated users to use the app, even when access control is enforced, run `azd env set AZURE_ENABLE_UNAUTHENTICATED_ACCESS true`. `AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS` should also be set to true if you want unauthenticated users to be able to search on documents with no access control. 
@@ -167,50 +167,53 @@ The sample supports 2 main strategies for adding data with document level access ### Using the Add Documents API -Manually enable document level access control on a search index and manually set access control values using the [manageacl.ps1](../scripts/manageacl.ps1) script. +Manually enable document level access control on a search index and manually set access control values using the [manageacl.py](../scripts/manageacl.py) script. -Run `azd up` or use `azd env set` to manually set `AZURE_SEARCH_SERVICE` and `AZURE_SEARCH_INDEX` environment variables prior to running the script. +Prior to running the script: -The script supports the following commands. Note that the syntax is the same regardless of whether [manageacl.ps1](../scripts/manageacl.ps1) or [manageacl.sh](../scripts/manageacl.sh) is used. All commands support `-v` for verbose logging. +- Run `azd up` or use `azd env set` to manually set the `AZURE_SEARCH_SERVICE` and `AZURE_SEARCH_INDEX` azd environment variables +- Activate the Python virtual environment for your shell session -- `./scripts/manageacl.ps1 --acl-action enable_acls`: Creates the required `oids` (User ID) and `groups` (Group IDs) [security filter](https://learn.microsoft.com/azure/search/search-security-trimming-for-azure-search) fields for document level access control on your index, as well as the `storageUrl` field for storing the Blob storage URL. Does nothing if these fields already exist. +The script supports the following commands. All commands support `-v` for verbose logging. + +- `python ./scripts/manageacl.py --acl-action enable_acls`: Creates the required `oids` (User ID) and `groups` (Group IDs) [security filter](https://learn.microsoft.com/azure/search/search-security-trimming-for-azure-search) fields for document level access control on your index, as well as the `storageUrl` field for storing the Blob storage URL. Does nothing if these fields already exist. 
Example usage: ```shell - ./scripts/manageacl.ps1 -v --acl-action enable_acls + python ./scripts/manageacl.py -v --acl-action enable_acls ``` -- `./scripts/manageacl.ps1 --acl-type [oids or groups]--acl-action view --url [https://url.pdf]`: Prints access control values associated with either User IDs or Group IDs for the document at the specified URL. +- `python ./scripts/manageacl.py --acl-type [oids or groups]--acl-action view --url [https://url.pdf]`: Prints access control values associated with either User IDs or Group IDs for the document at the specified URL. Example to view all Group IDs: ```shell - ./scripts/manageacl.ps1 -v --acl-type groups --acl-action view --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type groups --acl-action view --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` -- `./scripts/manageacl.ps1 --url [https://url.pdf] --acl-type [oids or groups]--acl-action add --acl [ID of group or user]`: Adds an access control value associated with either User IDs or Group IDs for the document at the specified URL. +- `python ./scripts/manageacl.py --acl-type [oids or groups]--acl-action add --acl [ID of group or user] --url [https://url.pdf]`: Adds an access control value associated with either User IDs or Group IDs for the document at the specified URL. Example to add a Group ID: ```shell - ./scripts/manageacl.ps1 -v --acl-type groups --acl-action add --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type groups --acl-action add --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` -- `./scripts/manageacl.ps1 --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove_all`: Removes all access control values associated with either User IDs or Group IDs for a specific document. 
+- `python ./scripts/manageacl.py --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove_all`: Removes all access control values associated with either User IDs or Group IDs for a specific document. Example to remove all Group IDs: ```shell - ./scripts/manageacl.ps1 -v --acl-type groups --acl-action remove_all --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type groups --acl-action remove_all --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` -- `./scripts/manageacl.ps1 --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove --acl [ID of group or user]`: Removes an access control value associated with either User IDs or Group IDs for a specific document. +- `python ./scripts/manageacl.py --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove --acl [ID of group or user]`: Removes an access control value associated with either User IDs or Group IDs for a specific document. Example to remove a specific User ID: ```shell - ./scripts/manageacl.ps1 -v --acl-type oids --acl-action remove --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type oids --acl-action remove --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` ### Azure Data Lake Storage Gen2 Setup From 0ab84dacae6ce67859f2a9bf187c13f5f3724753 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 25 Sep 2024 15:13:41 -0600 Subject: [PATCH 36/41] Doc update --- docs/login_and_acl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/login_and_acl.md b/docs/login_and_acl.md index d6d1c2363a..6bffa5113a 100644 --- a/docs/login_and_acl.md +++ b/docs/login_and_acl.md @@ -200,7 +200,7 @@ The script supports the following commands. 
All commands support `-v` for verbos python ./scripts/manageacl.py -v --acl-type groups --acl-action add --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` -- `python ./scripts/manageacl.py --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove_all`: Removes all access control values associated with either User IDs or Group IDs for a specific document. +- `python ./scripts/manageacl.py --acl-type [oids or groups]--acl-action remove_all --url [https://url.pdf]`: Removes all access control values associated with either User IDs or Group IDs for a specific document. Example to remove all Group IDs: From 4fef88400b9e8ddb52c360e59865d1b340febfa7 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Wed, 25 Sep 2024 19:44:27 -0600 Subject: [PATCH 37/41] Adding more tests for prepdocs --- app/backend/prepdocs.py | 32 ++++++---- tests/test_app_config.py | 130 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 11 deletions(-) diff --git a/app/backend/prepdocs.py b/app/backend/prepdocs.py index 2a0048f7e9..342969b6b6 100644 --- a/app/backend/prepdocs.py +++ b/app/backend/prepdocs.py @@ -158,7 +158,7 @@ def setup_file_processors( ): html_parser: Parser pdf_parser: Parser - doc_int_parser: DocumentAnalysisParser + doc_int_parser: DocumentAnalysisParser = None # check if Azure Document Intelligence credentials are provided if document_intelligence_service is not None: @@ -178,23 +178,33 @@ def setup_file_processors( else: html_parser = doc_int_parser sentence_text_splitter = SentenceTextSplitter(has_image_embeddings=search_images) - return { + + # These file formats can always be parsed, thanks to local packages + file_processors = { ".pdf": FileProcessor(pdf_parser, sentence_text_splitter), ".html": FileProcessor(html_parser, sentence_text_splitter), ".json": FileProcessor(JsonParser(), SimpleTextSplitter()), - ".docx": FileProcessor(doc_int_parser, sentence_text_splitter), - 
".pptx": FileProcessor(doc_int_parser, sentence_text_splitter), - ".xlsx": FileProcessor(doc_int_parser, sentence_text_splitter), - ".png": FileProcessor(doc_int_parser, sentence_text_splitter), - ".jpg": FileProcessor(doc_int_parser, sentence_text_splitter), - ".jpeg": FileProcessor(doc_int_parser, sentence_text_splitter), - ".tiff": FileProcessor(doc_int_parser, sentence_text_splitter), - ".bmp": FileProcessor(doc_int_parser, sentence_text_splitter), - ".heic": FileProcessor(doc_int_parser, sentence_text_splitter), ".md": FileProcessor(TextParser(), sentence_text_splitter), ".txt": FileProcessor(TextParser(), sentence_text_splitter), } + # These file formats require Document Intelligence + if doc_int_parser is not None: + file_processors.update( + { + ".docx": FileProcessor(doc_int_parser, sentence_text_splitter), + ".pptx": FileProcessor(doc_int_parser, sentence_text_splitter), + ".xlsx": FileProcessor(doc_int_parser, sentence_text_splitter), + ".png": FileProcessor(doc_int_parser, sentence_text_splitter), + ".jpg": FileProcessor(doc_int_parser, sentence_text_splitter), + ".jpeg": FileProcessor(doc_int_parser, sentence_text_splitter), + ".tiff": FileProcessor(doc_int_parser, sentence_text_splitter), + ".bmp": FileProcessor(doc_int_parser, sentence_text_splitter), + ".heic": FileProcessor(doc_int_parser, sentence_text_splitter), + } + ) + return file_processors + def setup_image_embeddings_service( azure_credential: AsyncTokenCredential, vision_endpoint: Union[str, None], search_images: bool diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 084dabdc4a..29139d2a02 100644 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -2,6 +2,7 @@ from unittest import mock import pytest +import quart import app @@ -52,6 +53,65 @@ async def test_app_azure_custom_identity(monkeypatch, minimal_env): assert quart_app.config[app.CONFIG_OPENAI_CLIENT].base_url == "http://azureapi.com/api/v1/openai/" +@pytest.mark.asyncio +async def 
test_app_user_upload_processors(monkeypatch, minimal_env): + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + + quart_app = app.create_app() + async with quart_app.test_app(): + ingester = quart_app.config[app.CONFIG_INGESTER] + assert ingester is not None + assert len(ingester.file_processors.keys()) == 5 + + +@pytest.mark.asyncio +async def test_app_user_upload_processors_docint(monkeypatch, minimal_env): + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + monkeypatch.setenv("AZURE_DOCUMENTINTELLIGENCE_SERVICE", "test-docint-service") + + quart_app = app.create_app() + async with quart_app.test_app(): + ingester = quart_app.config[app.CONFIG_INGESTER] + assert ingester is not None + assert len(ingester.file_processors.keys()) == 14 + + +@pytest.mark.asyncio +async def test_app_user_upload_processors_docint_localpdf(monkeypatch, minimal_env): + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + monkeypatch.setenv("AZURE_DOCUMENTINTELLIGENCE_SERVICE", "test-docint-service") + monkeypatch.setenv("USE_LOCAL_PDF_PARSER", "true") + + quart_app = app.create_app() + async with quart_app.test_app(): + ingester = quart_app.config[app.CONFIG_INGESTER] + assert ingester is not None + assert len(ingester.file_processors.keys()) == 14 + assert ingester.file_processors[".pdf"] is not ingester.file_processors[".pptx"] + + +@pytest.mark.asyncio +async def test_app_user_upload_processors_docint_localhtml(monkeypatch, minimal_env): + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + 
monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + monkeypatch.setenv("AZURE_DOCUMENTINTELLIGENCE_SERVICE", "test-docint-service") + monkeypatch.setenv("USE_LOCAL_HTML_PARSER", "true") + + quart_app = app.create_app() + async with quart_app.test_app(): + ingester = quart_app.config[app.CONFIG_INGESTER] + assert ingester is not None + assert len(ingester.file_processors.keys()) == 14 + assert ingester.file_processors[".html"] is not ingester.file_processors[".pptx"] + + @pytest.mark.asyncio async def test_app_config_default(monkeypatch, minimal_env): quart_app = app.create_app() @@ -105,6 +165,7 @@ async def test_app_config_semanticranker_free(monkeypatch, minimal_env): assert result["showGPT4VOptions"] is False assert result["showSemanticRankerOption"] is True assert result["showVectorOption"] is True + assert result["showUserUpload"] is False @pytest.mark.asyncio @@ -119,6 +180,75 @@ async def test_app_config_semanticranker_disabled(monkeypatch, minimal_env): assert result["showGPT4VOptions"] is False assert result["showSemanticRankerOption"] is False assert result["showVectorOption"] is True + assert result["showUserUpload"] is False + + +@pytest.mark.asyncio +async def test_app_config_user_upload(monkeypatch, minimal_env): + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + quart_app = app.create_app() + async with quart_app.test_app() as test_app: + client = test_app.test_client() + response = await client.get("/config") + assert response.status_code == 200 + result = await response.get_json() + assert result["showGPT4VOptions"] is False + assert result["showSemanticRankerOption"] is True + assert result["showVectorOption"] is True + assert result["showUserUpload"] is True + + +@pytest.mark.asyncio +async def 
test_app_config_user_upload_novectors(monkeypatch, minimal_env): + """Check that this combo works correctly with prepdocs.py embedding service.""" + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + monkeypatch.setenv("USE_VECTORS", "false") + quart_app = app.create_app() + async with quart_app.test_app() as test_app: + client = test_app.test_client() + response = await client.get("/config") + assert response.status_code == 200 + result = await response.get_json() + assert result["showGPT4VOptions"] is False + assert result["showSemanticRankerOption"] is True + assert result["showVectorOption"] is False + assert result["showUserUpload"] is True + + +@pytest.mark.asyncio +async def test_app_config_user_upload_bad_openai_config(monkeypatch, minimal_env): + """Check that this combo works correctly with prepdocs.py embedding service.""" + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + monkeypatch.setenv("OPENAI_HOST", "openai") + quart_app = app.create_app() + with pytest.raises( + quart.testing.app.LifespanError, match="OpenAI key is required when using the non-Azure OpenAI API" + ): + async with quart_app.test_app() as test_app: + test_app.test_client() + + +@pytest.mark.asyncio +async def test_app_config_user_upload_openaicom(monkeypatch, minimal_env): + """Check that this combo works correctly with prepdocs.py embedding service.""" + monkeypatch.setenv("AZURE_USERSTORAGE_ACCOUNT", "test-user-storage-account") + monkeypatch.setenv("AZURE_USERSTORAGE_CONTAINER", "test-user-storage-container") + monkeypatch.setenv("USE_USER_UPLOAD", "true") + monkeypatch.setenv("OPENAI_HOST", "openai") + monkeypatch.setenv("OPENAI_API_KEY", "pretendkey") + quart_app = 
app.create_app() + async with quart_app.test_app() as test_app: + client = test_app.test_client() + response = await client.get("/config") + assert response.status_code == 200 + result = await response.get_json() + assert result["showUserUpload"] is True @pytest.mark.asyncio From 7d57de82925a9da662b90062060de0cfc9069592 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 26 Sep 2024 11:00:15 -0700 Subject: [PATCH 38/41] Fix markdown copy --- docs/login_and_acl.md | 2 +- samples/document-security/README.md | 46 ++++++++++++++++------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/docs/login_and_acl.md b/docs/login_and_acl.md index 6bffa5113a..484d64f6ac 100644 --- a/docs/login_and_acl.md +++ b/docs/login_and_acl.md @@ -20,7 +20,7 @@ This guide demonstrates how to add an optional login and document level access control system to the sample. This system can be used to restrict access to indexed data to specific users based on what [Microsoft Entra groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) they are a part of, or their [user object id](https://learn.microsoft.com/partner-center/find-ids-and-domain-names#find-the-user-object-id). 
-![AppLoginArchitecture](./images/applogincomponents.png) +![AppLoginArchitecture](/docs/images/applogincomponents.png) ## Requirements diff --git a/samples/document-security/README.md b/samples/document-security/README.md index a98959df0c..730d0136f0 100644 --- a/samples/document-security/README.md +++ b/samples/document-security/README.md @@ -23,21 +23,22 @@ The [azure-search-openai-demo](/) project can set up a full RAG chat app on Azur ## Table of Contents - [Requirements](#requirements) -- [Setting up Microsoft Entra ID Apps](#setting-up-entra-id-apps) +- [Setting up Microsoft Entra applications](#setting-up-microsoft-entra-applications) - [Automatic Setup](#automatic-setup) - [Manual Setup](#manual-setup) - - [Server App](#setting-up-the-server-app) + - [Server App](#server-app) - [Client App](#client-app) - [Configure Server App Known Client Applications](#configure-server-app-known-client-applications) - [Testing](#testing) - - [Troubleshooting Entra ID Setup](#troubleshooting-entra-id-setup) + - [Programmatic Access With Authentication](#programmatic-access-with-authentication) + - [Troubleshooting](#troubleshooting) - [Adding data with document level access control](#adding-data-with-document-level-access-control) - [Using the Add Documents API](#using-the-add-documents-api) - [Azure Data Lake Storage Gen2 and prepdocs](#azure-data-lake-storage-gen2-setup) -- [Environment Variables Reference](#environment-variables-reference) - - [Authentication Behavior by Environment](#authentication-behavior-by-environment) +- [Environment variables reference](#environment-variables-reference) + - [Authentication behavior by environment](#authentication-behavior-by-environment) -This guide demonstrates how to add an optional login and document level access control system to the sample. 
This system can be used to restrict access to indexed data to specific users based on what [Microsoft Entra ID groups](https://learn.microsoft.com/azure/active-directory/fundamentals/how-to-manage-groups) they are a part of, or their [user object id](https://learn.microsoft.com/partner-center/find-ids-and-domain-names#find-the-user-object-id). +This guide demonstrates how to add an optional login and document level access control system to the sample. This system can be used to restrict access to indexed data to specific users based on what [Microsoft Entra groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) they are a part of, or their [user object id](https://learn.microsoft.com/partner-center/find-ids-and-domain-names#find-the-user-object-id). ![AppLoginArchitecture](/docs/images/applogincomponents.png) @@ -56,7 +57,7 @@ Two Microsoft Entra applications must be registered in order to make the optiona The easiest way to setup the two apps is to use the `azd` CLI. We've written scripts that will automatically create the two apps and configure them for use with the sample. To trigger the automatic setup, run the following commands: 1. Run `azd env set AZURE_USE_AUTHENTICATION true` to enable the login UI and use App Service authentication by default. -1. Ensure access control is enabled on your search index. If your index doesn't exist yet, run prepdocs with `AZURE_USE_AUTHENTICATION` set to `true`. If your index already exists, run `pwsh ./scripts/manageacl.ps1 --acl-action enable_acls`. +1. Ensure access control is enabled on your search index. If your index doesn't exist yet, run prepdocs with `AZURE_USE_AUTHENTICATION` set to `true`. If your index already exists, run `python ./scripts/manageacl.py --acl-action enable_acls`. 1. (Optional) To require access control when using the app, run `azd env set AZURE_ENFORCE_ACCESS_CONTROL true`. 
Authentication is always required to search on documents with access control assigned, regardless of if unauthenticated access is enabled or not. 1. (Optional) To allow authenticated users to search on documents that have no access controls assigned, even when access control is required, run `azd env set AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS true`. 1. (Optional) To allow unauthenticated users to use the app, even when access control is enforced, run `azd env set AZURE_ENABLE_UNAUTHENTICATED_ACCESS true`. `AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS` should also be set to true if you want unauthenticated users to be able to search on documents with no access control. @@ -175,7 +176,7 @@ print(token.token) - If any Entra apps need to be recreated, you can avoid redeploying the app by [changing the app settings in the portal](https://learn.microsoft.com/azure/app-service/configure-common?tabs=portal#configure-app-settings). Any of the [required environment variables](#environment-variables-reference) can be changed. Once the environment variables have been changed, restart the web app. - It's possible a consent dialog will not appear when you log into the app for the first time. If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](../app/backend/core/authentication.py) - It's possible that your tenant admin has placed a restriction on consent to apps with [unverified publishers](https://learn.microsoft.com/entra/identity-platform/publisher-verification-overview). In this case, only admins may consent to the client and server apps, and normal user accounts are unable to use the login system until the admin consents on behalf of the entire organization. 
-- It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/entra/identity/enterprise-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. +- It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/entra/identity/enterprise-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. See this guide for [granting consent to an app](https://learn.microsoft.com/entra/identity/enterprise-apps/grant-admin-consent?pivots=portal). ## Adding data with document level access control @@ -186,50 +187,53 @@ The sample supports 2 main strategies for adding data with document level access ### Using the Add Documents API -Manually enable document level access control on a search index and manually set access control values using the [manageacl.ps1](../scripts/manageacl.ps1) script. +Manually enable document level access control on a search index and manually set access control values using the [manageacl.py](../scripts/manageacl.py) script. -Run `azd up` or use `azd env set` to manually set `AZURE_SEARCH_SERVICE` and `AZURE_SEARCH_INDEX` environment variables prior to running the script. +Prior to running the script: -The script supports the following commands. Note that the syntax is the same regardless of whether [manageacl.ps1](../scripts/manageacl.ps1) or [manageacl.sh](../scripts/manageacl.sh) is used. All commands support `-v` for verbose logging. 
+- Run `azd up` or use `azd env set` to manually set the `AZURE_SEARCH_SERVICE` and `AZURE_SEARCH_INDEX` azd environment variables +- Activate the Python virtual environment for your shell session -- `./scripts/manageacl.ps1 --acl-action enable_acls`: Creates the required `oids` (User ID) and `groups` (Group IDs) [security filter](https://learn.microsoft.com/azure/search/search-security-trimming-for-azure-search) fields for document level access control on your index, as well as the `storageUrl` field for storing the Blob storage URL. Does nothing if these fields already exist. +The script supports the following commands. All commands support `-v` for verbose logging. + +- `python ./scripts/manageacl.py --acl-action enable_acls`: Creates the required `oids` (User ID) and `groups` (Group IDs) [security filter](https://learn.microsoft.com/azure/search/search-security-trimming-for-azure-search) fields for document level access control on your index, as well as the `storageUrl` field for storing the Blob storage URL. Does nothing if these fields already exist. Example usage: ```shell - ./scripts/manageacl.ps1 -v --acl-action enable_acls + python ./scripts/manageacl.py -v --acl-action enable_acls ``` -- `./scripts/manageacl.ps1 --acl-type [oids or groups]--acl-action view --url [https://url.pdf]`: Prints access control values associated with either User IDs or Group IDs for the document at the specified URL. +- `python ./scripts/manageacl.py --acl-type [oids or groups]--acl-action view --url [https://url.pdf]`: Prints access control values associated with either User IDs or Group IDs for the document at the specified URL. 
Example to view all Group IDs: ```shell - ./scripts/manageacl.ps1 -v --acl-type groups --acl-action view --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type groups --acl-action view --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` -- `./scripts/manageacl.ps1 --url [https://url.pdf] --acl-type [oids or groups]--acl-action add --acl [ID of group or user]`: Adds an access control value associated with either User IDs or Group IDs for the document at the specified URL. +- `python ./scripts/manageacl.py --acl-type [oids or groups]--acl-action add --acl [ID of group or user] --url [https://url.pdf]`: Adds an access control value associated with either User IDs or Group IDs for the document at the specified URL. Example to add a Group ID: ```shell - ./scripts/manageacl.ps1 -v --acl-type groups --acl-action add --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type groups --acl-action add --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` -- `./scripts/manageacl.ps1 --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove_all`: Removes all access control values associated with either User IDs or Group IDs for a specific document. +- `python ./scripts/manageacl.py --acl-type [oids or groups]--acl-action remove_all --url [https://url.pdf]`: Removes all access control values associated with either User IDs or Group IDs for a specific document. 
Example to remove all Group IDs: ```shell - ./scripts/manageacl.ps1 -v --acl-type groups --acl-action remove_all --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type groups --acl-action remove_all --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` -- `./scripts/manageacl.ps1 --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove --acl [ID of group or user]`: Removes an access control value associated with either User IDs or Group IDs for a specific document. +- `python ./scripts/manageacl.py --url [https://url.pdf] --acl-type [oids or groups]--acl-action remove --acl [ID of group or user]`: Removes an access control value associated with either User IDs or Group IDs for a specific document. Example to remove a specific User ID: ```shell - ./scripts/manageacl.ps1 -v --acl-type oids --acl-action remove --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf + python ./scripts/manageacl.py -v --acl-type oids --acl-action remove --acl xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx --url https://st12345.blob.core.windows.net/content/Benefit_Options.pdf ``` ### Azure Data Lake Storage Gen2 Setup From 198084512a7c2f6ca9d8781dd8d73430cf3d3169 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 26 Sep 2024 11:12:06 -0700 Subject: [PATCH 39/41] Fix relative links --- docs/login_and_acl.md | 16 ++++++++-------- samples/document-security/README.md | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/login_and_acl.md b/docs/login_and_acl.md index 484d64f6ac..1af26dd249 100644 --- a/docs/login_and_acl.md +++ b/docs/login_and_acl.md @@ -154,7 +154,7 @@ print(token.token) - If your primary tenant restricts the ability to create Entra applications, you'll need to use a separate tenant to create the Entra applications. 
You can create a new tenant by following [these instructions](https://learn.microsoft.com/entra/identity-platform/quickstart-create-new-tenant). Then run `azd env set AZURE_AUTH_TENANT_ID ` before running `azd up`. - If any Entra apps need to be recreated, you can avoid redeploying the app by [changing the app settings in the portal](https://learn.microsoft.com/azure/app-service/configure-common?tabs=portal#configure-app-settings). Any of the [required environment variables](#environment-variables-reference) can be changed. Once the environment variables have been changed, restart the web app. -- It's possible a consent dialog will not appear when you log into the app for the first time. If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](../app/backend/core/authentication.py) +- It's possible a consent dialog will not appear when you log into the app for the first time. If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](/app/backend/core/authentication.py) - It's possible that your tenant admin has placed a restriction on consent to apps with [unverified publishers](https://learn.microsoft.com/entra/identity-platform/publisher-verification-overview). In this case, only admins may consent to the client and server apps, and normal user accounts are unable to use the login system until the admin consents on behalf of the entire organization. 
- It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/entra/identity/enterprise-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. See this guide for [granting consent to an app](https://learn.microsoft.com/entra/identity/enterprise-apps/grant-admin-consent?pivots=portal). @@ -167,7 +167,7 @@ The sample supports 2 main strategies for adding data with document level access ### Using the Add Documents API -Manually enable document level access control on a search index and manually set access control values using the [manageacl.py](../scripts/manageacl.py) script. +Manually enable document level access control on a search index and manually set access control values using the [manageacl.py](/scripts/manageacl.py) script. Prior to running the script: @@ -218,7 +218,7 @@ The script supports the following commands. All commands support `-v` for verbos ### Azure Data Lake Storage Gen2 Setup -[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. The [adlsgen2setup.py](../scripts/adlsgen2setup.py) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. +[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. 
The [adlsgen2setup.py](/scripts/adlsgen2setup.py) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. In order to use this script, an existing Data Lake Storage Gen2 storage account is required. Run `azd env set AZURE_ADLS_GEN2_STORAGE_ACCOUNT ` prior to running the script. @@ -230,11 +230,11 @@ python /scripts/adlsgen2setup.py './data/*' --data-access-control './scripts/sam The script performs the following steps: -- Creates example [groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) listed in the [sampleacls.json](../scripts/sampleacls.json) file. +- Creates example [groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) listed in the [sampleacls.json](/scripts/sampleacls.json) file. - Creates a filesystem / container `gptkbcontainer` in the storage account. -- Creates the directories listed in the [sampleacls.json](../scripts/sampleacls.json) file. -- Uploads the sample PDFs referenced in the [sampleacls.json](../scripts/sampleacls.json) file into the appropriate directories. -- [Recursively sets Access Control Lists (ACLs)](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-acl-cli) using the information from the [sampleacls.json](../scripts/sampleacls.json) file. +- Creates the directories listed in the [sampleacls.json](/scripts/sampleacls.json) file. +- Uploads the sample PDFs referenced in the [sampleacls.json](/scripts/sampleacls.json) file into the appropriate directories. +- [Recursively sets Access Control Lists (ACLs)](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-acl-cli) using the information from the [sampleacls.json](/scripts/sampleacls.json) file. 
In order to use the sample access control, you need to join these groups in your Microsoft Entra tenant. @@ -242,7 +242,7 @@ Note that this optional script may not work in Codespaces if your administrator #### Azure Data Lake Storage Gen2 Prep Docs -Once a Data Lake Storage Gen2 storage account has been setup with sample data and access control lists, [prepdocs.py](../app/backend/prepdocs.py) can be used to automatically process PDFs in the storage account and store them with their [access control lists in the search index](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). +Once a Data Lake Storage Gen2 storage account has been setup with sample data and access control lists, [prepdocs.py](/app/backend/prepdocs.py) can be used to automatically process PDFs in the storage account and store them with their [access control lists in the search index](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). To run this script with a Data Lake Storage Gen2 account, first set the following environment variables: diff --git a/samples/document-security/README.md b/samples/document-security/README.md index 730d0136f0..5f6bde0edd 100644 --- a/samples/document-security/README.md +++ b/samples/document-security/README.md @@ -174,7 +174,7 @@ print(token.token) - If your primary tenant restricts the ability to create Entra applications, you'll need to use a separate tenant to create the Entra applications. You can create a new tenant by following [these instructions](https://learn.microsoft.com/entra/identity-platform/quickstart-create-new-tenant). Then run `azd env set AZURE_AUTH_TENANT_ID ` before running `azd up`. - If any Entra apps need to be recreated, you can avoid redeploying the app by [changing the app settings in the portal](https://learn.microsoft.com/azure/app-service/configure-common?tabs=portal#configure-app-settings). 
Any of the [required environment variables](#environment-variables-reference) can be changed. Once the environment variables have been changed, restart the web app. -- It's possible a consent dialog will not appear when you log into the app for the first time. If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](../app/backend/core/authentication.py) +- It's possible a consent dialog will not appear when you log into the app for the first time. If this consent dialog doesn't appear, you will be unable to use the security filters because the API server app does not have permission to read your authorization information. A consent dialog can be forced to appear by adding `"prompt": "consent"` to the `loginRequest` property in [`authentication.py`](/app/backend/core/authentication.py) - It's possible that your tenant admin has placed a restriction on consent to apps with [unverified publishers](https://learn.microsoft.com/entra/identity-platform/publisher-verification-overview). In this case, only admins may consent to the client and server apps, and normal user accounts are unable to use the login system until the admin consents on behalf of the entire organization. - It's possible that your tenant admin requires [admin approval of all new apps](https://learn.microsoft.com/entra/identity/enterprise-apps/manage-consent-requests). Regardless of whether you select the delegated or admin permissions, the app will not work without tenant admin consent. See this guide for [granting consent to an app](https://learn.microsoft.com/entra/identity/enterprise-apps/grant-admin-consent?pivots=portal). 
@@ -187,7 +187,7 @@ The sample supports 2 main strategies for adding data with document level access ### Using the Add Documents API -Manually enable document level access control on a search index and manually set access control values using the [manageacl.py](../scripts/manageacl.py) script. +Manually enable document level access control on a search index and manually set access control values using the [manageacl.py](/scripts/manageacl.py) script. Prior to running the script: @@ -238,7 +238,7 @@ The script supports the following commands. All commands support `-v` for verbos ### Azure Data Lake Storage Gen2 Setup -[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. The [adlsgen2setup.py](../scripts/adlsgen2setup.py) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. +[Azure Data Lake Storage Gen2](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-introduction) implements an [access control model](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) that can be used for document level access control. The [adlsgen2setup.py](/scripts/adlsgen2setup.py) script uploads the sample data included in the [data](./data) folder to a Data Lake Storage Gen2 storage account. The [Storage Blob Data Owner](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#role-based-access-control-azure-rbac) role is required to use the script. In order to use this script, an existing Data Lake Storage Gen2 storage account is required. 
Run `azd env set AZURE_ADLS_GEN2_STORAGE_ACCOUNT ` prior to running the script. @@ -250,11 +250,11 @@ python /scripts/adlsgen2setup.py './data/*' --data-access-control './scripts/sam The script performs the following steps: -- Creates example [groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) listed in the [sampleacls.json](../scripts/sampleacls.json) file. +- Creates example [groups](https://learn.microsoft.com/entra/fundamentals/how-to-manage-groups) listed in the [sampleacls.json](/scripts/sampleacls.json) file. - Creates a filesystem / container `gptkbcontainer` in the storage account. -- Creates the directories listed in the [sampleacls.json](../scripts/sampleacls.json) file. -- Uploads the sample PDFs referenced in the [sampleacls.json](../scripts/sampleacls.json) file into the appropriate directories. -- [Recursively sets Access Control Lists (ACLs)](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-acl-cli) using the information from the [sampleacls.json](../scripts/sampleacls.json) file. +- Creates the directories listed in the [sampleacls.json](/scripts/sampleacls.json) file. +- Uploads the sample PDFs referenced in the [sampleacls.json](/scripts/sampleacls.json) file into the appropriate directories. +- [Recursively sets Access Control Lists (ACLs)](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-acl-cli) using the information from the [sampleacls.json](/scripts/sampleacls.json) file. In order to use the sample access control, you need to join these groups in your Microsoft Entra tenant. 
@@ -262,7 +262,7 @@ Note that this optional script may not work in Codespaces if your administrator #### Azure Data Lake Storage Gen2 Prep Docs -Once a Data Lake Storage Gen2 storage account has been setup with sample data and access control lists, [prepdocs.py](../app/backend/prepdocs.py) can be used to automatically process PDFs in the storage account and store them with their [access control lists in the search index](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). +Once a Data Lake Storage Gen2 storage account has been setup with sample data and access control lists, [prepdocs.py](/app/backend/prepdocs.py) can be used to automatically process PDFs in the storage account and store them with their [access control lists in the search index](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). To run this script with a Data Lake Storage Gen2 account, first set the following environment variables: From b3787279e53a8ee08bb42c012abc64161f2bbc37 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 26 Sep 2024 13:30:24 -0700 Subject: [PATCH 40/41] Make prepdocs mypy happy --- app/backend/prepdocs.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/app/backend/prepdocs.py b/app/backend/prepdocs.py index 342969b6b6..696cf6397f 100644 --- a/app/backend/prepdocs.py +++ b/app/backend/prepdocs.py @@ -156,10 +156,9 @@ def setup_file_processors( local_html_parser: bool = False, search_images: bool = False, ): - html_parser: Parser - pdf_parser: Parser - doc_int_parser: DocumentAnalysisParser = None + sentence_text_splitter = SentenceTextSplitter(has_image_embeddings=search_images) + doc_int_parser: Optional[DocumentAnalysisParser] = None # check if Azure Document Intelligence credentials are provided if document_intelligence_service is not None: documentintelligence_creds: Union[AsyncTokenCredential, AzureKeyCredential] = ( @@ -169,25 +168,34 @@ def 
setup_file_processors( endpoint=f"https://{document_intelligence_service}.cognitiveservices.azure.com/", credential=documentintelligence_creds, ) + + pdf_parser: Optional[Parser] = None if local_pdf_parser or document_intelligence_service is None: pdf_parser = LocalPdfParser() - else: + elif document_intelligence_service is not None: pdf_parser = doc_int_parser + else: + logger.warning("No PDF parser available") + + html_parser: Optional[Parser] = None if local_html_parser or document_intelligence_service is None: html_parser = LocalHTMLParser() - else: + elif document_intelligence_service is not None: html_parser = doc_int_parser - sentence_text_splitter = SentenceTextSplitter(has_image_embeddings=search_images) + else: + logger.warning("No HTML parser available") - # These file formats can always be parsed, thanks to local packages + # These file formats can always be parsed: file_processors = { - ".pdf": FileProcessor(pdf_parser, sentence_text_splitter), - ".html": FileProcessor(html_parser, sentence_text_splitter), ".json": FileProcessor(JsonParser(), SimpleTextSplitter()), ".md": FileProcessor(TextParser(), sentence_text_splitter), ".txt": FileProcessor(TextParser(), sentence_text_splitter), } - + # These require either a Python package or Document Intelligence + if pdf_parser is not None: + file_processors.update({".pdf": FileProcessor(pdf_parser, sentence_text_splitter)}) + if html_parser is not None: + file_processors.update({".html": FileProcessor(html_parser, sentence_text_splitter)}) # These file formats require Document Intelligence if doc_int_parser is not None: file_processors.update( From 697fa01301d300f5f66a655c81e02eb703247b65 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 26 Sep 2024 13:47:04 -0700 Subject: [PATCH 41/41] Fix auth_update if check --- scripts/auth_update.ps1 | 2 +- scripts/auth_update.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/auth_update.ps1 b/scripts/auth_update.ps1 index 
ab6d620e6d..37f2392acc 100644 --- a/scripts/auth_update.ps1 +++ b/scripts/auth_update.ps1 @@ -1,5 +1,5 @@ $AZURE_USE_AUTHENTICATION = (azd env get-value AZURE_USE_AUTHENTICATION) -if (-not $?) { +if ($AZURE_USE_AUTHENTICATION -ne "true") { Exit 0 } diff --git a/scripts/auth_update.sh b/scripts/auth_update.sh index 02e1e5f703..31635a237d 100755 --- a/scripts/auth_update.sh +++ b/scripts/auth_update.sh @@ -1,7 +1,7 @@ #!/bin/sh AZURE_USE_AUTHENTICATION=$(azd env get-value AZURE_USE_AUTHENTICATION) -if [ $? -ne 0 ]; then +if [ "$AZURE_USE_AUTHENTICATION" != "true" ]; then exit 0 fi