Skip to content

Commit dfd6d81

Browse files
Merge pull request #442 from microsoft/dev
fix: merging dev changes to main
2 parents d9944e0 + e4276f6 commit dfd6d81

File tree

8 files changed

+345
-139
lines changed

8 files changed

+345
-139
lines changed

infra/deploy_ai_foundry.bicep

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ var aiModelDeployments = [
3535
name: deploymentType
3636
capacity: gptDeploymentCapacity
3737
}
38+
version: '2024-05-13'
3839
raiPolicyName: 'Microsoft.Default'
3940
}
4041
{
@@ -44,6 +45,7 @@ var aiModelDeployments = [
4445
name: 'Standard'
4546
capacity: embeddingDeploymentCapacity
4647
}
48+
version: '2'
4749
raiPolicyName: 'Microsoft.Default'
4850
}
4951
]
@@ -159,8 +161,10 @@ resource aiServicesDeployments 'Microsoft.CognitiveServices/accounts/deployments
159161
model: {
160162
format: 'OpenAI'
161163
name: aiModeldeployment.model
164+
version: aiModeldeployment.version
162165
}
163166
raiPolicyName: aiModeldeployment.raiPolicyName
167+
versionUpgradeOption: 'OnceCurrentVersionExpired'
164168
}
165169
sku:{
166170
name: aiModeldeployment.sku.name
@@ -481,4 +485,5 @@ output aiProjectName string = aiHubProject.name
481485
output applicationInsightsId string = applicationInsights.id
482486
output logAnalyticsWorkspaceResourceName string = logAnalytics.name
483487
output storageAccountName string = storageNameCleaned
488+
output applicationInsightsConnectionString string = applicationInsights.properties.ConnectionString
484489

infra/deploy_app_service.bicep

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ param AZURE_COSMOSDB_ENABLE_FEEDBACK string = 'True'
104104

105105
param imageTag string
106106
param applicationInsightsId string
107+
108+
@description('The Application Insights connection string')
109+
@secure()
110+
param appInsightsConnectionString string
107111
// var imageName = 'DOCKER|byoaiacontainer.azurecr.io/byoaia-app:latest'
108112

109113
// var imageName = 'DOCKER|ncwaappcontainerreg1.azurecr.io/ncqaappimage:v1.0.0'
@@ -144,6 +148,10 @@ resource Website 'Microsoft.Web/sites@2020-06-01' = {
144148
name: 'APPINSIGHTS_INSTRUMENTATIONKEY'
145149
value: reference(applicationInsightsId, '2015-05-01').InstrumentationKey
146150
}
151+
{
152+
name: 'APPLICATIONINSIGHTS_CONNECTION_STRING'
153+
value: appInsightsConnectionString
154+
}
147155
{
148156
name: 'AZURE_SEARCH_SERVICE'
149157
value: aiSearchService
@@ -331,4 +339,3 @@ resource role 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2022-05-
331339
}
332340

333341
output webAppUrl string = 'https://${WebsiteName}.azurewebsites.net'
334-

infra/main.bicep

Lines changed: 2 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ module appserviceModule 'deploy_app_service.bicep' = {
385385
// AZURE_COSMOSDB_ACCOUNT_KEY: keyVault.getSecret('AZURE-COSMOSDB-ACCOUNT-KEY')
386386
AZURE_COSMOSDB_CONVERSATIONS_CONTAINER: cosmosDBModule.outputs.cosmosContainerName
387387
AZURE_COSMOSDB_DATABASE: cosmosDBModule.outputs.cosmosDatabaseName
388+
appInsightsConnectionString: aifoundry.outputs.applicationInsightsConnectionString
388389
AZURE_COSMOSDB_ENABLE_FEEDBACK:'True'
389390
HostingPlanName:'${abbrs.compute.appServicePlan}${solutionPrefix}'
390391
WebsiteName:'${abbrs.compute.webApp}${solutionPrefix}'
@@ -419,6 +420,7 @@ resource ApplicationInsights 'Microsoft.Insights/components@2020-02-02' = {
419420
kind: 'web'
420421
}
421422

423+
422424
// ========== Cosmos DB module ========== //
423425
module cosmosDBModule 'deploy_cosmos_db.bicep' = {
424426
name: 'deploy_cosmos_db'
@@ -438,125 +440,3 @@ output STORAGE_CONTAINER_NAME string = storageAccount.outputs.storageContainer
438440
output KEY_VAULT_NAME string = kvault.outputs.keyvaultName
439441
output COSMOSDB_ACCOUNT_NAME string = cosmosDBModule.outputs.cosmosAccountName
440442
output RESOURCE_GROUP_NAME string = resourceGroup().name
441-
442-
443-
// //========== Deployment script to upload sample data ========== //
444-
// module uploadFiles 'deploy_upload_files_script.bicep' = {
445-
// name : 'deploy_upload_files_script'
446-
// params:{
447-
// solutionLocation: secondaryLocation
448-
// baseUrl: baseUrl
449-
// storageAccountName: storageAccount.outputs.storageName
450-
// containerName: storageAccount.outputs.storageContainer
451-
// managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id
452-
// }
453-
454-
// // dependsOn:[storageAccount,keyVault]
455-
// }
456-
457-
// //========== Deployment script to process and index data ========== //
458-
// module createIndex 'deploy_index_scripts.bicep' = {
459-
// name : 'deploy_index_scripts'
460-
// params:{
461-
// solutionLocation: secondaryLocation
462-
// identity:managedIdentityModule.outputs.managedIdentityOutput.id
463-
// baseUrl:baseUrl
464-
// keyVaultName:aifoundry.outputs.keyvaultName
465-
// }
466-
// dependsOn:[keyVault,uploadFiles]
467-
// }
468-
469-
// //========== Deployment script to upload sample data ========== //
470-
// module uploadFiles 'deploy_post_deployment_scripts.bicep' = {
471-
// name : 'deploy_post_deployment_scripts'
472-
// params:{
473-
// solutionName: solutionPrefix
474-
// solutionLocation: secondaryLocation
475-
// baseUrl: baseUrl
476-
// storageAccountName: storageAccount.outputs.storageName
477-
// containerName: storageAccount.outputs.storageContainer
478-
// managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id
479-
// managedIdentityClientId:managedIdentityModule.outputs.managedIdentityOutput.clientId
480-
// keyVaultName:aifoundry.outputs.keyvaultName
481-
// logAnalyticsWorkspaceResourceName: aifoundry.outputs.logAnalyticsWorkspaceResourceName
482-
// }
483-
// }
484-
485-
486-
// resource CosmosDB 'Microsoft.DocumentDB/databaseAccounts@2023-04-15' = {
487-
// name: CosmosDBName
488-
// location: CosmosDBRegion
489-
// kind: 'GlobalDocumentDB'
490-
// properties: {
491-
// consistencyPolicy: {
492-
// defaultConsistencyLevel: 'Session'
493-
// }
494-
// locations: [
495-
// {
496-
// locationName: CosmosDBRegion
497-
// failoverPriority: 0
498-
// isZoneRedundant: false
499-
// }
500-
// ]
501-
// databaseAccountOfferType: 'Standard'
502-
// capabilities: [
503-
// {
504-
// name: 'EnableServerless'
505-
// }
506-
// ]
507-
// }
508-
// }
509-
510-
// resource CosmosDBName_cosmosdb_database_name 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2023-04-15' = {
511-
// parent: CosmosDB
512-
// name: '${cosmosdb_database_name}'
513-
// properties: {
514-
// resource: {
515-
// id: cosmosdb_database_name
516-
// }
517-
// }
518-
// }
519-
520-
// resource CosmosDBName_cosmosdb_database_name_conversations 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2023-04-15' = {
521-
// parent: CosmosDBName_cosmosdb_database_name
522-
// name: 'conversations'
523-
// properties: {
524-
// resource: {
525-
// id: 'conversations'
526-
// indexingPolicy: {
527-
// indexingMode: 'consistent'
528-
// automatic: true
529-
// includedPaths: [
530-
// {
531-
// path: '/*'
532-
// }
533-
// ]
534-
// excludedPaths: [
535-
// {
536-
// path: '/"_etag"/?'
537-
// }
538-
// ]
539-
// }
540-
// partitionKey: {
541-
// paths: [
542-
// '/userId'
543-
// ]
544-
// kind: 'Hash'
545-
// }
546-
// }
547-
// }
548-
// }
549-
550-
// resource CosmosDBName_roleAssignmentId 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2021-04-15' = {
551-
// parent: CosmosDB
552-
// name: '${roleAssignmentId}'
553-
// properties: {
554-
// roleDefinitionId: resourceId(
555-
// 'Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions',
556-
// split('${CosmosDBName}/${roleDefinitionId}', '/')[0],
557-
// split('${CosmosDBName}/${roleDefinitionId}', '/')[1]
558-
// )
559-
// // principalId: reference(Website.id, '2021-02-01', 'Full').identity.principalId
560-
// scope: CosmosDB.id
561-
// }
562-
// }

infra/scripts/index_scripts/02_process_data.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from azure.storage.filedatalake import DataLakeServiceClient
1111
from azure.search.documents.indexes import SearchIndexClient
1212

13+
1314
key_vault_name = 'kv_to-be-replaced'
1415
managed_identity_client_id = 'mici_to-be-replaced'
1516
file_system_client_name = "data"
@@ -58,7 +59,7 @@ def clean_spaces_with_regex(text):
5859

5960

6061
def chunk_data(text):
61-
tokens_per_chunk = 1024 # 500
62+
tokens_per_chunk = 256 # 1024 # 500
6263
text = clean_spaces_with_regex(text)
6364

6465
sentences = text.split('. ') # Split text into sentences
@@ -115,6 +116,7 @@ def chunk_data(text):
115116

116117
def prepare_search_doc(content, document_id):
117118
chunks = chunk_data(content)
119+
results = []
118120
chunk_num = 0
119121
for chunk in chunks:
120122
chunk_num += 1
@@ -138,7 +140,8 @@ def prepare_search_doc(content, document_id):
138140
"sourceurl": path.name.split('/')[-1],
139141
"contentVector": v_contentVector
140142
}
141-
return result
143+
results.append(result)
144+
return results
142145

143146

144147
# conversationIds = []
@@ -163,13 +166,14 @@ def prepare_search_doc(content, document_id):
163166
page = pdf_reader.pages[page_num]
164167
text += page.extract_text()
165168
result = prepare_search_doc(text, document_id)
166-
docs.append(result)
169+
docs.extend(result)
167170

168171
counter += 1
169172
if docs != [] and counter % 10 == 0:
170173
result = search_client.upload_documents(documents=docs)
171174
docs = []
172-
print(f' {str(counter)} uploaded')
173175

174176
if docs != []:
175177
results = search_client.upload_documents(documents=docs)
178+
179+
print(f'{str(counter)} files processed.')

0 commit comments

Comments
 (0)