Skip to content

Commit 436a3e6

Browse files
authored
feat(infra): provision AI Search vector pipeline via managed identity (#380)
1 parent 76b2f4a commit 436a3e6

File tree

5 files changed

+204
-31
lines changed

5 files changed

+204
-31
lines changed

.infra/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ When `agcSupportEnabled` is on, shared infrastructure also creates the delegated
164164

165165
The `catalog-products` Azure AI Search index is ensured during `azd` `postprovision`, after the search service is reachable, to avoid nested ARM child-resource timing conflicts during `azd provision`.
166166

167-
The vector indexing pipeline (`product_search_index` + datasource/skillset/indexer) is also ensured during `azd` `postprovision`. The current hook path uses a Cosmos DB connection string from the management plane for the AI Search datasource, so no additional AI Search managed-identity Cosmos RBAC assignment is required for this flow.
167+
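The vector indexing pipeline (`product_search_index` + datasource/skillset/indexer) is also ensured during `azd` `postprovision`. The datasource authenticates to Cosmos DB with the Azure AI Search managed identity rather than an account key, using a connection string of the form `ResourceId=<cosmos-account-resource-id>;Database=<database>;IdentityAuthType=AccessToken`. Shared infrastructure provisions both the Cosmos control-plane (Cosmos DB Account Reader Role) and data-plane (Built-in Data Reader) role assignments for the Azure AI Search managed identity.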
168168

169169
**Duration**: ~25 minutes | **Cost**: see [Cost Estimates](#-cost-estimates)
170170

.infra/azd/hooks/ensure-ai-search-vector-pipeline.ps1

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,14 @@ if (-not $cosmosAccountName) {
131131
exit 1
132132
}
133133

134-
$cosmosConnectionString = az cosmosdb keys list --resource-group $ResourceGroup --name $cosmosAccountName --type connection-strings --query 'connectionStrings[0].connectionString' -o tsv
135-
if (-not $cosmosConnectionString) {
136-
Write-Error "Failed to resolve Cosmos DB connection string for account '$cosmosAccountName'."
134+
$cosmosAccountId = az cosmosdb show --resource-group $ResourceGroup --name $cosmosAccountName --query id -o tsv 2>$null
135+
if (-not $cosmosAccountId) {
136+
Write-Error "Failed to resolve Cosmos DB resource ID for account '$cosmosAccountName'."
137137
exit 1
138138
}
139139

140+
$cosmosConnectionString = "ResourceId=$cosmosAccountId;Database=$cosmosDatabase;IdentityAuthType=AccessToken"
141+
140142
$projectEndpoint = Resolve-FromAzdEnv -Keys @('PROJECT_ENDPOINT')
141143
if (-not $projectEndpoint) {
142144
$aiServicesName = Resolve-FromAzdEnv -Keys @('AI_SERVICES_NAME', 'aiServicesName')
@@ -217,21 +219,22 @@ $indexDefinition = @{
217219
@{ name = 'search_keywords'; type = 'Collection(Edm.String)'; searchable = $true }
218220
@{ name = 'enriched_description'; type = 'Edm.String'; searchable = $true }
219221
@{ name = 'description_vector'; type = 'Collection(Edm.Single)'; searchable = $true; retrievable = $true; dimensions = 3072; vectorSearchProfile = 'default-vector-profile' }
222+
@{ name = 'content_vector'; type = 'Collection(Edm.Single)'; searchable = $true; retrievable = $true; dimensions = 3072; vectorSearchProfile = 'default-vector-profile' }
220223
)
221224
vectorSearch = @{
222225
algorithms = @(
223226
@{ name = 'hnsw-algo'; kind = 'hnsw'; hnswParameters = @{ m = 4; efConstruction = 400; efSearch = 500; metric = 'cosine' } }
224227
)
225228
profiles = @(
226-
@{ name = 'default-vector-profile'; algorithmConfigurationName = 'hnsw-algo'; vectorizer = 'text-embedding-vectorizer' }
229+
@{ name = 'default-vector-profile'; algorithm = 'hnsw-algo'; vectorizer = 'text-embedding-vectorizer' }
227230
)
228231
vectorizers = @(
229232
@{ name = 'text-embedding-vectorizer'; kind = 'azureOpenAI'; azureOpenAIParameters = @{ modelName = $EmbeddingDeploymentName; deploymentId = $EmbeddingDeploymentName; resourceUri = $projectEndpoint } }
230233
)
231234
}
232235
semantic = @{
233236
configurations = @(
234-
@{ name = 'default-semantic'; prioritizedFields = @{ titleField = @{ fieldName = 'name' }; contentFields = @(@{ fieldName = 'enriched_description' }, @{ fieldName = 'description' }); keywordsFields = @(@{ fieldName = 'search_keywords' }, @{ fieldName = 'use_cases' }) } }
237+
@{ name = 'default-semantic'; prioritizedFields = @{ titleField = @{ fieldName = 'name' }; prioritizedContentFields = @(@{ fieldName = 'enriched_description' }, @{ fieldName = 'description' }); prioritizedKeywordsFields = @(@{ fieldName = 'search_keywords' }, @{ fieldName = 'use_cases' }) } }
235238
)
236239
}
237240
} | ConvertTo-Json -Depth 20 -Compress
@@ -246,10 +249,6 @@ $indexerDefinition = @{
246249
batchSize = 100
247250
maxFailedItems = 10
248251
maxFailedItemsPerBatch = 5
249-
configuration = @{
250-
parsingMode = 'json'
251-
dataToExtract = 'contentAndMetadata'
252-
}
253252
}
254253
fieldMappings = @(
255254
@{ sourceFieldName = 'entity_id'; targetFieldName = 'entity_id' }
@@ -267,6 +266,7 @@ $indexerDefinition = @{
267266
)
268267
outputFieldMappings = @(
269268
@{ sourceFieldName = '/document/description_vector'; targetFieldName = 'description_vector' }
269+
@{ sourceFieldName = '/document/description_vector'; targetFieldName = 'content_vector' }
270270
)
271271
} | ConvertTo-Json -Depth 16 -Compress
272272

@@ -275,18 +275,19 @@ $headers = @{ 'api-key' = $adminKey; 'Content-Type' = 'application/json' }
275275
function Invoke-SearchPut {
276276
param([string]$Name, [string]$Uri, [string]$Body)
277277
for ($attempt = 1; $attempt -le 12; $attempt++) {
278-
try {
279-
Invoke-RestMethod -Method Put -Uri $Uri -Headers $headers -Body $Body | Out-Null
278+
$response = Invoke-WebRequest -Method Put -Uri $Uri -Headers $headers -Body $Body -SkipHttpErrorCheck
279+
if ($response.StatusCode -ge 200 -and $response.StatusCode -lt 300) {
280280
Write-Host "Azure AI Search resource '$Name' is ready."
281281
return
282282
}
283-
catch {
284-
if ($attempt -eq 12) {
285-
Write-Error "Failed to create or update Azure AI Search resource '$Name': $($_.Exception.Message)"
286-
exit 1
287-
}
288-
Start-Sleep -Seconds 10
283+
284+
if ($attempt -eq 12) {
285+
$detail = if ($response.Content) { " Details: $($response.Content)" } else { '' }
286+
Write-Error "Failed to create or update Azure AI Search resource '$Name' (HTTP $($response.StatusCode)).$detail"
287+
exit 1
289288
}
289+
290+
Start-Sleep -Seconds 10
290291
}
291292
}
292293

.infra/azd/hooks/ensure-ai-search-vector-pipeline.sh

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,14 @@ if [ -z "$COSMOS_ACCOUNT_NAME" ]; then
101101
exit 1
102102
fi
103103

104-
COSMOS_CONNECTION_STRING="$(az cosmosdb keys list --resource-group "$RESOURCE_GROUP" --name "$COSMOS_ACCOUNT_NAME" --type connection-strings --query 'connectionStrings[0].connectionString' -o tsv)"
105-
if [ -z "$COSMOS_CONNECTION_STRING" ]; then
106-
echo "Failed to resolve Cosmos DB connection string for account '${COSMOS_ACCOUNT_NAME}'." >&2
104+
COSMOS_ACCOUNT_ID="$(az cosmosdb show --resource-group "$RESOURCE_GROUP" --name "$COSMOS_ACCOUNT_NAME" --query id -o tsv 2>/dev/null || true)"
105+
if [ -z "$COSMOS_ACCOUNT_ID" ]; then
106+
echo "Failed to resolve Cosmos DB resource ID for account '${COSMOS_ACCOUNT_NAME}'." >&2
107107
exit 1
108108
fi
109109

110+
COSMOS_CONNECTION_STRING="ResourceId=${COSMOS_ACCOUNT_ID};Database=${COSMOS_DATABASE};IdentityAuthType=AccessToken"
111+
110112
PROJECT_ENDPOINT_VALUE="$(resolve_from_azd_env 'PROJECT_ENDPOINT')"
111113
if [ -z "$PROJECT_ENDPOINT_VALUE" ]; then
112114
AI_SERVICES_NAME_VALUE="$(resolve_from_azd_env 'AI_SERVICES_NAME|aiServicesName')"
@@ -135,12 +137,12 @@ EOF
135137
)
136138

137139
INDEX_DEFINITION=$(cat <<EOF
138-
{"name":"${VECTOR_INDEX_NAME}","fields":[{"name":"id","type":"Edm.String","key":true,"filterable":true},{"name":"entity_id","type":"Edm.String","filterable":true},{"name":"sku","type":"Edm.String","filterable":true,"searchable":true},{"name":"name","type":"Edm.String","searchable":true,"analyzer":"en.microsoft"},{"name":"brand","type":"Edm.String","filterable":true,"facetable":true,"searchable":true},{"name":"category","type":"Edm.String","filterable":true,"facetable":true},{"name":"description","type":"Edm.String","searchable":true,"analyzer":"en.microsoft"},{"name":"price","type":"Edm.Double","filterable":true,"sortable":true,"facetable":true},{"name":"use_cases","type":"Collection(Edm.String)","filterable":true,"searchable":true},{"name":"complementary_products","type":"Collection(Edm.String)","filterable":true},{"name":"substitute_products","type":"Collection(Edm.String)","filterable":true},{"name":"search_keywords","type":"Collection(Edm.String)","searchable":true},{"name":"enriched_description","type":"Edm.String","searchable":true},{"name":"description_vector","type":"Collection(Edm.Single)","searchable":true,"retrievable":true,"dimensions":3072,"vectorSearchProfile":"default-vector-profile"}],"vectorSearch":{"algorithms":[{"name":"hnsw-algo","kind":"hnsw","hnswParameters":{"m":4,"efConstruction":400,"efSearch":500,"metric":"cosine"}}],"profiles":[{"name":"default-vector-profile","algorithmConfigurationName":"hnsw-algo","vectorizer":"text-embedding-vectorizer"}],"vectorizers":[{"name":"text-embedding-vectorizer","kind":"azureOpenAI","azureOpenAIParameters":{"modelName":"${EMBEDDING_DEPLOYMENT_NAME}","deploymentId":"${EMBEDDING_DEPLOYMENT_NAME}","resourceUri":"${PROJECT_ENDPOINT_VALUE}"}}]},"semantic":{"configurations":[{"name":"default-semantic","prioritizedFields":{"titleField":{"fieldName":"name"},"contentFields":[{"fieldName":"enriched_description"},{"fieldName":"description"}],"keywordsFields":[{"fieldName":"search_keywords"},{"fieldName":"use_cases"}]}}]}}
140+
{"name":"${VECTOR_INDEX_NAME}","fields":[{"name":"id","type":"Edm.String","key":true,"filterable":true},{"name":"entity_id","type":"Edm.String","filterable":true},{"name":"sku","type":"Edm.String","filterable":true,"searchable":true},{"name":"name","type":"Edm.String","searchable":true,"analyzer":"en.microsoft"},{"name":"brand","type":"Edm.String","filterable":true,"facetable":true,"searchable":true},{"name":"category","type":"Edm.String","filterable":true,"facetable":true},{"name":"description","type":"Edm.String","searchable":true,"analyzer":"en.microsoft"},{"name":"price","type":"Edm.Double","filterable":true,"sortable":true,"facetable":true},{"name":"use_cases","type":"Collection(Edm.String)","filterable":true,"searchable":true},{"name":"complementary_products","type":"Collection(Edm.String)","filterable":true},{"name":"substitute_products","type":"Collection(Edm.String)","filterable":true},{"name":"search_keywords","type":"Collection(Edm.String)","searchable":true},{"name":"enriched_description","type":"Edm.String","searchable":true},{"name":"description_vector","type":"Collection(Edm.Single)","searchable":true,"retrievable":true,"dimensions":3072,"vectorSearchProfile":"default-vector-profile"},{"name":"content_vector","type":"Collection(Edm.Single)","searchable":true,"retrievable":true,"dimensions":3072,"vectorSearchProfile":"default-vector-profile"}],"vectorSearch":{"algorithms":[{"name":"hnsw-algo","kind":"hnsw","hnswParameters":{"m":4,"efConstruction":400,"efSearch":500,"metric":"cosine"}}],"profiles":[{"name":"default-vector-profile","algorithm":"hnsw-algo","vectorizer":"text-embedding-vectorizer"}],"vectorizers":[{"name":"text-embedding-vectorizer","kind":"azureOpenAI","azureOpenAIParameters":{"modelName":"${EMBEDDING_DEPLOYMENT_NAME}","deploymentId":"${EMBEDDING_DEPLOYMENT_NAME}","resourceUri":"${PROJECT_ENDPOINT_VALUE}"}}]},"semantic":{"configurations":[{"name":"default-semantic","prioritizedFields":{"titleField":{"fieldName":"name"},"prioritizedContentFields":[{"fieldName":"enriched_description"},{"fieldName":"description"}],"prioritizedKeywordsFields":[{"fieldName":"search_keywords"},{"fieldName":"use_cases"}]}}]}}
139141
EOF
140142
)
141143

142144
INDEXER_DEFINITION=$(cat <<EOF
143-
{"name":"${INDEXER_NAME}","dataSourceName":"${DATA_SOURCE_NAME}","targetIndexName":"${VECTOR_INDEX_NAME}","skillsetName":"${SKILLSET_NAME}","schedule":{"interval":"PT5M"},"parameters":{"batchSize":100,"maxFailedItems":10,"maxFailedItemsPerBatch":5,"configuration":{"parsingMode":"json","dataToExtract":"contentAndMetadata"}},"fieldMappings":[{"sourceFieldName":"entity_id","targetFieldName":"entity_id"},{"sourceFieldName":"sku","targetFieldName":"sku"},{"sourceFieldName":"name","targetFieldName":"name"},{"sourceFieldName":"brand","targetFieldName":"brand"},{"sourceFieldName":"category","targetFieldName":"category"},{"sourceFieldName":"description","targetFieldName":"description"},{"sourceFieldName":"price","targetFieldName":"price"},{"sourceFieldName":"use_cases","targetFieldName":"use_cases"},{"sourceFieldName":"complementary_products","targetFieldName":"complementary_products"},{"sourceFieldName":"substitute_products","targetFieldName":"substitute_products"},{"sourceFieldName":"search_keywords","targetFieldName":"search_keywords"},{"sourceFieldName":"enriched_description","targetFieldName":"enriched_description"}],"outputFieldMappings":[{"sourceFieldName":"/document/description_vector","targetFieldName":"description_vector"}]}
145+
{"name":"${INDEXER_NAME}","dataSourceName":"${DATA_SOURCE_NAME}","targetIndexName":"${VECTOR_INDEX_NAME}","skillsetName":"${SKILLSET_NAME}","schedule":{"interval":"PT5M"},"parameters":{"batchSize":100,"maxFailedItems":10,"maxFailedItemsPerBatch":5},"fieldMappings":[{"sourceFieldName":"entity_id","targetFieldName":"entity_id"},{"sourceFieldName":"sku","targetFieldName":"sku"},{"sourceFieldName":"name","targetFieldName":"name"},{"sourceFieldName":"brand","targetFieldName":"brand"},{"sourceFieldName":"category","targetFieldName":"category"},{"sourceFieldName":"description","targetFieldName":"description"},{"sourceFieldName":"price","targetFieldName":"price"},{"sourceFieldName":"use_cases","targetFieldName":"use_cases"},{"sourceFieldName":"complementary_products","targetFieldName":"complementary_products"},{"sourceFieldName":"substitute_products","targetFieldName":"substitute_products"},{"sourceFieldName":"search_keywords","targetFieldName":"search_keywords"},{"sourceFieldName":"enriched_description","targetFieldName":"enriched_description"}],"outputFieldMappings":[{"sourceFieldName":"/document/description_vector","targetFieldName":"description_vector"},{"sourceFieldName":"/document/description_vector","targetFieldName":"content_vector"}]}
144146
EOF
145147
)
146148

@@ -151,16 +153,23 @@ put_with_retry() {
151153

152154
attempt=1
153155
while [ "$attempt" -le 12 ]; do
154-
if curl -fsS -X PUT -H "api-key: ${ADMIN_KEY}" -H 'Content-Type: application/json' --data "$RESOURCE_BODY" "$RESOURCE_URI" >/dev/null; then
156+
RESPONSE_FILE="$(mktemp)"
157+
HTTP_CODE="$(curl -sS -o "$RESPONSE_FILE" -w '%{http_code}' -X PUT -H "api-key: ${ADMIN_KEY}" -H 'Content-Type: application/json' --data "$RESOURCE_BODY" "$RESOURCE_URI" || true)"
158+
if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ]; then
159+
rm -f "$RESPONSE_FILE"
155160
echo "Azure AI Search resource '${RESOURCE_NAME}' is ready."
156161
return 0
157162
fi
158163

159164
if [ "$attempt" -eq 12 ]; then
160-
echo "Failed to create or update Azure AI Search resource '${RESOURCE_NAME}'." >&2
165+
echo "Failed to create or update Azure AI Search resource '${RESOURCE_NAME}' (HTTP ${HTTP_CODE})." >&2
166+
cat "$RESPONSE_FILE" >&2 || true
167+
rm -f "$RESPONSE_FILE"
161168
exit 1
162169
fi
163170

171+
rm -f "$RESPONSE_FILE"
172+
164173
attempt=$((attempt + 1))
165174
sleep 10
166175
done

.infra/modules/shared-infrastructure/shared-infrastructure.bicep

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,36 @@ resource aksSearchIndexDataContributorRole 'Microsoft.Authorization/roleAssignme
11001100
}
11011101
}
11021102

1103+
// Azure AI Search managed identity -> Cosmos DB (control plane)
1104+
resource aiSearchCosmosAccountReaderRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
1105+
name: guid(resourceGroup().id, aiSearchName, cosmosAccountName, 'CosmosAccountReader')
1106+
scope: cosmosAccount
1107+
properties: {
1108+
roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'fbdf93bf-df7d-467e-a4d2-9458aa1360c8') // Cosmos DB Account Reader Role
1109+
principalId: aiSearchFromFoundry.identity.principalId
1110+
principalType: 'ServicePrincipal'
1111+
}
1112+
dependsOn: [
1113+
aiFoundry
1114+
cosmos
1115+
]
1116+
}
1117+
1118+
// Azure AI Search managed identity -> Cosmos DB (data plane)
1119+
resource aiSearchCosmosDataReaderRole 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-11-15' = {
1120+
parent: cosmosAccount
1121+
name: guid(resourceGroup().id, aiSearchName, cosmosAccountName, 'CosmosDataReader')
1122+
properties: {
1123+
roleDefinitionId: '${cosmosAccount.id}/sqlRoleDefinitions/00000000-0000-0000-0000-000000000001' // Built-in Data Reader
1124+
principalId: aiSearchFromFoundry.identity.principalId
1125+
scope: cosmosAccount.id
1126+
}
1127+
dependsOn: [
1128+
aiFoundry
1129+
cosmos
1130+
]
1131+
}
1132+
11031133
// Outputs
11041134
output aksClusterName string = aks.outputs.name
11051135
output acrLoginServer string = acr.outputs.loginServer

0 commit comments

Comments
 (0)