Skip to content

Commit 843a382

Browse files
authored
Support azd for env creation (#6)
azd
1 parent 4dc1e80 commit 843a382

13 files changed

+282
-70
lines changed

.devcontainer/devcontainer.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
{
22
"name": "Azure AI Content Understanding Demo",
33
"image": "mcr.microsoft.com/devcontainers/python:3.11",
4+
"features": {
5+
"ghcr.io/azure/azure-dev/azd:latest": {}
6+
},
47
"customizations": {
58
"vscode": {
69
"extensions": [
@@ -9,7 +12,11 @@
912
"ms-python.python",
1013
"ms-toolsai.jupyter",
1114
"esbenp.prettier-vscode"
12-
]
15+
],
16+
"settings": {
17+
"python.defaultInterpreterPath": "/usr/local/bin/python"
18+
}
1319
}
14-
}
20+
},
21+
"postCreateCommand": "sudo apt-get install -y git-lfs && git-lfs pull && pip install -r requirements.txt"
1522
}

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,4 +162,5 @@ cython_debug/
162162
#.idea/
163163

164164
# VSCode
165-
.vscode
165+
.vscode
166+
.azure

README.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ Azure AI Content Understanding is a new Generative AI-based [Azure AI service](h
1616
| [content_extraction.ipynb](notebooks/content_extraction.ipynb) | Extract structured content from your input files |
1717
| [analyzer_training.ipynb](notebooks/analyzer_training.ipynb) | Provide training data to improve the quality of your analyzer |
1818

19-
## Prerequisites
20-
21-
To use Content Understanding, you need an [Azure AI Services resource](docs/create_azure_ai_service.md).
2219

2320
## Getting started with GitHub Codespaces
2421

@@ -27,9 +24,22 @@ You can run this repo virtually by using GitHub Codespaces, which will open a we
2724
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new?skip_quickstart=true&machine=basicLinux32gb&repo=899687170&ref=main&geo=UsEast&devcontainer_path=.devcontainer%2Fdevcontainer.json)
2825

2926
### Configure Azure AI service resource
30-
27+
### (Option 1) Use `azd` commands to automatically create temporary resources to run the sample
28+
1. Log in to Azure
29+
```shell
30+
azd auth login
31+
```
32+
1. Set up the environment, following the prompts to choose a location
33+
```shell
34+
azd up
35+
```
36+
37+
38+
### (Option 2) Manually create resources and set environment variables
39+
1. Create [Azure AI Services resource](docs/create_azure_ai_service.md)
40+
1. Go to `Access Control (IAM)` in the resource and grant yourself the `Cognitive Services User` role
3141
1. Copy `notebooks/.env.sample` to `notebooks/.env`
32-
2. Fill **AZURE_AI_ENDPOINT** and **AZURE_AI_API_KEY** with the endpoint and key values from your Azure portal Azure AI Services instance.
42+
1. Fill **AZURE_AI_ENDPOINT** with the endpoint from your Azure portal Azure AI Services instance.
3343

3444
### Open a Jupyter notebook and follow the step-by-step guidance
3545

azure.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json
2+
3+
name: azure-ai-content-understanding-python
4+
metadata:
5+
6+
hooks:
7+
postprovision:
8+
windows:
9+
shell: pwsh
10+
run: ./infra/write_dot_env.ps1
11+
interactive: false
12+
continueOnError: false
13+
posix:
14+
shell: sh
15+
run: sh infra/write_dot_env.sh
16+
interactive: false
17+
continueOnError: false

infra/main.bicep

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
targetScope = 'subscription'
2+
3+
@minLength(1)
4+
@maxLength(64)
5+
@description('Name of the the environment which is used to generate a short unique hash used in all resources.')
6+
param environmentName string
7+
8+
@minLength(1)
9+
@description('Location for the AI resource')
10+
@allowed([
11+
'eastus'
12+
'eastus2'
13+
'northcentralus'
14+
'southcentralus'
15+
'spaincentral'
16+
'swedencentral'
17+
'westus'
18+
'westus3'
19+
])
20+
@metadata({
21+
azd: {
22+
type: 'location'
23+
}
24+
})
25+
param location string
26+
27+
@description('Id of the user or app to assign application roles')
28+
param principalId string = ''
29+
30+
@description('Non-empty if the deployment is running on GitHub Actions')
31+
param runningOnGitHub string = ''
32+
33+
var principalType = empty(runningOnGitHub) ? 'User' : 'ServicePrincipal'
34+
35+
var uniqueId = toLower(uniqueString(subscription().id, environmentName, location))
36+
var resourcePrefix = '${environmentName}${uniqueId}'
37+
var tags = {
38+
'azd-env-name': environmentName
39+
owner: 'azure-ai-sample'
40+
}
41+
42+
// Organize resources in a resource group
43+
resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = {
44+
name: '${resourcePrefix}-rg'
45+
location: location
46+
tags: tags
47+
}
48+
49+
var aiServiceName = '${resourcePrefix}-aiservice'
50+
module aiService 'br/public:avm/res/cognitive-services/account:0.8.1' = {
51+
name: 'aiService'
52+
scope: resourceGroup
53+
params: {
54+
name: aiServiceName
55+
location: location
56+
tags: tags
57+
kind: 'AIServices'
58+
sku: 'S0'
59+
customSubDomainName: aiServiceName
60+
networkAcls: {
61+
defaultAction: 'Allow'
62+
bypass: 'AzureServices'
63+
}
64+
roleAssignments: [
65+
{
66+
principalId: principalId
67+
roleDefinitionIdOrName: 'Cognitive Services User'
68+
principalType: principalType
69+
}
70+
]
71+
}
72+
}
73+
74+
output AZURE_LOCATION string = location
75+
output AZURE_TENANT_ID string = tenant().tenantId
76+
output AZURE_RESOURCE_GROUP string = resourceGroup.name
77+
78+
output AZURE_AI_ENDPOINT string = aiService.outputs.endpoint

infra/main.parameters.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
3+
"contentVersion": "1.0.0.0",
4+
"parameters": {
5+
"environmentName": {
6+
"value": "${AZURE_ENV_NAME}"
7+
},
8+
"location": {
9+
"value": "${AZURE_LOCATION}"
10+
},
11+
"principalId": {
12+
"value": "${AZURE_PRINCIPAL_ID}"
13+
},
14+
"runningOnGitHub": {
15+
"value": "${GITHUB_ACTIONS}"
16+
}
17+
}
18+
}

infra/write_dot_env.ps1

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Clear the contents of the .env file
2+
Set-Content -Path notebooks/.env -Value ""
3+
4+
# Append new values to the .env file
5+
$azureAiEndpoint = azd env get-value AZURE_AI_ENDPOINT
6+
7+
Add-Content -Path notebooks/.env -Value "AZURE_AI_ENDPOINT=$azureAiEndpoint"

infra/write_dot_env.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
3+
# Clear the contents of the .env file
4+
> notebooks/.env
5+
6+
# Append new values to the .env file
7+
echo "AZURE_AI_ENDPOINT=$(azd env get-value AZURE_AI_ENDPOINT)" >> notebooks/.env

notebooks/.env.sample

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
1-
AZURE_AI_ENDPOINT=
2-
AZURE_AI_API_KEY=
1+
AZURE_AI_ENDPOINT=

notebooks/analyzer_training.ipynb

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
},
6161
{
6262
"cell_type": "code",
63-
"execution_count": 3,
63+
"execution_count": null,
6464
"metadata": {},
6565
"outputs": [],
6666
"source": [
@@ -81,8 +81,6 @@
8181
"client = AzureContentUnderstandingClient(\n",
8282
" endpoint=os.getenv(\"AZURE_AI_ENDPOINT\"),\n",
8383
" api_version=os.getenv(\"AZURE_AI_API_VERSION\", \"2024-12-01-preview\"),\n",
84-
" subscription_key=os.getenv(\"AZURE_AI_API_KEY\"),\n",
85-
" api_token=os.getenv(\"AZURE_AI_API_TOKEN\"),\n",
8684
" x_ms_useragent=\"azure-ai-content-understanding-python/analyzer_training\",\n",
8785
")"
8886
]
@@ -99,16 +97,80 @@
9997
},
10098
{
10199
"cell_type": "code",
102-
"execution_count": null,
100+
"execution_count": 4,
103101
"metadata": {},
104-
"outputs": [],
102+
"outputs": [
103+
{
104+
"name": "stderr",
105+
"output_type": "stream",
106+
"text": [
107+
"INFO:python.content_understanding_client:Analyzer train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59 create request accepted.\n",
108+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
109+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
110+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
111+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
112+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
113+
"INFO:python.content_understanding_client:Request result is ready after 13.72 seconds.\n",
114+
"INFO:root:Here is the analyzer detail for train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\n",
115+
"INFO:root:{\n",
116+
" \"id\": \"7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e\",\n",
117+
" \"status\": \"Succeeded\",\n",
118+
" \"result\": {\n",
119+
" \"analyzerId\": \"train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\",\n",
120+
" \"description\": \"Extract useful information from purchase order\",\n",
121+
" \"createdAt\": \"2024-12-09T23:58:59Z\",\n",
122+
" \"lastModifiedAt\": \"2024-12-09T23:59:14Z\",\n",
123+
" \"config\": {\n",
124+
" \"returnDetails\": false,\n",
125+
" \"enableOcr\": true,\n",
126+
" \"enableLayout\": true,\n",
127+
" \"enableBarcode\": false,\n",
128+
" \"enableFormula\": false\n",
129+
" },\n",
130+
" \"fieldSchema\": {\n",
131+
" \"fields\": {\n",
132+
" \"PurchaseOrderNumber\": {\n",
133+
" \"type\": \"string\",\n",
134+
" \"method\": \"extract\",\n",
135+
" \"description\": \"\"\n",
136+
" },\n",
137+
" \"PurchaseDate\": {\n",
138+
" \"type\": \"date\",\n",
139+
" \"method\": \"extract\",\n",
140+
" \"description\": \"\"\n",
141+
" },\n",
142+
" \"TotalPayment\": {\n",
143+
" \"type\": \"number\",\n",
144+
" \"method\": \"extract\",\n",
145+
" \"description\": \"\"\n",
146+
" },\n",
147+
" \"ShippedToAddress\": {\n",
148+
" \"type\": \"string\",\n",
149+
" \"method\": \"extract\",\n",
150+
" \"description\": \"\"\n",
151+
" }\n",
152+
" }\n",
153+
" },\n",
154+
" \"trainingData\": {\n",
155+
" \"containerUrl\": \"https://chethodevusw2.blob.core.windows.net/test?sv=2023-01-03&st=2024-12-09T19%3A27%3A04Z&se=2024-12-10T19%3A27%3A04Z&skoid=4de42c4d-0fe6-4b96-93a4-161b1303ea3b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-12-09T19%3A27%3A04Z&ske=2024-12-10T19%3A27%3A04Z&sks=b&skv=2023-01-03&sr=c&sp=rl&sig=sbUVLXK2JfJdUG7Fxyakg1e9lR%2B%2B6oMISoZqfudtvgw%3D\",\n",
156+
" \"kind\": \"blob\",\n",
157+
" \"prefix\": \"train\"\n",
158+
" },\n",
159+
" \"warnings\": [],\n",
160+
" \"status\": \"ready\",\n",
161+
" \"scenario\": \"document\"\n",
162+
" }\n",
163+
"}\n"
164+
]
165+
}
166+
],
105167
"source": [
106168
"import uuid\n",
107169
"ANALYZER_ID = \"train-sample-\" + str(uuid.uuid4())\n",
108170
"\n",
109171
"response = client.begin_create_analyzer(\n",
110172
" ANALYZER_ID,\n",
111-
" analyzer_schema_path=analyzer_template,\n",
173+
" analyzer_template_path=analyzer_template,\n",
112174
" training_storage_container_sas_url=os.getenv(\"TRAINING_DATA_SAS_URL\"),\n",
113175
" training_storage_container_path_prefix=os.getenv(\"TRAINING_DATA_PATH\"),\n",
114176
")\n",
@@ -139,19 +201,17 @@
139201
"name": "stderr",
140202
"output_type": "stream",
141203
"text": [
142-
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: train-sample-3292ff56-bc75-4bf0-8a09-8aa866d8553f\n",
143-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
144-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
145-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
146-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
147-
"INFO:python.content_understanding_client:Request result is ready after 11.27 seconds.\n",
204+
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\n",
205+
"INFO:python.content_understanding_client:Request dced30f5-bb4d-473b-8b7a-13a7e29ed3ac in progress ...\n",
206+
"INFO:python.content_understanding_client:Request dced30f5-bb4d-473b-8b7a-13a7e29ed3ac in progress ...\n",
207+
"INFO:python.content_understanding_client:Request result is ready after 5.52 seconds.\n",
148208
"INFO:root:{\n",
149-
" \"id\": \"9ed825c9-551e-45e2-8ec0-1ae555bcd56f\",\n",
209+
" \"id\": \"dced30f5-bb4d-473b-8b7a-13a7e29ed3ac\",\n",
150210
" \"status\": \"Succeeded\",\n",
151211
" \"result\": {\n",
152-
" \"analyzerId\": \"train-sample-3292ff56-bc75-4bf0-8a09-8aa866d8553f\",\n",
212+
" \"analyzerId\": \"train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\",\n",
153213
" \"apiVersion\": \"2024-12-01-preview\",\n",
154-
" \"createdAt\": \"2024-12-09T19:42:58Z\",\n",
214+
" \"createdAt\": \"2024-12-09T23:59:16Z\",\n",
155215
" \"warnings\": [],\n",
156216
" \"contents\": [\n",
157217
" {\n",
@@ -253,7 +313,7 @@
253313
"name": "stderr",
254314
"output_type": "stream",
255315
"text": [
256-
"INFO:python.content_understanding_client:Analyzer train-sample-3292ff56-bc75-4bf0-8a09-8aa866d8553f deleted.\n"
316+
"INFO:python.content_understanding_client:Analyzer train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59 deleted.\n"
257317
]
258318
},
259319
{

0 commit comments

Comments
 (0)