Skip to content

Commit 843a382

Browse files
authored
Support azd for env creation (#6)
azd
1 parent 4dc1e80 commit 843a382

13 files changed

+282
-70
lines changed

.devcontainer/devcontainer.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
{
22
"name": "Azure AI Content Understanding Demo",
33
"image": "mcr.microsoft.com/devcontainers/python:3.11",
4+
"features": {
5+
"ghcr.io/azure/azure-dev/azd:latest": {}
6+
},
47
"customizations": {
58
"vscode": {
69
"extensions": [
@@ -9,7 +12,11 @@
912
"ms-python.python",
1013
"ms-toolsai.jupyter",
1114
"esbenp.prettier-vscode"
12-
]
15+
],
16+
"settings": {
17+
"python.defaultInterpreterPath": "/usr/local/bin/python"
18+
}
1319
}
14-
}
20+
},
21+
"postCreateCommand": "sudo apt-get install -y git-lfs && git-lfs pull && pip install -r requirements.txt"
1522
}

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,4 +162,5 @@ cython_debug/
162162
#.idea/
163163

164164
# VSCode
165-
.vscode
165+
.vscode
166+
.azure

README.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ Azure AI Content Understanding is a new Generative AI-based [Azure AI service](h
1616
| [content_extraction.ipynb](notebooks/content_extraction.ipynb) | Extract structured content from your input files |
1717
| [analyzer_training.ipynb](notebooks/analyzer_training.ipynb) | Provide training data to improve the quality of your analyzer |
1818

19-
## Prerequisites
20-
21-
To use Content Understanding, you need an [Azure AI Services resource](docs/create_azure_ai_service.md).
2219

2320
## Getting started with GitHub Codespaces
2421

@@ -27,9 +24,22 @@ You can run this repo virtually by using GitHub Codespaces, which will open a we
2724
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new?skip_quickstart=true&machine=basicLinux32gb&repo=899687170&ref=main&geo=UsEast&devcontainer_path=.devcontainer%2Fdevcontainer.json)
2825

2926
### Configure Azure AI service resource
30-
27+
### (Option 1) Use `azd` commands to automatically create temporary resources to run the sample
28+
1. Log in to Azure
29+
```shell
30+
azd auth login
31+
```
32+
1. Set up the environment, following the prompts to choose a location
33+
```shell
34+
azd up
35+
```
36+
37+
38+
### (Option 2) Manually create resources and set environment variables
39+
1. Create [Azure AI Services resource](docs/create_azure_ai_service.md)
40+
1. Go to `Access Control (IAM)` in the resource and grant yourself the `Cognitive Services User` role
3141
1. Copy `notebooks/.env.sample` to `notebooks/.env`
32-
2. Fill **AZURE_AI_ENDPOINT** and **AZURE_AI_API_KEY** with the endpoint and key values from your Azure portal Azure AI Services instance.
42+
1. Fill **AZURE_AI_ENDPOINT** with the endpoint from your Azure portal Azure AI Services instance.
3343

3444
### Open a Jupyter notebook and follow the step-by-step guidance
3545

azure.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json
2+
3+
name: azure-ai-content-understanding-python
4+
metadata:
5+
6+
hooks:
7+
postprovision:
8+
windows:
9+
shell: pwsh
10+
run: ./infra/write_dot_env.ps1
11+
interactive: false
12+
continueOnError: false
13+
posix:
14+
shell: sh
15+
run: sh infra/write_dot_env.sh
16+
interactive: false
17+
continueOnError: false

infra/main.bicep

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
targetScope = 'subscription'
2+
3+
@minLength(1)
4+
@maxLength(64)
5+
@description('Name of the the environment which is used to generate a short unique hash used in all resources.')
6+
param environmentName string
7+
8+
@minLength(1)
9+
@description('Location for the AI resource')
10+
@allowed([
11+
'eastus'
12+
'eastus2'
13+
'northcentralus'
14+
'southcentralus'
15+
'spaincentral'
16+
'swedencentral'
17+
'westus'
18+
'westus3'
19+
])
20+
@metadata({
21+
azd: {
22+
type: 'location'
23+
}
24+
})
25+
param location string
26+
27+
@description('Id of the user or app to assign application roles')
28+
param principalId string = ''
29+
30+
@description('Non-empty if the deployment is running on GitHub Actions')
31+
param runningOnGitHub string = ''
32+
33+
var principalType = empty(runningOnGitHub) ? 'User' : 'ServicePrincipal'
34+
35+
var uniqueId = toLower(uniqueString(subscription().id, environmentName, location))
36+
var resourcePrefix = '${environmentName}${uniqueId}'
37+
var tags = {
38+
'azd-env-name': environmentName
39+
owner: 'azure-ai-sample'
40+
}
41+
42+
// Organize resources in a resource group
43+
resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = {
44+
name: '${resourcePrefix}-rg'
45+
location: location
46+
tags: tags
47+
}
48+
49+
var aiServiceName = '${resourcePrefix}-aiservice'
50+
module aiService 'br/public:avm/res/cognitive-services/account:0.8.1' = {
51+
name: 'aiService'
52+
scope: resourceGroup
53+
params: {
54+
name: aiServiceName
55+
location: location
56+
tags: tags
57+
kind: 'AIServices'
58+
sku: 'S0'
59+
customSubDomainName: aiServiceName
60+
networkAcls: {
61+
defaultAction: 'Allow'
62+
bypass: 'AzureServices'
63+
}
64+
roleAssignments: [
65+
{
66+
principalId: principalId
67+
roleDefinitionIdOrName: 'Cognitive Services User'
68+
principalType: principalType
69+
}
70+
]
71+
}
72+
}
73+
74+
output AZURE_LOCATION string = location
75+
output AZURE_TENANT_ID string = tenant().tenantId
76+
output AZURE_RESOURCE_GROUP string = resourceGroup.name
77+
78+
output AZURE_AI_ENDPOINT string = aiService.outputs.endpoint

infra/main.parameters.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
3+
"contentVersion": "1.0.0.0",
4+
"parameters": {
5+
"environmentName": {
6+
"value": "${AZURE_ENV_NAME}"
7+
},
8+
"location": {
9+
"value": "${AZURE_LOCATION}"
10+
},
11+
"principalId": {
12+
"value": "${AZURE_PRINCIPAL_ID}"
13+
},
14+
"runningOnGitHub": {
15+
"value": "${GITHUB_ACTIONS}"
16+
}
17+
}
18+
}

infra/write_dot_env.ps1

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Clear the contents of the .env file
2+
Set-Content -Path notebooks/.env -Value ""
3+
4+
# Append new values to the .env file
5+
$azureAiEndpoint = azd env get-value AZURE_AI_ENDPOINT
6+
7+
Add-Content -Path notebooks/.env -Value "AZURE_AI_ENDPOINT=$azureAiEndpoint"

infra/write_dot_env.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
3+
# Clear the contents of the .env file
4+
> notebooks/.env
5+
6+
# Append new values to the .env file
7+
echo "AZURE_AI_ENDPOINT=$(azd env get-value AZURE_AI_ENDPOINT)" >> notebooks/.env

notebooks/.env.sample

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
1-
AZURE_AI_ENDPOINT=
2-
AZURE_AI_API_KEY=
1+
AZURE_AI_ENDPOINT=

notebooks/analyzer_training.ipynb

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
},
6161
{
6262
"cell_type": "code",
63-
"execution_count": 3,
63+
"execution_count": null,
6464
"metadata": {},
6565
"outputs": [],
6666
"source": [
@@ -81,8 +81,6 @@
8181
"client = AzureContentUnderstandingClient(\n",
8282
" endpoint=os.getenv(\"AZURE_AI_ENDPOINT\"),\n",
8383
" api_version=os.getenv(\"AZURE_AI_API_VERSION\", \"2024-12-01-preview\"),\n",
84-
" subscription_key=os.getenv(\"AZURE_AI_API_KEY\"),\n",
85-
" api_token=os.getenv(\"AZURE_AI_API_TOKEN\"),\n",
8684
" x_ms_useragent=\"azure-ai-content-understanding-python/analyzer_training\",\n",
8785
")"
8886
]
@@ -99,16 +97,80 @@
9997
},
10098
{
10199
"cell_type": "code",
102-
"execution_count": null,
100+
"execution_count": 4,
103101
"metadata": {},
104-
"outputs": [],
102+
"outputs": [
103+
{
104+
"name": "stderr",
105+
"output_type": "stream",
106+
"text": [
107+
"INFO:python.content_understanding_client:Analyzer train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59 create request accepted.\n",
108+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
109+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
110+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
111+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
112+
"INFO:python.content_understanding_client:Request 7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e in progress ...\n",
113+
"INFO:python.content_understanding_client:Request result is ready after 13.72 seconds.\n",
114+
"INFO:root:Here is the analyzer detail for train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\n",
115+
"INFO:root:{\n",
116+
" \"id\": \"7a0f7689-2b41-4a5e-96bc-c7ef8cb72c5e\",\n",
117+
" \"status\": \"Succeeded\",\n",
118+
" \"result\": {\n",
119+
" \"analyzerId\": \"train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\",\n",
120+
" \"description\": \"Extract useful information from purchase order\",\n",
121+
" \"createdAt\": \"2024-12-09T23:58:59Z\",\n",
122+
" \"lastModifiedAt\": \"2024-12-09T23:59:14Z\",\n",
123+
" \"config\": {\n",
124+
" \"returnDetails\": false,\n",
125+
" \"enableOcr\": true,\n",
126+
" \"enableLayout\": true,\n",
127+
" \"enableBarcode\": false,\n",
128+
" \"enableFormula\": false\n",
129+
" },\n",
130+
" \"fieldSchema\": {\n",
131+
" \"fields\": {\n",
132+
" \"PurchaseOrderNumber\": {\n",
133+
" \"type\": \"string\",\n",
134+
" \"method\": \"extract\",\n",
135+
" \"description\": \"\"\n",
136+
" },\n",
137+
" \"PurchaseDate\": {\n",
138+
" \"type\": \"date\",\n",
139+
" \"method\": \"extract\",\n",
140+
" \"description\": \"\"\n",
141+
" },\n",
142+
" \"TotalPayment\": {\n",
143+
" \"type\": \"number\",\n",
144+
" \"method\": \"extract\",\n",
145+
" \"description\": \"\"\n",
146+
" },\n",
147+
" \"ShippedToAddress\": {\n",
148+
" \"type\": \"string\",\n",
149+
" \"method\": \"extract\",\n",
150+
" \"description\": \"\"\n",
151+
" }\n",
152+
" }\n",
153+
" },\n",
154+
" \"trainingData\": {\n",
155+
" \"containerUrl\": \"https://chethodevusw2.blob.core.windows.net/test?sv=2023-01-03&st=2024-12-09T19%3A27%3A04Z&se=2024-12-10T19%3A27%3A04Z&skoid=4de42c4d-0fe6-4b96-93a4-161b1303ea3b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-12-09T19%3A27%3A04Z&ske=2024-12-10T19%3A27%3A04Z&sks=b&skv=2023-01-03&sr=c&sp=rl&sig=sbUVLXK2JfJdUG7Fxyakg1e9lR%2B%2B6oMISoZqfudtvgw%3D\",\n",
156+
" \"kind\": \"blob\",\n",
157+
" \"prefix\": \"train\"\n",
158+
" },\n",
159+
" \"warnings\": [],\n",
160+
" \"status\": \"ready\",\n",
161+
" \"scenario\": \"document\"\n",
162+
" }\n",
163+
"}\n"
164+
]
165+
}
166+
],
105167
"source": [
106168
"import uuid\n",
107169
"ANALYZER_ID = \"train-sample-\" + str(uuid.uuid4())\n",
108170
"\n",
109171
"response = client.begin_create_analyzer(\n",
110172
" ANALYZER_ID,\n",
111-
" analyzer_schema_path=analyzer_template,\n",
173+
" analyzer_template_path=analyzer_template,\n",
112174
" training_storage_container_sas_url=os.getenv(\"TRAINING_DATA_SAS_URL\"),\n",
113175
" training_storage_container_path_prefix=os.getenv(\"TRAINING_DATA_PATH\"),\n",
114176
")\n",
@@ -139,19 +201,17 @@
139201
"name": "stderr",
140202
"output_type": "stream",
141203
"text": [
142-
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: train-sample-3292ff56-bc75-4bf0-8a09-8aa866d8553f\n",
143-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
144-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
145-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
146-
"INFO:python.content_understanding_client:Request 9ed825c9-551e-45e2-8ec0-1ae555bcd56f in progress ...\n",
147-
"INFO:python.content_understanding_client:Request result is ready after 11.27 seconds.\n",
204+
"INFO:python.content_understanding_client:Analyzing file ../data/purchase_order.jpg with analyzer: train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\n",
205+
"INFO:python.content_understanding_client:Request dced30f5-bb4d-473b-8b7a-13a7e29ed3ac in progress ...\n",
206+
"INFO:python.content_understanding_client:Request dced30f5-bb4d-473b-8b7a-13a7e29ed3ac in progress ...\n",
207+
"INFO:python.content_understanding_client:Request result is ready after 5.52 seconds.\n",
148208
"INFO:root:{\n",
149-
" \"id\": \"9ed825c9-551e-45e2-8ec0-1ae555bcd56f\",\n",
209+
" \"id\": \"dced30f5-bb4d-473b-8b7a-13a7e29ed3ac\",\n",
150210
" \"status\": \"Succeeded\",\n",
151211
" \"result\": {\n",
152-
" \"analyzerId\": \"train-sample-3292ff56-bc75-4bf0-8a09-8aa866d8553f\",\n",
212+
" \"analyzerId\": \"train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59\",\n",
153213
" \"apiVersion\": \"2024-12-01-preview\",\n",
154-
" \"createdAt\": \"2024-12-09T19:42:58Z\",\n",
214+
" \"createdAt\": \"2024-12-09T23:59:16Z\",\n",
155215
" \"warnings\": [],\n",
156216
" \"contents\": [\n",
157217
" {\n",
@@ -253,7 +313,7 @@
253313
"name": "stderr",
254314
"output_type": "stream",
255315
"text": [
256-
"INFO:python.content_understanding_client:Analyzer train-sample-3292ff56-bc75-4bf0-8a09-8aa866d8553f deleted.\n"
316+
"INFO:python.content_understanding_client:Analyzer train-sample-18473b27-6d27-4d51-8906-9e341ad3fb59 deleted.\n"
257317
]
258318
},
259319
{

0 commit comments

Comments
 (0)