# --- Key Vault / Data Lake configuration -------------------------------------
# Placeholder swapped for the real Key Vault name at deployment time.
key_vault_name = 'kv_to-be-replaced'

# ADLS Gen2 file system (container) and the folders that hold the input data.
file_system_client_name = "data"
# NOTE(review): both directory literals appear to contain a trailing space —
# confirm this matches the actual directory names in the data lake.
directory = 'custom_call_transcripts '
audio_directory = 'custom_audiodata '
2323def get_secrets_from_kv (kv_name , secret_name ):
2424 # Set the name of the Azure Key Vault
@@ -271,7 +271,7 @@ def create_search_index():
271271 )
272272
# Client is connected; select the JSON-transcript analyzer used by the
# processing loop further down.
print("Connected to the content understanding client")
ANALYZER_ID = "ckm-json"
276276def prepare_search_doc (content , document_id ):
277277 chunks = chunk_data (content )
@@ -297,66 +297,66 @@ def prepare_search_doc(content, document_id):
297297 }
298298 return result
299299
# -----------------------------------------------------------------------------
# Batch-process call-transcript JSON files:
#   * download each file from the data-lake file system
#   * run it through the content-understanding analyzer (ANALYZER_ID)
#   * persist the extracted fields into the processed_data SQL table
#   * push chunked search documents to the search index in batches of 10
# Relies on objects created earlier in this script: paths, file_system_client,
# client, cursor, conn, search_client, prepare_search_doc, ANALYZER_ID.
# -----------------------------------------------------------------------------
conversationIds = []
docs = []
counter = 0
from datetime import datetime, timedelta

for path in paths:
    # Pull the raw transcript bytes out of the lake.
    file_client = file_system_client.get_file_client(path.name)
    data_file = file_client.download_file()
    data = data_file.readall()

    try:
        # Run the analyzer and block until the long-running operation finishes.
        response = client.begin_analyze(ANALYZER_ID, file_location="", file_data=data)
        result = client.poll_result(response)

        # File names end with a "YYYY-MM-DD HH_MM_SS" stamp ("%3A" is a
        # URL-encoded ":"); the last 19 characters carry the call start time.
        file_name = path.name.split('/')[-1].replace("%3A", "_")
        start_time = file_name.replace(".json", "")[-19:]

        timestamp_format = "%Y-%m-%d %H_%M_%S"  # Adjust format if necessary
        start_timestamp = datetime.strptime(start_time, timestamp_format)

        # Conversation id sits between "convo_" and the next underscore.
        conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
        conversationIds.append(conversation_id)

        # All extracted fields live under the first content item; look the
        # mapping up once instead of repeating the deep chain per field.
        fields = result['result']['contents'][0]['fields']

        # End time = start time + spoken duration (seconds); strip fractions.
        duration = int(fields['Duration']['valueString'])
        end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
        start_timestamp = str(start_timestamp).split(".")[0]

        summary = fields['summary']['valueString']
        satisfied = fields['satisfied']['valueString']
        sentiment = fields['sentiment']['valueString']
        topic = fields['topic']['valueString']
        key_phrases = fields['keyPhrases']['valueString']
        complaint = fields['complaint']['valueString']
        content = fields['content']['valueString']

        # Parameterized insert — placeholders keep the SQL injection-safe
        # (the original's f-prefix was unnecessary: no interpolation happens).
        cursor.execute(
            "INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, "
            "summary, satisfied, sentiment, topic, key_phrases, complaint) "
            "VALUES (?,?,?,?,?,?,?,?,?,?)",
            (conversation_id, end_timestamp, start_timestamp, content, summary,
             satisfied, sentiment, topic, key_phrases, complaint),
        )
        conn.commit()

        # keyPhrases = key_phrases.split(',')
        # for keyPhrase in keyPhrases:
        #     cursor.execute("INSERT INTO processed_data_key_phrases (ConversationId, key_phrase, sentiment) VALUES (?,?,?)", (conversation_id, keyPhrase, sentiment))

        document_id = conversation_id
        result = prepare_search_doc(content, document_id)
        docs.append(result)
        counter += 1
    except Exception as e:
        # Best-effort per-file processing: skip files that fail, but report
        # which one and why instead of the original bare `except: pass`.
        print(f"Skipping {path.name}: {e}")

    # Flush to the search index every 10 successfully processed files.
    if docs and counter % 10 == 0:
        result = search_client.upload_documents(documents=docs)
        docs = []
        print(f' {str(counter)} uploaded')

# Upload the last (partial) batch.
if docs:
    search_client.upload_documents(documents=docs)
360360
361361
# Switch to the audio analyzer for the audio-processing stage that follows.
ANALYZER_ID = "ckm-audio"
# (removed: "0 commit comments" — GitHub page chrome captured by the scrape)