Commit f05da15

Use shell script to trigger the Bicep deployment
1 parent 7744b03 commit f05da15

4 files changed: +113 −63 lines changed

infra/process_data_scripts.bicep

Lines changed: 1 addition & 3 deletions
@@ -1,6 +1,4 @@
-var resourceGroupLocation = resourceGroup().location
-var solutionLocation = resourceGroupLocation
-
+param solutionLocation string
 param keyVaultName string
 param identity string
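With solutionLocation now a parameter instead of being derived from resourceGroup().location, the caller chooses the region at deployment time. A minimal sketch of the manual equivalent (the resource group, region, and resource names here are placeholder values; the new script below resolves them automatically):

az deployment group create \
  --resource-group my-rg \
  --template-file infra/process_data_scripts.bicep \
  --parameters solutionLocation=eastus2 \
               keyVaultName=kv-example \
               identity=/subscriptions/<subscription-id>/resourceGroups/my-rg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/id-example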

infra/scripts/copy_kb_files.sh

Lines changed: 3 additions & 1 deletion
@@ -32,4 +32,6 @@ echo "Script Started"
 az login --identity --client-id ${managedIdentityClientId}
 # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication.
 az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder1" --source /mnt/azscripts/azscriptinput/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite
-az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite
+az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite
+az storage fs directory create --account-name "$storageAccount" --file-system data --name custom_audiodata --auth-mode login
+az storage fs directory create --account-name "$storageAccount" --file-system data --name custom_transcripts --auth-mode login
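The two new az storage fs directory create calls pre-create the Data Lake directories that the updated indexer script reads from. A quick way to confirm they landed after the script runs (a sketch, assuming the same $storageAccount value and a logged-in identity with data-plane access):

az storage fs directory exists --account-name "$storageAccount" --file-system data --name custom_audiodata --auth-mode login
az storage fs directory exists --account-name "$storageAccount" --file-system data --name custom_transcripts --auth-mode login

Each call should print a JSON result with "exists": true once the directory is in place.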

infra/scripts/index_scripts/04_cu_process_data_new_data.py

Lines changed: 59 additions & 59 deletions
@@ -17,8 +17,8 @@
 key_vault_name = 'kv_to-be-replaced'
 
 file_system_client_name = "data"
-directory = 'call_transcripts'
-audio_directory = 'new_audiodata'
+directory = 'custom_call_transcripts'
+audio_directory = 'custom_audiodata'
 
 def get_secrets_from_kv(kv_name, secret_name):
     # Set the name of the Azure Key Vault
@@ -271,7 +271,7 @@ def create_search_index():
 )
 
 print("Connected to the content understanding client")
-# ANALYZER_ID = "ckm-json"
+ANALYZER_ID = "ckm-json"
 
 def prepare_search_doc(content, document_id):
     chunks = chunk_data(content)
@@ -297,66 +297,66 @@ def prepare_search_doc(content, document_id):
     }
     return result
 
-# conversationIds = []
-# docs = []
-# counter = 0
-# from datetime import datetime, timedelta
-
-# for path in paths:
-#     file_client = file_system_client.get_file_client(path.name)
-#     data_file = file_client.download_file()
-#     data = data_file.readall()
+conversationIds = []
+docs = []
+counter = 0
+from datetime import datetime, timedelta
+
+for path in paths:
+    file_client = file_system_client.get_file_client(path.name)
+    data_file = file_client.download_file()
+    data = data_file.readall()
 
-#     try:
-#         #Analyzer file
-#         response = client.begin_analyze(ANALYZER_ID, file_location="", file_data=data)
-#         result = client.poll_result(response)
+    try:
+        #Analyzer file
+        response = client.begin_analyze(ANALYZER_ID, file_location="", file_data=data)
+        result = client.poll_result(response)
 
-#         file_name = path.name.split('/')[-1].replace("%3A", "_")
-#         start_time = file_name.replace(".json", "")[-19:]
+        file_name = path.name.split('/')[-1].replace("%3A", "_")
+        start_time = file_name.replace(".json", "")[-19:]
 
-#         timestamp_format = "%Y-%m-%d %H_%M_%S" # Adjust format if necessary
-#         start_timestamp = datetime.strptime(start_time, timestamp_format)
-
-#         conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
-#         conversationIds.append(conversation_id)
-
-#         duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
-#         end_timestamp = str(start_timestamp + timedelta(seconds=duration))
-#         end_timestamp = end_timestamp.split(".")[0]
-#         start_timestamp = str(start_timestamp).split(".")[0]
-
-#         summary = result['result']['contents'][0]['fields']['summary']['valueString']
-#         satisfied = result['result']['contents'][0]['fields']['satisfied']['valueString']
-#         sentiment = result['result']['contents'][0]['fields']['sentiment']['valueString']
-#         topic = result['result']['contents'][0]['fields']['topic']['valueString']
-#         key_phrases = result['result']['contents'][0]['fields']['keyPhrases']['valueString']
-#         complaint = result['result']['contents'][0]['fields']['complaint']['valueString']
-#         content = result['result']['contents'][0]['fields']['content']['valueString']
-
-#         cursor.execute(f"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)", (conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint))
-#         conn.commit()
+        timestamp_format = "%Y-%m-%d %H_%M_%S" # Adjust format if necessary
+        start_timestamp = datetime.strptime(start_time, timestamp_format)
+
+        conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
+        conversationIds.append(conversation_id)
+
+        duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
+        end_timestamp = str(start_timestamp + timedelta(seconds=duration))
+        end_timestamp = end_timestamp.split(".")[0]
+        start_timestamp = str(start_timestamp).split(".")[0]
+
+        summary = result['result']['contents'][0]['fields']['summary']['valueString']
+        satisfied = result['result']['contents'][0]['fields']['satisfied']['valueString']
+        sentiment = result['result']['contents'][0]['fields']['sentiment']['valueString']
+        topic = result['result']['contents'][0]['fields']['topic']['valueString']
+        key_phrases = result['result']['contents'][0]['fields']['keyPhrases']['valueString']
+        complaint = result['result']['contents'][0]['fields']['complaint']['valueString']
+        content = result['result']['contents'][0]['fields']['content']['valueString']
+
+        cursor.execute(f"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)", (conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint))
+        conn.commit()
 
-#         # keyPhrases = key_phrases.split(',')
-#         # for keyPhrase in keyPhrases:
-#         #     cursor.execute(f"INSERT INTO processed_data_key_phrases (ConversationId, key_phrase, sentiment) VALUES (?,?,?)", (conversation_id, keyPhrase, sentiment))
-
-#         document_id = conversation_id
-
-#         result = prepare_search_doc(content, document_id)
-#         docs.append(result)
-#         counter += 1
-#     except:
-#         pass
-
-#     if docs != [] and counter % 10 == 0:
-#         result = search_client.upload_documents(documents=docs)
-#         docs = []
-#         print(f' {str(counter)} uploaded')
-
-# # upload the last batch
-# if docs != []:
-#     search_client.upload_documents(documents=docs)
+        # keyPhrases = key_phrases.split(',')
+        # for keyPhrase in keyPhrases:
+        #     cursor.execute(f"INSERT INTO processed_data_key_phrases (ConversationId, key_phrase, sentiment) VALUES (?,?,?)", (conversation_id, keyPhrase, sentiment))
+
+        document_id = conversation_id
+
+        result = prepare_search_doc(content, document_id)
+        docs.append(result)
+        counter += 1
+    except:
+        pass
+
+    if docs != [] and counter % 10 == 0:
+        result = search_client.upload_documents(documents=docs)
+        docs = []
+        print(f' {str(counter)} uploaded')
+
+# upload the last batch
+if docs != []:
+    search_client.upload_documents(documents=docs)
 
 
 ANALYZER_ID = "ckm-audio"
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# === Configuration ===
+RESOURCE_GROUP="$1"
+BICEP_FILE="./../process_data_scripts.bicep"
+
+# === Validate input ===
+if [[ -z "$RESOURCE_GROUP" ]]; then
+    echo "Usage: $0 <RESOURCE_GROUP>"
+    echo "ERROR: RESOURCE_GROUP parameter is required."
+    exit 1
+fi
+
+# === Ensure user is logged in to Azure CLI ===
+az account show > /dev/null 2>&1 || az login
+
+echo "Fetching Key Vault and Managed Identity from resource group: $RESOURCE_GROUP"
+
+# === Retrieve the first Key Vault name from the specified resource group ===
+keyVaultName=$(az keyvault list --resource-group "$RESOURCE_GROUP" --query "[0].name" -o tsv)
+
+# === Retrieve the ID of the first user-assigned identity with name starting with 'id-' ===
+identityId=$(az identity list --resource-group "$RESOURCE_GROUP" --query "[?starts_with(name, 'id-')].id | [0]" -o tsv)
+
+# === Normalize identityId (necessary for compatibility in Git Bash on Windows) ===
+identityId=$(echo "$identityId" | sed -E 's|.*(/subscriptions/)|\1|')
+
+# === Get the location of the first SQL Server in the resource group ===
+sqlServerLocation=$(az sql server list --resource-group "$RESOURCE_GROUP" --query "[0].location" -o tsv)
+
+# === Validate that all required resources were found ===
+if [[ -z "$keyVaultName" || -z "$sqlServerLocation" || -z "$identityId" || ! "$identityId" =~ ^/subscriptions/ ]]; then
+    echo "ERROR: Could not find required resources in resource group $RESOURCE_GROUP or identityId is invalid"
+    exit 1
+fi
+
+echo "Using SQL Server Location: $sqlServerLocation"
+echo "Using Key Vault: $keyVaultName"
+echo "Using Managed Identity: $identityId"
+
+# === Deploy resources using the specified Bicep template ===
+echo "Deploying Bicep template..."
+
+# MSYS_NO_PATHCONV disables path conversion in Git Bash for Windows
+MSYS_NO_PATHCONV=1 az deployment group create \
+    --resource-group "$RESOURCE_GROUP" \
+    --template-file "$BICEP_FILE" \
+    --parameters solutionLocation="$sqlServerLocation" keyVaultName="$keyVaultName" identity="$identityId"
+
+echo "Deployment completed."
