# --- Key Vault / Data Lake configuration -------------------------------------
# Placeholder swapped for the real Key Vault name at deployment time.
key_vault_name = 'kv_to-be-replaced'

# ADLS Gen2 file system (container) and the folders that hold the input data.
file_system_client_name = "data"
# NOTE(review): both directory literals appear to contain a trailing space —
# confirm this matches the actual directory names in the data lake.
directory = 'custom_call_transcripts '
audio_directory = 'custom_audiodata '
2323def get_secrets_from_kv (kv_name , secret_name ):
2424 # Set the name of the Azure Key Vault
@@ -271,7 +271,7 @@ def create_search_index():
271271 )
272272
# Client is connected; select the JSON-transcript analyzer used by the
# processing loop further down.
print("Connected to the content understanding client")
ANALYZER_ID = "ckm-json"
276276def prepare_search_doc (content , document_id ):
277277 chunks = chunk_data (content )
@@ -297,66 +297,66 @@ def prepare_search_doc(content, document_id):
297297 }
298298 return result
299299
# -----------------------------------------------------------------------------
# Batch-process call-transcript JSON files:
#   * download each file from the data-lake file system
#   * run it through the content-understanding analyzer (ANALYZER_ID)
#   * persist the extracted fields into the processed_data SQL table
#   * push chunked search documents to the search index in batches of 10
# Relies on objects created earlier in this script: paths, file_system_client,
# client, cursor, conn, search_client, prepare_search_doc, ANALYZER_ID.
# -----------------------------------------------------------------------------
conversationIds = []
docs = []
counter = 0
from datetime import datetime, timedelta

for path in paths:
    # Pull the raw transcript bytes out of the lake.
    file_client = file_system_client.get_file_client(path.name)
    data_file = file_client.download_file()
    data = data_file.readall()

    try:
        # Run the analyzer and block until the long-running operation finishes.
        response = client.begin_analyze(ANALYZER_ID, file_location="", file_data=data)
        result = client.poll_result(response)

        # File names end with a "YYYY-MM-DD HH_MM_SS" stamp ("%3A" is a
        # URL-encoded ":"); the last 19 characters carry the call start time.
        file_name = path.name.split('/')[-1].replace("%3A", "_")
        start_time = file_name.replace(".json", "")[-19:]

        timestamp_format = "%Y-%m-%d %H_%M_%S"  # Adjust format if necessary
        start_timestamp = datetime.strptime(start_time, timestamp_format)

        # Conversation id sits between "convo_" and the next underscore.
        conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
        conversationIds.append(conversation_id)

        # All extracted fields live under the first content item; look the
        # mapping up once instead of repeating the deep chain per field.
        fields = result['result']['contents'][0]['fields']

        # End time = start time + spoken duration (seconds); strip fractions.
        duration = int(fields['Duration']['valueString'])
        end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
        start_timestamp = str(start_timestamp).split(".")[0]

        summary = fields['summary']['valueString']
        satisfied = fields['satisfied']['valueString']
        sentiment = fields['sentiment']['valueString']
        topic = fields['topic']['valueString']
        key_phrases = fields['keyPhrases']['valueString']
        complaint = fields['complaint']['valueString']
        content = fields['content']['valueString']

        # Parameterized insert — placeholders keep the SQL injection-safe
        # (the original's f-prefix was unnecessary: no interpolation happens).
        cursor.execute(
            "INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, "
            "summary, satisfied, sentiment, topic, key_phrases, complaint) "
            "VALUES (?,?,?,?,?,?,?,?,?,?)",
            (conversation_id, end_timestamp, start_timestamp, content, summary,
             satisfied, sentiment, topic, key_phrases, complaint),
        )
        conn.commit()

        # keyPhrases = key_phrases.split(',')
        # for keyPhrase in keyPhrases:
        #     cursor.execute("INSERT INTO processed_data_key_phrases (ConversationId, key_phrase, sentiment) VALUES (?,?,?)", (conversation_id, keyPhrase, sentiment))

        document_id = conversation_id
        result = prepare_search_doc(content, document_id)
        docs.append(result)
        counter += 1
    except Exception as e:
        # Best-effort per-file processing: skip files that fail, but report
        # which one and why instead of the original bare `except: pass`.
        print(f"Skipping {path.name}: {e}")

    # Flush to the search index every 10 successfully processed files.
    if docs and counter % 10 == 0:
        result = search_client.upload_documents(documents=docs)
        docs = []
        print(f' {str(counter)} uploaded')

# Upload the last (partial) batch.
if docs:
    search_client.upload_documents(documents=docs)
360360
361361
# Switch to the audio analyzer for the audio-processing stage that follows.
ANALYZER_ID = "ckm-audio"
# (removed: "0 commit comments" — GitHub page chrome captured by the scrape)