Skip to content

Commit 28688c5

Browse files
fix: Improve field access safety and duration parsing to prevent runtime exceptions
2 parents 5941af3 + a0d1bd2 commit 28688c5

File tree

2 files changed

+51
-27
lines changed

2 files changed

+51
-27
lines changed

infra/scripts/index_scripts/03_cu_process_data_text.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ def create_tables():
306306

307307
create_tables()
308308

309+
def get_field_value(fields, field_name, default=""):
    """Return the ``valueString`` of a named field, or *default*.

    Args:
        fields: Mapping of field name -> field entry (each entry is expected
            to be a dict that may carry a ``'valueString'`` key). A falsy
            value such as ``None`` or ``{}`` is treated as "no fields".
        field_name: Key to look up in *fields*.
        default: Value returned when the field cannot be read.

    Returns:
        The field's ``'valueString'`` value, or *default* when the field is
        missing, is not a dict (e.g. ``None``), lacks ``'valueString'``, or
        has ``'valueString'`` set to ``None``.
    """
    if not fields:
        return default
    field = fields.get(field_name)
    # dict.get's fallback only applies to absent keys; a key that is present
    # but holds None (or any non-dict) would otherwise raise AttributeError.
    if not isinstance(field, dict):
        return default
    value = field.get('valueString')
    # Treat an explicit None the same as a missing value so callers always
    # receive something usable in place of the string.
    return default if value is None else value
312+
309313
# Process files and insert into DB and Search
310314
conversationIds, docs, counter = [], [], 0
311315
for path in paths:
@@ -325,17 +329,21 @@ def create_tables():
325329
start_timestamp = datetime.strptime(start_time, timestamp_format)
326330
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
327331
conversationIds.append(conversation_id)
328-
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
332+
fields = result['result']['contents'][0]['fields']
333+
duration_str = get_field_value(fields, 'Duration', '0')
334+
try:
335+
duration = int(duration_str)
336+
except (ValueError, TypeError):
337+
duration = 0
329338
end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
330339
start_timestamp = str(start_timestamp).split(".")[0]
331-
fields = result['result']['contents'][0]['fields']
332-
summary = fields['summary']['valueString']
333-
satisfied = fields['satisfied']['valueString']
334-
sentiment = fields['sentiment']['valueString']
335-
topic = fields['topic']['valueString']
336-
key_phrases = fields['keyPhrases']['valueString']
337-
complaint = fields['complaint']['valueString']
338-
content = fields['content']['valueString']
340+
summary = get_field_value(fields, 'summary')
341+
satisfied = get_field_value(fields, 'satisfied')
342+
sentiment = get_field_value(fields, 'sentiment')
343+
topic = get_field_value(fields, 'topic')
344+
key_phrases = get_field_value(fields, 'keyPhrases')
345+
complaint = get_field_value(fields, 'complaint')
346+
content = get_field_value(fields, 'content')
339347
cursor.execute(
340348
"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)",
341349
(conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint)

infra/scripts/index_scripts/04_cu_process_custom_data.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,10 @@ def create_tables():
351351

352352
create_tables()
353353

354+
def get_field_value(fields, field_name, default=""):
    """Safely read the ``valueString`` of one field from a fields mapping.

    Args:
        fields: Mapping of field name -> field entry (each entry is expected
            to be a dict that may carry a ``'valueString'`` key). ``None`` or
            an empty mapping yields *default*.
        field_name: Name of the field to read.
        default: Fallback returned whenever no usable value is found.

    Returns:
        The ``'valueString'`` value of the requested field, or *default* if
        the field is absent, is not a dict, has no ``'valueString'`` key, or
        that key holds ``None``.
    """
    if not fields:
        return default
    entry = fields.get(field_name)
    # A present-but-None (or otherwise non-dict) entry bypasses dict.get's
    # fallback, so check the type explicitly instead of calling .get on it.
    if not isinstance(entry, dict):
        return default
    text = entry.get('valueString')
    # An explicit None is handled like a missing key.
    if text is None:
        return default
    return text
357+
354358
ANALYZER_ID = "ckm-json"
355359
# Process files and insert into DB and Search - transcripts
356360
conversationIds, docs, counter = [], [], 0
@@ -367,17 +371,23 @@ def create_tables():
367371
start_timestamp = datetime.strptime(start_time, timestamp_format)
368372
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
369373
conversationIds.append(conversation_id)
370-
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
374+
375+
fields = result['result']['contents'][0]['fields']
376+
duration_str = get_field_value(fields, 'Duration', '0')
377+
try:
378+
duration = int(duration_str)
379+
except (ValueError, TypeError):
380+
duration = 0
381+
371382
end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
372383
start_timestamp = str(start_timestamp).split(".")[0]
373-
fields = result['result']['contents'][0]['fields']
374-
summary = fields['summary']['valueString']
375-
satisfied = fields['satisfied']['valueString']
376-
sentiment = fields['sentiment']['valueString']
377-
topic = fields['topic']['valueString']
378-
key_phrases = fields['keyPhrases']['valueString']
379-
complaint = fields['complaint']['valueString']
380-
content = fields['content']['valueString']
384+
summary = get_field_value(fields, 'summary')
385+
satisfied = get_field_value(fields, 'satisfied')
386+
sentiment = get_field_value(fields, 'sentiment')
387+
topic = get_field_value(fields, 'topic')
388+
key_phrases = get_field_value(fields, 'keyPhrases')
389+
complaint = get_field_value(fields, 'complaint')
390+
content = get_field_value(fields, 'content')
381391
cursor.execute(
382392
"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)",
383393
(conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint)
@@ -421,19 +431,25 @@ def create_tables():
421431
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
422432
conversationIds.append(conversation_id)
423433

424-
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
434+
fields = result['result']['contents'][0]['fields']
435+
duration_str = get_field_value(fields, 'Duration', '0')
436+
try:
437+
duration = int(duration_str)
438+
except (ValueError, TypeError):
439+
duration = 0
440+
425441
end_timestamp = str(start_timestamp + timedelta(seconds=duration))
426442
end_timestamp = end_timestamp.split(".")[0]
427443
start_timestamp = str(start_timestamp).split(".")[0]
428444

429-
summary = result['result']['contents'][0]['fields']['summary']['valueString']
430-
satisfied = result['result']['contents'][0]['fields']['satisfied']['valueString']
431-
sentiment = result['result']['contents'][0]['fields']['sentiment']['valueString']
432-
topic = result['result']['contents'][0]['fields']['topic']['valueString']
433-
key_phrases = result['result']['contents'][0]['fields']['keyPhrases']['valueString']
434-
complaint = result['result']['contents'][0]['fields']['complaint']['valueString']
435-
content = result['result']['contents'][0]['fields']['content']['valueString']
436-
# print(topic)
445+
summary = get_field_value(fields, 'summary')
446+
satisfied = get_field_value(fields, 'satisfied')
447+
sentiment = get_field_value(fields, 'sentiment')
448+
topic = get_field_value(fields, 'topic')
449+
key_phrases = get_field_value(fields, 'keyPhrases')
450+
complaint = get_field_value(fields, 'complaint')
451+
content = get_field_value(fields, 'content')
452+
437453
cursor.execute(f"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)", (conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint))
438454
conn.commit()
439455

0 commit comments

Comments
 (0)