Skip to content

Commit 5e36f29

Browse files
committed
Refactor to use get_field_value() for safer field access to avoid KeyError exceptions
1 parent 8673fe5 commit 5e36f29

File tree

3 files changed

+49
-36
lines changed

3 files changed

+49
-36
lines changed

infra/scripts/index_scripts/03_cu_process_data_text.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,10 @@ def create_tables():
195195

196196
create_tables()
197197

198+
def get_field_value(fields, field_name, default=""):
    """Return the ``valueString`` stored under ``fields[field_name]``.

    Args:
        fields: Mapping of field name to a per-field dict, taken from
            ``result['result']['contents'][0]['fields']`` by the callers.
        field_name: Key of the field to read.
        default: Value returned when no usable ``valueString`` is available.

    Returns:
        The field's ``valueString``, or ``default`` when the field is
        missing, is not a dict (e.g. null), or has no ``valueString`` /
        a null ``valueString``.
    """
    field = fields.get(field_name)
    # A key that is present but null (or any non-dict) would otherwise
    # raise AttributeError on ``.get`` below.
    if not isinstance(field, dict):
        return default
    value = field.get('valueString')
    # Treat an explicit null the same as a missing key so callers such as
    # ``int(get_field_value(fields, 'Duration', '0'))`` never receive None.
    return default if value is None else value
201+
198202
# Process files and insert into DB and Search
199203
conversationIds, docs, counter = [], [], 0
200204
for path in paths:
@@ -210,17 +214,17 @@ def create_tables():
210214
start_timestamp = datetime.strptime(start_time, timestamp_format)
211215
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
212216
conversationIds.append(conversation_id)
213-
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
217+
fields = result['result']['contents'][0]['fields']
218+
duration = int(get_field_value(fields, 'Duration', '0'))
214219
end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
215220
start_timestamp = str(start_timestamp).split(".")[0]
216-
fields = result['result']['contents'][0]['fields']
217-
summary = fields['summary']['valueString']
218-
satisfied = fields['satisfied']['valueString']
219-
sentiment = fields['sentiment']['valueString']
220-
topic = fields['topic']['valueString']
221-
key_phrases = fields['keyPhrases']['valueString']
222-
complaint = fields['complaint']['valueString']
223-
content = fields['content']['valueString']
221+
summary = get_field_value(fields, 'summary')
222+
satisfied = get_field_value(fields, 'satisfied')
223+
sentiment = get_field_value(fields, 'sentiment')
224+
topic = get_field_value(fields, 'topic')
225+
key_phrases = get_field_value(fields, 'keyPhrases')
226+
complaint = get_field_value(fields, 'complaint')
227+
content = get_field_value(fields, 'content')
224228
cursor.execute(
225229
"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)",
226230
(conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint)

infra/scripts/index_scripts/03_cu_process_data_text_manual.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,10 @@ def create_tables():
198198

199199
create_tables()
200200

201+
def get_field_value(fields, field_name, default=""):
    """Return the ``valueString`` stored under ``fields[field_name]``.

    Args:
        fields: Mapping of field name to a per-field dict, taken from
            ``result['result']['contents'][0]['fields']`` by the callers.
        field_name: Key of the field to read.
        default: Value returned when no usable ``valueString`` is available.

    Returns:
        The field's ``valueString``, or ``default`` when the field is
        missing, is not a dict (e.g. null), or has no ``valueString`` /
        a null ``valueString``.
    """
    field = fields.get(field_name)
    # A key that is present but null (or any non-dict) would otherwise
    # raise AttributeError on ``.get`` below.
    if not isinstance(field, dict):
        return default
    value = field.get('valueString')
    # Treat an explicit null the same as a missing key so callers such as
    # ``int(get_field_value(fields, 'Duration', '0'))`` never receive None.
    return default if value is None else value
204+
201205
# Process files and insert into DB and Search
202206
conversationIds, docs, counter = [], [], 0
203207
for path in paths:
@@ -213,17 +217,17 @@ def create_tables():
213217
start_timestamp = datetime.strptime(start_time, timestamp_format)
214218
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
215219
conversationIds.append(conversation_id)
216-
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
220+
fields = result['result']['contents'][0]['fields']
221+
duration = int(get_field_value(fields, 'Duration', '0'))
217222
end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
218223
start_timestamp = str(start_timestamp).split(".")[0]
219-
fields = result['result']['contents'][0]['fields']
220-
summary = fields['summary']['valueString']
221-
satisfied = fields['satisfied']['valueString']
222-
sentiment = fields['sentiment']['valueString']
223-
topic = fields['topic']['valueString']
224-
key_phrases = fields['keyPhrases']['valueString']
225-
complaint = fields['complaint']['valueString']
226-
content = fields['content']['valueString']
224+
summary = get_field_value(fields, 'summary')
225+
satisfied = get_field_value(fields, 'satisfied')
226+
sentiment = get_field_value(fields, 'sentiment')
227+
topic = get_field_value(fields, 'topic')
228+
key_phrases = get_field_value(fields, 'keyPhrases')
229+
complaint = get_field_value(fields, 'complaint')
230+
content = get_field_value(fields, 'content')
227231
cursor.execute(
228232
"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)",
229233
(conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint)

infra/scripts/index_scripts/04_cu_process_data_new_data.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,10 @@ def create_tables():
266266

267267
create_tables()
268268

269+
def get_field_value(fields, field_name, default=""):
    """Return the ``valueString`` stored under ``fields[field_name]``.

    Args:
        fields: Mapping of field name to a per-field dict, taken from
            ``result['result']['contents'][0]['fields']`` by the callers.
        field_name: Key of the field to read.
        default: Value returned when no usable ``valueString`` is available.

    Returns:
        The field's ``valueString``, or ``default`` when the field is
        missing, is not a dict (e.g. null), or has no ``valueString`` /
        a null ``valueString``.
    """
    field = fields.get(field_name)
    # A key that is present but null (or any non-dict) would otherwise
    # raise AttributeError on ``.get`` below.
    if not isinstance(field, dict):
        return default
    value = field.get('valueString')
    # Treat an explicit null the same as a missing key so callers such as
    # ``int(get_field_value(fields, 'Duration', '0'))`` never receive None.
    return default if value is None else value
272+
269273
ANALYZER_ID = "ckm-json"
270274
# Process files and insert into DB and Search - transcripts
271275
conversationIds, docs, counter = [], [], 0
@@ -282,17 +286,17 @@ def create_tables():
282286
start_timestamp = datetime.strptime(start_time, timestamp_format)
283287
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
284288
conversationIds.append(conversation_id)
285-
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
289+
fields = result['result']['contents'][0]['fields']
290+
duration = int(get_field_value(fields, 'Duration', '0'))
286291
end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
287292
start_timestamp = str(start_timestamp).split(".")[0]
288-
fields = result['result']['contents'][0]['fields']
289-
summary = fields['summary']['valueString']
290-
satisfied = fields['satisfied']['valueString']
291-
sentiment = fields['sentiment']['valueString']
292-
topic = fields['topic']['valueString']
293-
key_phrases = fields['keyPhrases']['valueString']
294-
complaint = fields['complaint']['valueString']
295-
content = fields['content']['valueString']
293+
summary = get_field_value(fields, 'summary')
294+
satisfied = get_field_value(fields, 'satisfied')
295+
sentiment = get_field_value(fields, 'sentiment')
296+
topic = get_field_value(fields, 'topic')
297+
key_phrases = get_field_value(fields, 'keyPhrases')
298+
complaint = get_field_value(fields, 'complaint')
299+
content = get_field_value(fields, 'content')
296300
cursor.execute(
297301
"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)",
298302
(conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint)
@@ -339,19 +343,20 @@ def create_tables():
339343
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
340344
conversationIds.append(conversation_id)
341345

342-
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
346+
fields = result['result']['contents'][0]['fields']
347+
duration = int(get_field_value(fields, 'Duration', '0'))
343348
end_timestamp = str(start_timestamp + timedelta(seconds=duration))
344349
end_timestamp = end_timestamp.split(".")[0]
345350
start_timestamp = str(start_timestamp).split(".")[0]
346351

347-
summary = result['result']['contents'][0]['fields']['summary']['valueString']
348-
satisfied = result['result']['contents'][0]['fields']['satisfied']['valueString']
349-
sentiment = result['result']['contents'][0]['fields']['sentiment']['valueString']
350-
topic = result['result']['contents'][0]['fields']['topic']['valueString']
351-
key_phrases = result['result']['contents'][0]['fields']['keyPhrases']['valueString']
352-
complaint = result['result']['contents'][0]['fields']['complaint']['valueString']
353-
content = result['result']['contents'][0]['fields']['content']['valueString']
354-
# print(topic)
352+
summary = get_field_value(fields, 'summary')
353+
satisfied = get_field_value(fields, 'satisfied')
354+
sentiment = get_field_value(fields, 'sentiment')
355+
topic = get_field_value(fields, 'topic')
356+
key_phrases = get_field_value(fields, 'keyPhrases')
357+
complaint = get_field_value(fields, 'complaint')
358+
content = get_field_value(fields, 'content')
359+
355360
cursor.execute(f"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)", (conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint))
356361
conn.commit()
357362

0 commit comments

Comments
 (0)