Skip to content

Commit 604d747

Browse files
committed
refactor: update _speeches_to_json to handle speech_data directly and improve author_id handling
1 parent d92bd31 commit 604d747

File tree

1 file changed

+20
-14
lines changed

1 file changed

+20
-14
lines changed

src/api/views.py

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -90,13 +90,14 @@ def _add_to_result(levels, data, result):
9090
current_level.append(data)
9191

9292

93-
def _speeches_to_json(speech_data):
94-
df_sitting = pd.DataFrame(json.loads(speech_data))
93+
def _speeches_to_json(speech_data: list[dict]):
94+
# df_sitting = pd.DataFrame(json.loads(speech_data))
9595
result = []
96-
for index, row in df_sitting.iterrows():
96+
for row in speech_data:
9797
speech_dict = {
9898
"speech": row["proc_speech"],
99-
"author": row["author"],
99+
"author": row["author"] if pd.notna(row["author"]) else None,
100+
"author_id": int(row["speaker"]) if pd.notna(row["speaker"]) else None,
100101
"timestamp": row["timestamp"],
101102
"is_annotation": row["is_annotation"],
102103
"index": row["index"],
@@ -168,11 +169,9 @@ def post(self, request):
168169
speech_data = request.data.get("speech_data")
169170
speeches_list = json.loads(speech_data)
170171

171-
# speech dataframe
172-
df_speech = pd.DataFrame(speeches_list)
173-
df_speech = df_speech[df_speech.is_annotation == False]
174-
175-
serializer.validated_data["speech_data"] = speech_data
172+
# Convert and serialize speeches to nested JSON format for frontend catalogue
173+
speech_data_json = _speeches_to_json(speeches_list)
174+
serializer.validated_data["speech_data"] = speech_data_json
176175

177176
# Get parliamentary cycle ID given house_types, and date that falls between start_date and end_date
178177
cycle = ParliamentaryCycle.objects.get(
@@ -193,22 +192,29 @@ def post(self, request):
193192

194193
# Then create speech records if speech_data is provided
195194
try:
196-
# Prepare speech records
195+
# Prepare speech records for speech searching
196+
df_speech = pd.DataFrame(speeches_list)
197+
df_speech = df_speech[df_speech.is_annotation == False]
198+
197199
speech_records = []
198200
for idx, speech in df_speech.iterrows():
199201
speech_records.append(
200202
{
201203
"sitting_id": sitting.sitting_id,
202-
"index": speech["index"],
203-
"speaker_id": speech["author_id"],
204+
"index": int(speech["index"]),
205+
"speaker_id": (
206+
int(speech["speaker"])
207+
if pd.notna(speech["speaker"])
208+
else None
209+
),
204210
"timestamp": speech["timestamp"],
205211
"speech": speech["speech"],
206212
"speech_tokens": speech["speech_tokens"],
207-
"length": speech["length"],
213+
"length": int(speech["length"]),
208214
"level_1": speech["level_1"],
209215
"level_2": speech["level_2"],
210216
"level_3": speech["level_3"],
211-
"is_annotation": speech["is_annotation"],
217+
"is_annotation": bool(speech["is_annotation"]),
212218
}
213219
)
214220

0 commit comments

Comments
 (0)