Skip to content

Commit 2091c98

Browse files
committed
fix: upsert bills by bill_number on transcript ingest
1 parent 0e3fac4 commit 2091c98

File tree

2 files changed

+46
-2
lines changed

2 files changed

+46
-2
lines changed

lib/transcripts/ingestor.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,7 @@ def _upsert_bill_from_legislation(self, legislation_item: dict[str, Any]) -> Non
404404
"""
405405
INSERT INTO bills (id, bill_number, title, description, status)
406406
VALUES (%s, %s, %s, %s, %s)
407-
ON CONFLICT (id) DO UPDATE SET
408-
bill_number = EXCLUDED.bill_number,
407+
ON CONFLICT (bill_number) DO UPDATE SET
409408
title = EXCLUDED.title,
410409
description = COALESCE(NULLIF(EXCLUDED.description, ''), bills.description),
411410
updated_at = NOW()

tests/test_transcript_ingestion_unit.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,48 @@ def test_transcript_ingestor_should_upsert_legislation_into_bills() -> None:
137137
if "INSERT INTO bills" in call.args[0]
138138
]
139139
assert len(bill_queries) == 1
140+
141+
142+
def test_transcript_ingestor_upserts_bills_by_bill_number() -> None:
    """Check that the bills upsert issued during ingest conflicts on bill_number.

    A transcript payload carrying a single legislation entry is ingested
    through a ``TranscriptIngestor`` wired to mocked Postgres and embedding
    clients. The SQL strings passed to ``execute_update`` are then inspected:
    at least one ``INSERT INTO bills`` statement must contain
    ``ON CONFLICT (bill_number)``.
    """
    db = Mock()
    embedder = Mock()
    # One 768-wide zero vector is the canned result for any embedding batch.
    embedder.generate_embeddings_batch.return_value = [[0.0] * 768]

    ingestor = TranscriptIngestor(postgres=db, embedding_client=embedder)

    video_metadata = {
        "title": "Test Video",
        "upload_date": "20260106",
        "duration": "0:01:00",
    }
    segment = {
        "start": "00:00:10",
        "text": "The Appropriation Bill 2026 is important.",
        "voice_id": 1,
        "speaker_id": "s_speaker_1",
    }
    bill_entry = {
        "id": "L_APPROPRIATION_BILL_1",
        "name": "Appropriation Bill 2026",
        "description": "Annual appropriations",
        "source": "audio",
    }
    payload = {
        "video_metadata": video_metadata,
        "speakers": [],
        "transcripts": [segment],
        "legislation": [bill_entry],
    }

    ingestor.ingest_transcript_json(payload, youtube_video_id="test_video")

    # Collect the SQL text (first positional argument) of every recorded
    # execute_update call that targets the bills table.
    bill_statements = []
    for recorded in db.execute_update.call_args_list:
        statement = recorded.args[0]
        if "INSERT INTO bills" in statement:
            bill_statements.append(statement)

    assert any(
        "ON CONFLICT (bill_number)" in statement for statement in bill_statements
    )

0 commit comments

Comments
 (0)