Skip to content

Commit 41955d8

Browse files
authored
Merge pull request #485 from sudoleg/copilot/fix-video-indexing-issue
Fix UNIQUE constraint violation when indexing videos already saved to library
2 parents 6dd3eaa + e1fe164 commit 41955d8

File tree

5 files changed

+187
-10
lines changed

5 files changed

+187
-10
lines changed

.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ docker-compose.yml
2424
Dockerfile
2525
.github/
2626
scripts/
27+
.env

modules/persistance.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from datetime import datetime
23
from typing import Literal
34

45
from peewee import (
@@ -62,6 +63,40 @@ class Transcript(BaseModel):
6263
chroma_collection_name = CharField(null=True)
6364

6465

66+
def get_or_create_video(
    yt_video_id: str, link: str, title: str, channel: str, saved_on: datetime
):
    """Return the video stored under ``yt_video_id``, creating it if missing.

    The lookup is done on ``yt_video_id`` only. ``link``, ``title``,
    ``channel`` and ``saved_on`` are used solely to populate a newly inserted
    row; they never overwrite the values of a row that already exists.

    The work is delegated to peewee's ``Model.get_or_create`` instead of a
    hand-written get-then-create sequence. peewee performs the insert inside
    ``atomic()`` and, if the insert fails with an ``IntegrityError`` (e.g.
    another session saved the same video between the lookup and the insert),
    it retries the lookup and returns the existing row.
    NOTE(review): this assumes the uniqueness constraint that motivated this
    helper is on ``yt_video_id`` - confirm against the ``Video`` model.

    Args:
        yt_video_id (str): The YouTube video ID.
        link (str): The URL of the video.
        title (str): The title of the video.
        channel (str): The channel name.
        saved_on: The timestamp when the video was saved.

    Returns:
        tuple: A tuple containing (Video instance, created boolean).
        created is True if a new video was created, False if it already
        existed.

    Raises:
        peewee.IntegrityError: If the insert violates a constraint and no row
            with ``yt_video_id`` can be found afterwards.
    """
    video, created = Video.get_or_create(
        yt_video_id=yt_video_id,
        # Only applied when a new row is inserted; ignored for existing rows.
        defaults={
            "link": link,
            "title": title,
            "channel": channel,
            "saved_on": saved_on,
        },
    )
    if created:
        logging.info("Created new video entry for yt_video_id '%s'.", yt_video_id)
    else:
        logging.info(
            "Video with yt_video_id '%s' already exists in database.", yt_video_id
        )
    return video, created
98+
99+
65100
def delete_video(
66101
video_title: str,
67102
):

pages/chat.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
Transcript,
2929
Video,
3030
delete_video,
31+
get_or_create_video,
3132
save_library_entry,
3233
)
3334
from modules.rag import (
@@ -184,6 +185,7 @@ def save_response_to_lib():
184185
# --- initialize models ---
185186
if provider_is_openai:
186187
chat_model = ChatOpenAI(
188+
name=st.session_state.model,
187189
api_key=st.session_state.openai_api_key,
188190
temperature=st.session_state.temperature,
189191
model=st.session_state.model,
@@ -196,8 +198,8 @@ def save_response_to_lib():
196198
)
197199
else:
198200
chat_model = ChatOllama(
199-
model=st.session_state.model,
200201
name=st.session_state.model,
202+
model=st.session_state.model,
201203
temperature=st.session_state.temperature,
202204
top_p=st.session_state.top_p,
203205
)
@@ -303,7 +305,7 @@ def save_response_to_lib():
303305
)
304306

305307
# 1. save video in the database
306-
saved_video = Video.create(
308+
saved_video, created = get_or_create_video(
307309
yt_video_id=extract_youtube_video_id(url_input),
308310
link=url_input,
309311
title=video_metadata["name"],
@@ -314,15 +316,15 @@ def save_response_to_lib():
314316
original_transcript = fetch_youtube_transcript(url_input)
315317

316318
# 3. save transcript, or more precisely, information about it, in the database
317-
model_for_count = (
318-
chat_model.model_name
319-
if provider_is_openai
320-
else st.session_state.model
321-
)
322319
saved_transcript = Transcript.create(
323320
video=saved_video,
324321
original_token_num=num_tokens_from_string(
325-
string=original_transcript, model=model_for_count
322+
string=original_transcript,
323+
model=(
324+
chat_model.model_name
325+
if provider_is_openai
326+
else st.session_state.model
327+
),
326328
),
327329
)
328330

pages/summary.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@
1212
is_api_key_valid,
1313
is_ollama_available,
1414
)
15-
from modules.persistance import SQL_DB, LibraryEntry, Video, save_library_entry
15+
from modules.persistance import (
16+
SQL_DB,
17+
LibraryEntry,
18+
Video,
19+
get_or_create_video,
20+
save_library_entry,
21+
)
1622
from modules.summary import TranscriptTooLongForModelException, get_transcript_summary
1723
from modules.ui import (
1824
GENERAL_ERROR_MESSAGE,
@@ -57,7 +63,7 @@ def display_dialog(message: str):
5763
def save_summary_to_lib():
5864
"""Wrapper func for saving summaries to the library."""
5965
try:
60-
saved_video = Video.create(
66+
saved_video, created = get_or_create_video(
6167
yt_video_id=extract_youtube_video_id(url_input),
6268
link=url_input,
6369
title=vid_metadata["name"],

tests/test_persistance.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
from datetime import datetime as dt
2+
3+
import pytest
4+
from peewee import SqliteDatabase
5+
6+
from modules.persistance import (
7+
LibraryEntry,
8+
Transcript,
9+
Video,
10+
get_or_create_video,
11+
save_library_entry,
12+
)
13+
14+
# Use an in-memory database for testing
15+
test_db = SqliteDatabase(":memory:")
16+
17+
18+
@pytest.fixture
def setup_test_db():
    """Provide a freshly created in-memory schema for a single test.

    Binds the models to ``test_db``, opens the connection and creates the
    tables before the test runs; drops the tables and closes the connection
    once the test has finished.
    """
    models = [Video, Transcript, LibraryEntry]

    # Point the models at the test database and build the schema
    test_db.bind(models)
    test_db.connect()
    test_db.create_tables(models)

    yield test_db

    # Tear down so the next test starts from an empty database
    test_db.drop_tables(models)
    test_db.close()
31+
32+
33+
def test_get_or_create_video_creates_new(setup_test_db):
    """An unknown yt_video_id results in a new row with the given fields."""
    expected = {
        "yt_video_id": "test_video_123",
        "link": "https://www.youtube.com/watch?v=test_video_123",
        "title": "Test Video",
        "channel": "Test Channel",
    }

    video, created = get_or_create_video(saved_on=dt.now(), **expected)

    assert created is True
    assert video.yt_video_id == expected["yt_video_id"]
    assert video.title == expected["title"]
    assert video.link == expected["link"]
    assert video.channel == expected["channel"]
54+
55+
56+
def test_get_or_create_video_gets_existing(setup_test_db):
    """A second call with the same yt_video_id returns the row created by the first."""
    video_kwargs = {
        "yt_video_id": "test_video_456",
        "link": "https://www.youtube.com/watch?v=test_video_456",
        "title": "Test Video 2",
        "channel": "Test Channel 2",
        "saved_on": dt.now(),
    }

    # First call inserts the row
    first, first_created = get_or_create_video(**video_kwargs)
    assert first_created is True

    # Second call with identical arguments must reuse it
    second, second_created = get_or_create_video(**video_kwargs)
    assert second_created is False
    assert first.id == second.id
    assert first.yt_video_id == second.yt_video_id
87+
88+
89+
def test_save_summary_with_existing_video(setup_test_db):
    """A video saved together with a summary can be fetched again and given a transcript."""
    video_kwargs = {
        "yt_video_id": "test_video_789",
        "link": "https://www.youtube.com/watch?v=test_video_789",
        "title": "Test Video 3",
        "channel": "Test Channel 3",
        "saved_on": dt.now(),
    }

    # Summary flow: the video is stored first, then a summary entry for it
    stored_video, _ = get_or_create_video(**video_kwargs)
    save_library_entry(
        entry_type="S",
        question_text=None,
        response_text="This is a test summary",
        video=stored_video,
    )

    # Chat flow: the same video is requested a second time
    fetched_video, fetched_created = get_or_create_video(**video_kwargs)

    # No duplicate row may be produced
    assert fetched_created is False
    assert stored_video.id == fetched_video.id

    # The re-fetched instance must be usable as a transcript's video
    transcript = Transcript.create(video=fetched_video, original_token_num=1000)
    assert transcript.video.id == stored_video.id

0 commit comments

Comments
 (0)