Skip to content

Commit a4c37f3

Browse files
authored
Merge pull request #126 from sudoleg/develop
introduce library
2 parents c89fde7 + b68f355 commit a4c37f3

File tree

7 files changed

+192
-16
lines changed

7 files changed

+192
-16
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ transcript_audios/
1717
data/audio/*.mp3
1818
data/transcripts/*.txt
1919
scripts/tokens.ipynb
20+
scripts/persistance.ipynb
2021

2122
# Byte-compiled / optimized / DLL files
2223
__pycache__/

modules/helpers.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def is_api_key_set() -> bool:
1717
return False
1818

1919

20+
@st.cache_data
2021
def is_api_key_valid(api_key: str):
2122
"""
2223
Checks the validity of an OpenAI API key.
@@ -28,10 +29,6 @@ def is_api_key_valid(api_key: str):
2829
bool: True if the API key is valid, False if the API key is invalid.
2930
"""
3031

31-
api_key_valid = os.getenv("OPENAI_API_KEY_VALID")
32-
if api_key_valid:
33-
return True
34-
3532
openai.api_key = api_key
3633
try:
3734
openai.models.list()
@@ -49,7 +46,6 @@ def is_api_key_valid(api_key: str):
4946
return False
5047
else:
5148
logging.info("API key validation successful")
52-
os.environ["OPENAI_API_KEY_VALID"] = "yes"
5349
return True
5450

5551

modules/persistance.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from typing import Literal
23

34
from peewee import (
45
BooleanField,
@@ -8,6 +9,7 @@
89
IntegerField,
910
Model,
1011
SqliteDatabase,
12+
TextField,
1113
UUIDField,
1214
)
1315

@@ -77,3 +79,43 @@ def delete_video(
7779
video.yt_video_id,
7880
str(e),
7981
)
82+
83+
84+
class LibraryEntry(BaseModel):
    """Saved summary or saved answer that belongs to a video.

    Represents a table in a relational SQL database; every instance is one row.
    """

    # Allowed values for ``entry_type`` as (stored value, human-readable label).
    ENTRY_TYPE_CHOICES = (("S", "Summary"), ("A", "Answer"))

    # One-character code taken from ENTRY_TYPE_CHOICES.
    entry_type = CharField(choices=ENTRY_TYPE_CHOICES, max_length=1)
    # Video the entry was created for; exposed on Video as ``lib_entries``.
    video = ForeignKeyField(Video, backref="lib_entries")
    # Question that was asked; nullable because summaries have no question.
    question = TextField(null=True)
    # The summary or answer text itself; must always be present.
    text = TextField(null=False)
96+
97+
98+
def save_library_entry(
    entry_type: Literal["S", "A"], question_text: str, response_text: str, video: Video
):
    """Persist a summary or an answer for a video in the library.

    Args:
        entry_type: "S" stores a summary; any other value is stored as an
            answer ("A").
        question_text: Text of the question. Only stored for answers.
        response_text: Text of the summary or answer.
        video: The video object the entry is associated with.
    """
    is_summary = entry_type == "S"
    row = {"entry_type": "S" if is_summary else "A", "video": video}
    if not is_summary:
        # Only answers carry the question they respond to.
        row["question"] = question_text
    row["text"] = response_text

    LibraryEntry.create(**row)
    logging.info("Saved library entry for video '%s'", video.title)
116+
117+
118+
def delete_library_entry(lib_entry: LibraryEntry):
    """Delete a library entry from the database and log the deletion.

    Args:
        lib_entry: The library entry to remove.
    """
    # Delete through the instance itself instead of handing the whole model
    # object to delete_by_id(), which is meant to receive a primary-key value.
    lib_entry.delete_instance()
    logging.info("Deleted library entry for video '%s'", lib_entry.video.title)

modules/ui.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,4 @@ def display_nav_menu():
127127
st.sidebar.page_link(page="main.py", label="Home")
128128
st.sidebar.page_link(page="pages/summary.py", label="Summary")
129129
st.sidebar.page_link(page="pages/chat.py", label="Chat")
130+
st.sidebar.page_link(page="pages/library.py", label="Library")

pages/chat.py

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,14 @@
1919
read_file,
2020
save_response_as_file,
2121
)
22-
from modules.persistance import SQL_DB, Transcript, Video, delete_video
22+
from modules.persistance import (
23+
SQL_DB,
24+
LibraryEntry,
25+
Transcript,
26+
Video,
27+
delete_video,
28+
save_library_entry,
29+
)
2330
from modules.rag import (
2431
CHUNK_SIZE_TO_K_MAPPING,
2532
embed_excerpts,
@@ -60,7 +67,7 @@
6067
# --- SQLite stuff ---
6168
SQL_DB.connect(reuse_if_open=True)
6269
# create tables if they don't already exist
63-
SQL_DB.create_tables([Video, Transcript], safe=True)
70+
SQL_DB.create_tables([Video, Transcript, LibraryEntry], safe=True)
6471
# --- end ---
6572

6673
# --- Chroma ---
@@ -95,6 +102,26 @@ def refresh_page(message: str):
95102
st.rerun()
96103

97104

105+
# variable for holding the Video object
106+
saved_video: None | Video = None
107+
108+
109+
def save_response_to_lib():
    """Save the current question/answer pair to the library.

    Used as the ``on_click`` callback of the save button. Reads the question
    and the response from ``st.session_state`` and the video from the
    module-level ``saved_video``, then reports success or failure in the UI.
    """
    try:
        save_library_entry(
            entry_type="A",
            question_text=st.session_state.user_prompt,
            response_text=st.session_state.response,
            video=saved_video,
        )
    except Exception as e:
        st.error("Saving failed! If you are a developer, see logs for details!")
        # Log the traceback as well, so the "see logs" hint above is actually
        # actionable and matches the other error handler on this page.
        logging.error("Error when saving library entry: %s", e, exc_info=True)
    else:
        st.success("Saved answer to library successfully!")
123+
124+
98125
if (
99126
is_api_key_set()
100127
and is_api_key_valid(st.session_state.openai_api_key)
@@ -129,7 +156,7 @@ def refresh_page(message: str):
129156
)
130157
# --- end ---
131158

132-
# fetch saved videos from SQLite
159+
# fetch all saved videos from SQLite
133160
saved_videos = Video.select()
134161

135162
# create columns
@@ -139,7 +166,10 @@ def refresh_page(message: str):
139166
selected_video_title = st.selectbox(
140167
label="Select from already processed videos",
141168
placeholder="Choose a video",
142-
options=[video.title for video in saved_videos],
169+
# only videos with an associated transcript can be selected
170+
options=[
171+
video.title for video in saved_videos if video.transcripts.count() != 0
172+
],
143173
index=None,
144174
key="selected_video",
145175
help=get_default_config_value("help_texts.selected_video"),
@@ -148,7 +178,6 @@ def refresh_page(message: str):
148178
label="Or enter the URL of a new video:", disabled=is_video_selected()
149179
)
150180

151-
saved_video = None
152181
if is_video_selected():
153182
saved_video = Video.get(Video.title == selected_video_title)
154183

@@ -218,16 +247,18 @@ def refresh_page(message: str):
218247
url=url_input,
219248
)
220249

221-
# 1. fetch transcript from youtube
250+
# 1. save video in the database
222251
saved_video = Video.create(
223252
yt_video_id=extract_youtube_video_id(url_input),
224253
link=url_input,
225254
title=video_metadata["name"],
226255
channel=video_metadata["channel"],
227256
saved_on=dt.now(),
228257
)
258+
# 2. fetch transcript from youtube
229259
original_transcript = fetch_youtube_transcript(url_input)
230260

261+
# 3. save transcript, or more precisely, information about it, in the database
231262
saved_transcript = Transcript.create(
232263
video=saved_video,
233264
original_token_num=num_tokens_from_string(
@@ -236,6 +267,7 @@ def refresh_page(message: str):
236267
),
237268
)
238269

270+
# 4. get an already existing or create a new collection in ChromaDB
239271
collection = chroma_client.get_or_create_collection(
240272
name=randomname.get_name(),
241273
metadata={
@@ -245,7 +277,7 @@ def refresh_page(message: str):
245277
},
246278
)
247279

248-
# 2. create excerpts. Either
280+
# 5. create excerpts. Either
249281
# - from original transcript
250282
# - or from whisper transcription if transcription checkbox is checked
251283
if transcription_checkbox:
@@ -271,7 +303,7 @@ def refresh_page(message: str):
271303
len_func="tokens",
272304
)
273305

274-
# 3. embed/index transcript excerpts
306+
# 6. embed/index transcript excerpts
275307
Transcript.update(
276308
{
277309
Transcript.preprocessed: transcription_checkbox,
@@ -322,10 +354,10 @@ def refresh_page(message: str):
322354

323355
prompt = st.chat_input(
324356
placeholder="Ask a question or provide a topic covered in the video",
325-
key="user_prompt",
326357
)
327358

328359
if prompt:
360+
st.session_state.user_prompt = prompt
329361
with st.spinner("Generating answer..."):
330362
try:
331363
relevant_docs = find_relevant_documents(
@@ -340,13 +372,19 @@ def refresh_page(message: str):
340372
llm=openai_chat_model,
341373
relevant_docs=relevant_docs,
342374
)
375+
st.session_state.response = response
343376
except Exception as e:
344377
logging.error(
345378
"An unexpected error occurred: %s", str(e), exc_info=True
346379
)
347380
st.error(GENERAL_ERROR_MESSAGE)
348381
else:
349-
st.write(response)
382+
st.write(st.session_state.response)
383+
st.button(
384+
label="Save this response to your library",
385+
on_click=save_response_to_lib,
386+
help="Unfortunately, the response disappears in this view after saving it to the library. However, it will be visible on the 'Library' page!",
387+
)
350388
with st.expander(
351389
label="Show chunks retrieved from index and provided to the model as context"
352390
):

pages/library.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import streamlit as st
2+
3+
from modules.persistance import SQL_DB, LibraryEntry, delete_library_entry
4+
from modules.ui import display_nav_menu
5+
6+
7+
st.set_page_config(
    "Library",
    layout="wide",
    initial_sidebar_state="auto",
)
display_nav_menu()

# --- SQLite stuff ---
SQL_DB.connect(reuse_if_open=True)
# make sure the library table exists before it is queried
SQL_DB.create_tables([LibraryEntry], safe=True)
# --- end ---

# Load the saved entries, split by type ("S" = summary, "A" = answer).
summaries_query = LibraryEntry.select().where(LibraryEntry.entry_type == "S")
answers_query = LibraryEntry.select().where(LibraryEntry.entry_type == "A")
saved_lib_entries_summaries = summaries_query.execute()
saved_lib_entries_answers = answers_query.execute()
22+
23+
24+
def execute_entry_deletion(entry: LibraryEntry):
    """Delete ``entry`` from the library, then trigger a Streamlit rerun.

    Args:
        entry: The library entry to delete.
    """
    delete_library_entry(entry)
    st.rerun()
28+
29+
30+
if saved_lib_entries_summaries:
31+
st.header("Saved summaries")
32+
for i, entry in enumerate(saved_lib_entries_summaries, 0):
33+
st.caption(f"{entry.video.title} - {entry.video.channel}")
34+
with st.expander("Show"):
35+
st.write(entry.text)
36+
if st.button(
37+
label="Delete entry",
38+
key=f"delete_summary_{i}",
39+
):
40+
execute_entry_deletion(entry)
41+
st.divider()
42+
else:
43+
st.info("You don't have any saved summaries yet!")
44+
45+
if saved_lib_entries_answers:
46+
st.header("Saved answers")
47+
for j, entry in enumerate(saved_lib_entries_answers, 0):
48+
st.subheader(entry.question)
49+
with st.expander("Show"):
50+
st.write(entry.text)
51+
if st.button(
52+
label="Delete entry",
53+
key=f"delete_answer_{j}",
54+
):
55+
execute_entry_deletion(entry)
56+
st.divider()
57+
else:
58+
st.info("You don't have any saved answers yet!")

0 commit comments

Comments
 (0)