Skip to content

Commit 7f442d6

Browse files
authored
Dev official (#114)
- Fixed 0 views being shown in video page. - Changed it so that the splash screen properly updates after pressing scrape transcript or comment in video page.
2 parents 827301f + d49c2f9 commit 7f442d6

File tree

8 files changed

+577
-227
lines changed

8 files changed

+577
-227
lines changed

Backend/AnalysisWorker.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Backend/AnalysisWorker.py
2+
from PySide6.QtCore import QObject, Signal
3+
from PySide6.QtGui import QImage
4+
import time
5+
6+
from Analysis.SentimentAnalysis import run_sentiment_summary
7+
from Analysis.WordCloud import WordCloudAnalyzer
8+
9+
class AnalysisWorker(QObject):
    """
    Worker that runs analysis (sentiment summary + word cloud) on a list of sentences.

    Call ``run()`` to execute the pipeline. Status text and a 0-100 progress
    value are reported through signals so a splash/progress UI can follow along,
    and each result image is delivered through its own signal.

    Signals:
        progress_updated(str): human-readable status message.
        progress_percentage(int): progress value; never decreases within one run.
        finished(): emitted exactly once when ``run()`` ends, whether it
            completed, was cancelled, or failed.
        sentiment_ready(QImage): result of ``run_sentiment_summary``.
        wordcloud_ready(QImage): result of ``WordCloudAnalyzer.generate_wordcloud``.
    """

    progress_updated = Signal(str)
    progress_percentage = Signal(int)
    finished = Signal()
    sentiment_ready = Signal(QImage)
    wordcloud_ready = Signal(QImage)

    def __init__(self, sentences: list[str], sentiment_size: tuple = (1600, 520),
                 wordcloud_size: tuple = (2800, 1680), max_words: int = 200):
        """
        Store the analysis inputs.

        Args:
            sentences: sentences to analyse; ``None`` or an empty list is
                treated as no input.
            sentiment_size: ``(width, height)`` passed to ``run_sentiment_summary``.
            wordcloud_size: ``(width, height)`` passed to ``generate_wordcloud``.
            max_words: ``max_words`` argument for ``WordCloudAnalyzer``.
        """
        super().__init__()
        self.sentences = sentences or []
        self.sent_w, self.sent_h = sentiment_size
        self.wc_w, self.wc_h = wordcloud_size
        self.max_words = max_words
        self._cancelled = False

    def cancel(self):
        """Request cancellation; ``run()`` honours it at its next checkpoint."""
        self._cancelled = True

    def run(self) -> None:
        """
        Execute the analysis pipeline and always emit ``finished`` at the end.

        Any exception raised by a stage is reported through ``progress_updated``
        (best effort) instead of propagating to the caller.
        """
        try:
            self._run_stages()
        except Exception as e:
            # Best-effort error reporting: a failure while emitting the message
            # must not prevent ``finished`` from being emitted below.
            try:
                self.progress_updated.emit(f"Analysis error: {e}")
            except Exception:
                pass
        finally:
            # Single exit point so listeners get exactly one ``finished``.
            self.finished.emit()

    def _report_if_cancelled(self) -> bool:
        """Return True (after announcing it) when cancellation was requested."""
        if self._cancelled:
            self.progress_updated.emit("Analysis cancelled.")
            return True
        return False

    def _run_stages(self) -> None:
        """Run the four stages in order, returning early on cancellation."""
        # Progress layout (percent). Each stage starts where the previous one
        # ended so the reported value never moves backwards.
        sentiment_base = 25
        sentiment_stage_weight = 45
        wordcloud_base = sentiment_base + sentiment_stage_weight  # 70
        wordcloud_tick_span = 20                                  # 70 -> 90
        wordcloud_ticks = 3

        # Stage 1: preparation
        self.progress_updated.emit("Preparing sentences for analysis...")
        self.progress_percentage.emit(0)

        sentences = self.sentences
        n = len(sentences)
        if self._report_if_cancelled():
            return

        # Stage 2: sentiment
        self.progress_updated.emit("Running sentiment analysis...")
        if n == 0:
            self.progress_percentage.emit(sentiment_base + 1)
        else:
            # run_sentiment_summary is a single blocking call, so this loop only
            # simulates granular progress (and offers cancellation checkpoints)
            # before the real computation starts.
            batch = max(1, n // 20)
            processed = 0
            for i in range(0, n, batch):
                if self._report_if_cancelled():
                    return
                # short pause so the UI has a chance to repaint between ticks
                time.sleep(0.01)
                processed += min(batch, n - i)
                pct = sentiment_base + int((processed / n) * sentiment_stage_weight)
                self.progress_percentage.emit(min(pct, 99))

        sentiment_img = run_sentiment_summary(sentences, width=self.sent_w, height=self.sent_h)
        self.sentiment_ready.emit(sentiment_img)

        # Stage 3: word cloud
        self.progress_updated.emit("Generating word cloud...")
        # generate_wordcloud is blocking as well; emit a few ticks beforehand.
        for tick in range(wordcloud_ticks):
            if self._report_if_cancelled():
                return
            time.sleep(0.05)
            # integer arithmetic keeps the tick values exact: 76, 83, 90
            step = (tick + 1) * wordcloud_tick_span // wordcloud_ticks
            self.progress_percentage.emit(wordcloud_base + step)

        wc_img = WordCloudAnalyzer(max_words=self.max_words).generate_wordcloud(
            sentences, width=self.wc_w, height=self.wc_h
        )
        self.wordcloud_ready.emit(wc_img)
        self.progress_percentage.emit(95)

        # Stage 4: finalizing
        self.progress_updated.emit("Finalizing results...")
        time.sleep(0.05)
        self.progress_percentage.emit(100)
        self.progress_updated.emit("Analysis complete.")

Backend/ScrapeComments.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,52 @@ def __init__(self, video_details: Dict[str, List[str]]) -> None:
3131
def run(self) -> None:
3232
"""
3333
Executes the comment fetching process.
34+
Shows video title / channel name instead of raw IDs when available.
3435
"""
3536
try:
3637
total_videos = sum(len(v_list) for v_list in self.video_details.values())
3738
processed_count = 0
38-
39+
3940
self.progress_updated.emit("Starting comment scrape...")
4041
self.progress_percentage.emit(0)
4142

43+
# helper to get title from DB
44+
def _get_title(vid, ch):
45+
try:
46+
rows = self.fetcher.db.fetch("VIDEO", where="video_id=?", params=(vid,))
47+
if rows:
48+
return rows[0].get("title") or vid
49+
except Exception:
50+
pass
51+
return vid
52+
53+
def _get_channel_name(ch):
54+
try:
55+
rows = self.fetcher.db.fetch("CHANNEL", where="channel_id=?", params=(ch,))
56+
if rows:
57+
return rows[0].get("channel_name") or str(ch)
58+
except Exception:
59+
pass
60+
return str(ch)
61+
4262
for channel_id, video_id_list in self.video_details.items():
63+
channel_name = _get_channel_name(channel_id)
4364
for video_id in video_id_list:
44-
self.progress_updated.emit(f"Fetching comments for {video_id}...")
45-
65+
video_title = _get_title(video_id, channel_id)
66+
self.progress_updated.emit(f"Fetching comments for: \"{video_title}\" (channel: {channel_name})")
67+
4668
# Perform fetch
4769
result = self.fetcher._fetch(video_id, channel_id)
48-
70+
4971
processed_count += 1
5072
percentage = int((processed_count / total_videos) * 100)
5173
self.progress_percentage.emit(percentage)
52-
74+
5375
if result.get("filepath"):
5476
count = result.get("comment_count", 0)
55-
self.progress_updated.emit(f"Saved {count} comments for {video_id}")
77+
self.progress_updated.emit(f"Saved {count} comments for \"{video_title}\"")
5678
else:
57-
self.progress_updated.emit(f"Skipped: {video_id} ({result.get('remarks')})")
79+
self.progress_updated.emit(f"Skipped: \"{video_title}\" ({result.get('remarks')})")
5880

5981
self.progress_updated.emit("Comment scraping completed!")
6082
self.progress_percentage.emit(100)
@@ -66,6 +88,7 @@ def run(self) -> None:
6688
self.finished.emit()
6789

6890

91+
6992
class CommentFetcher:
7093
"""
7194
A class to fetch YouTube video comments with threads using yt-dlp.

Backend/ScrapeTranscription.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,31 +32,49 @@ def __init__(self, video_details: dict[str, list], languages: list = ["en"]) ->
3232
def run(self) -> None:
3333
"""
3434
Executes the transcript fetching process.
35+
Shows human-friendly names (video title) in progress messages when available.
3536
"""
3637
try:
3738
total_videos = sum(len(v_list) for v_list in self.video_details.values())
3839
processed_count = 0
39-
40+
4041
self.progress_updated.emit("Starting transcript scrape...")
4142
self.progress_percentage.emit(0)
4243

4344
language_option = ["en"]
4445

46+
# helper to get title from DB
47+
def _get_title(vid, ch):
48+
try:
49+
rows = self.fetcher.db.fetch("VIDEO", where="video_id=?", params=(vid,))
50+
if rows:
51+
return rows[0].get("title") or vid
52+
except Exception:
53+
pass
54+
return vid
55+
4556
for channel_id, video_id_list in self.video_details.items():
57+
# try get channel name
58+
try:
59+
ch_rows = self.fetcher.db.fetch("CHANNEL", where="channel_id=?", params=(channel_id,))
60+
channel_name = ch_rows[0].get("channel_name") if ch_rows else str(channel_id)
61+
except Exception:
62+
channel_name = str(channel_id)
63+
4664
for video_id in video_id_list:
47-
self.progress_updated.emit(f"Fetching transcript for {video_id}...")
48-
65+
video_title = _get_title(video_id, channel_id)
66+
self.progress_updated.emit(f"Fetching transcript for: \"{video_title}\"")
4967
# Perform fetch
5068
result = self.fetcher._fetch(video_id, channel_id, language_option)
51-
69+
5270
processed_count += 1
5371
percentage = int((processed_count / total_videos) * 100)
5472
self.progress_percentage.emit(percentage)
55-
73+
5674
if result.get("filepath"):
57-
self.progress_updated.emit(f"Saved: {video_id}")
75+
self.progress_updated.emit(f"Saved: \"{video_title}\"")
5876
else:
59-
self.progress_updated.emit(f"Skipped: {video_id} ({result.get('remarks')})")
77+
self.progress_updated.emit(f"Skipped: \"{video_title}\" ({result.get('remarks')})")
6078

6179
self.progress_updated.emit("Transcript scraping completed!")
6280
self.progress_percentage.emit(100)

0 commit comments

Comments
 (0)