Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
2073bd0
experimenting qss
Sakth1 Oct 26, 2025
dfefbaf
cleaned up gitignore
Sakth1 Oct 26, 2025
c0fd176
cleaned up gitignore
Sakth1 Oct 26, 2025
2aa8baf
improved ui
Sakth1 Oct 27, 2025
e30e3a8
added appstate to store variables
Sakth1 Oct 27, 2025
fe570a9
Merge pull request #60 from Sakth1/main
Sakth1 Oct 27, 2025
67d632b
started on video page
Sakth1 Oct 27, 2025
2d87dcf
ui improvements
Sakth1 Oct 27, 2025
2d76d12
Merge branch 'Dev-official' of https://github.com/Sakth1/youtube_tran…
Sakth1 Oct 27, 2025
5d8a824
proxies not working
Sakth1 Oct 27, 2025
4837270
got the proxies working
Sakth1 Oct 28, 2025
d657abb
proxy is working now
Sakth1 Oct 28, 2025
18fb1a5
added threaded proxy validation and initiated thumbnail download
Sakth1 Oct 28, 2025
287f4a9
add update db when UNIQUE constraint
Sakth1 Oct 28, 2025
ad098a3
fixed downloading bug
Sakth1 Oct 28, 2025
6ae227f
Revert "Bump aiofiles from 24.1.0 to 25.1.0"
Sakth1 Oct 28, 2025
a38a77a
Merge pull request #66 from Sakth1/revert-65-dependabot/pip/aiofiles-…
Sakth1 Oct 28, 2025
6bae921
Add target branch for Dependabot updates
Sakth1 Oct 28, 2025
168ba69
sync (#67)
Sakth1 Oct 28, 2025
0854081
sync (#68)
Sakth1 Oct 28, 2025
3152ffa
corrections
Sakth1 Oct 29, 2025
a8251ab
Merge branch 'Dev-official' of https://github.com/Sakth1/youtube_tran…
Sakth1 Oct 29, 2025
83d6f9a
reworking search
Sakth1 Oct 29, 2025
a2cbd4a
added centralized proxy url in app_state
Sakth1 Oct 29, 2025
8988a11
Tried to work out utilizing proxies
Sakth1 Oct 29, 2025
b6600c6
Merge branch 'Dev-official' of https://github.com/Sakth1/youtube_tran…
Sakth1 Oct 29, 2025
6cd292c
fixed conflict
Sakth1 Oct 29, 2025
774fc95
Tried to fix proxy handling
Sakth1 Oct 29, 2025
154ebe3
minor changes to proxy
Sakth1 Oct 30, 2025
ea7d788
minor changes to proxy
Sakth1 Oct 30, 2025
726c517
Merge branch 'Dev-official' of https://github.com/Sakth1/youtube_tran…
Sakth1 Oct 30, 2025
c61eab0
TODO: Can't get valid proxy from swiftshadow
Sakth1 Oct 30, 2025
eb0b672
Removed swiftshadow, scraping proxies manually
Sakth1 Oct 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ updates:
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
target-branch: "Dev-official"
24 changes: 16 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
.vscode/
# Virtual environments
.venv/
UI/__pycache__
Backend/__pycache__
trial.py
audio/

# IDE and editor settings
.vscode/

# Python cache files
__pycache__/
UI/__pycache__/
Backend/__pycache__/
Data/__pycache__/
utils/__pycache__/

# Temporary or generated files
output/
try/
Data/__pycache__
utils/__pycache__
experiments/

# Notes and documentation drafts
notes.excalidraw
Dev_Dairy.md
30 changes: 18 additions & 12 deletions Backend/ScrapeChannel.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
import scrapetube
from pathlib import Path
from datetime import datetime
import urllib.request

from Data.DatabaseManager import DatabaseManager
from utils.Proxy import Proxy

def download_with_proxy(url, save_path, proxy_url=None):
    """Download *url* to *save_path* through an HTTP(S) proxy.

    No-op when ``proxy_url`` is ``None``. Failures are best-effort: they are
    printed, not raised, and any partially written file is removed so callers
    never find a truncated download on disk.

    Args:
        url: Remote resource to fetch.
        save_path: Local filesystem path to write the bytes to.
        proxy_url: Proxy endpoint used for both http and https traffic.
    """
    if proxy_url is None:
        return

    # Imported lazily so the module stays importable without `requests`.
    import requests
    try:
        # `with` ensures the streamed connection is released even on error.
        with requests.get(url,
                          proxies={'http': proxy_url, 'https': proxy_url},
                          timeout=15.0, stream=True) as response:
            response.raise_for_status()
            with open(save_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
    except Exception as e:
        print(f"[ERROR] Failed to download {url}: {e}")
        # Remove any partial file so a half-written image is never kept.
        try:
            import os
            os.remove(save_path)
        except OSError:
            pass

class Search:
def __init__(self, db: DatabaseManager):
Expand All @@ -15,29 +25,25 @@ def __init__(self, db: DatabaseManager):
def search_channel(self, name: str = None, limit: int = 6):
if not name:
return {"None": {"title": None, "url": None}}

proxy = Proxy().get_proxy()
if proxy:
pass
else:
proxy = None

self.channels = {}
search_results = scrapetube.get_search(name, results_type="channel", limit=limit)
proxy_url = Proxy().get_working_proxy()

for ch in search_results:
title = ch.get("title", {}).get("simpleText")
sub_count = ch.get("videoCountText", {}).get("accessibility", {}).get("accessibilityData", {}).get("label")
desc = ch.get("descriptionSnippet", {}).get("runs")[0].get("text") if ch.get("descriptionSnippet") else None
channel_id = ch.get("channelId")
profile_url = "https:" + ch.get("thumbnail", {}).get("thumbnails")[0].get("url")

try:
profile_save_path = rf"{self.db.profile_pic_dir}/{channel_id}.png"
urllib.request.urlretrieve(profile_url, profile_save_path)
print(f'pic saved to {profile_save_path}')
download_with_proxy(profile_url, profile_save_path, proxy_url)
except Exception as e:
print(f"Failed to save profile picture: {e}")
import traceback
traceback.print_exc()

if channel_id:
url = f"https://www.youtube.com/channel/{channel_id}"
Expand Down
152 changes: 53 additions & 99 deletions Backend/ScrapeVideo.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import yt_dlp
from datetime import datetime
from pathlib import Path
import os

from utils.Proxy import Proxy
from Data.DatabaseManager import DatabaseManager # Your DB class

from Data.DatabaseManager import DatabaseManager


def download_with_proxy(url, save_path, proxy_url=None):
    """Download *url* to *save_path* through an HTTP(S) proxy.

    No-op when ``proxy_url`` is ``None``. Failures are best-effort: they are
    printed, not raised, and any partially written file is removed so callers
    never find a truncated download on disk.

    Args:
        url: Remote resource to fetch.
        save_path: Local filesystem path to write the bytes to.
        proxy_url: Proxy endpoint used for both http and https traffic.
    """
    if proxy_url is None:
        return

    # Imported lazily so the module stays importable without `requests`.
    import requests
    try:
        # `with` ensures the streamed connection is released even on error.
        with requests.get(url,
                          proxies={'http': proxy_url, 'https': proxy_url},
                          timeout=15.0, stream=True) as response:
            response.raise_for_status()
            with open(save_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
    except Exception as e:
        print(f"[ERROR] Failed to download {url}: {e}")
        # Remove any partial file so a half-written thumbnail is never kept.
        try:
            import os
            os.remove(save_path)
        except OSError:
            pass

class Videos:
def __init__(self, db: DatabaseManager):
Expand All @@ -24,12 +39,6 @@ def fetch_video_urls(self, channel_id: int, channel_url: str):
'quiet': True,
}

# Choose proxy
proxy = Proxy().get_proxy()
if proxy:
ydl_opts['proxy'] = proxy
print(f"[INFO] Using proxy for videos: {proxy}")

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(channel_url, download=False)

Expand All @@ -41,102 +50,47 @@ def fetch_video_urls(self, channel_id: int, channel_url: str):

for entry in entries:
entry_name = entry.get('title')
proxy_url = Proxy().get_working_proxy()

# --- Normal Videos ---
if entry_name == f'{channel_name} - Videos':
video_entries = entry.get('entries')
for i, video_entry in enumerate(video_entries):
video_id = video_entry.get('id')
title = video_entry.get('title')
url = video_entry.get('url')
views = video_entry.get('view_count')
duration = video_entry.get('duration')

# Save JSON file for raw data
file_path = self.db.save_json_file(
self.db.video_dir, f"video_{video_id}", video_entry
)

# Insert into DB
self.db.insert("VIDEO", {
"channel_id": channel_id,
"title": title,
"desc": video_entry.get("description"),
"duration": duration,
"view_count": views,
"like_count": video_entry.get("like_count"),
"pub_date": video_entry.get("upload_date"),
"status": "active",
"created_at": datetime.now().isoformat(),
"file_path": str(file_path)
})

self.videos[i] = {
"title": title,
"url": url,
"views": views,
"duration": duration
}
self.video_url.append(url)

# --- Live Videos ---
elif entry_name == f'{channel_name} - Live':
live_entries = entry.get('entries')
for i, live_entry in enumerate(live_entries):
title = live_entry.get('title')
url = live_entry.get('url')
views = live_entry.get('view_count')
duration = live_entry.get('duration')

self.live[i] = {
"title": title,
"url": url,
"views": views,
"duration": duration
}

# --- Shorts ---
video_type = 'video'
elif entry_name == f'{channel_name} - Shorts':
shorts_entries = entry.get('entries')
for i, shorts_entry in enumerate(shorts_entries):
title = shorts_entry.get('title')
url = shorts_entry.get('url')
views = shorts_entry.get('view_count')
duration = shorts_entry.get('duration')

self.shorts[i] = {
"title": title,
"url": url,
"views": views,
"duration": duration
}

# Final structured dict (for immediate use)
self.content = {
"live": self.live,
"shorts": self.shorts,
"videos": self.videos,
"video_url": self.video_url
}

return self.content
video_type = 'shorts'
elif entry_name == f'{channel_name} - Live':
video_type = 'live'

video_entries = entry.get('entries')
for i, video_entry in enumerate(video_entries):
video_id = video_entry.get('id')
title = video_entry.get('title')
url = video_entry.get('url')
views = video_entry.get('view_count')
duration = video_entry.get('duration')

thumbnail_url = video_entry.get("thumbnails")[-1].get("url")
os.makedirs(f"{self.db.thumbnail_dir}/{channel_id}", exist_ok=True)
profile_save_path = rf"{self.db.thumbnail_dir}/{channel_id}/{video_id}.png"
download_with_proxy(thumbnail_url, profile_save_path, proxy_url)

# Insert into DB
self.db.insert("VIDEO", {
"video_id": video_id,
"channel_id": channel_id,
"video_type": video_type,
"video_url": url,
"title": title,
"desc": video_entry.get("description"),
"duration": duration,
"view_count": views,
"like_count": video_entry.get("like_count"),
"pub_date": video_entry.get("upload_date"),
})

return

except Exception as e:
import traceback
traceback.print_exc()
print(f"Error while fetching video URLs: {e}")
return {}


if __name__ == "__main__":
    # Manual smoke test: fetch videos for a known channel and show what was
    # persisted to the database.
    db = DatabaseManager()
    videos = Videos(db)

    # Assumes a CHANNEL row with this id was already inserted.
    channel_id = 1
    channel_url = "https://www.youtube.com/@mrbeast"

    results = videos.fetch_video_urls(channel_id, channel_url)

    # fetch_video_urls does not always return a populated dict (it returns {}
    # on error), so guard the lookup instead of indexing unconditionally.
    if isinstance(results, dict) and "videos" in results:
        print("Fetched:", results["videos"])
    print("Saved video entries in DB:", db.fetch("VIDEO", "channel_id=?", (channel_id,)))

    db.close()
41 changes: 34 additions & 7 deletions Data/DatabaseManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@ def __init__(self, base_dir: Optional[str] = None, db_name: str = "data.db"):
self.channel_dir = self.base_dir / "Channels"
self.profile_pic_dir = self.base_dir / "ProfilePics"
self.transcript_dir = self.base_dir / "Transcripts"
self.thumbnail_dir = self.base_dir / "Thumbnails"
self.comment_dir = self.base_dir / "Comments"
self.proxy_dir = self.base_dir / "Proxies"
self.video_dir = self.base_dir / "Videos"

# Ensure directories exist
for folder in [self.db_dir, self.transcript_dir, self.comment_dir, self.proxy_dir, self.video_dir, self.channel_dir, self.profile_pic_dir]:
for folder in [self.db_dir, self.transcript_dir, self.comment_dir,
self.proxy_dir, self.video_dir, self.channel_dir,
self.profile_pic_dir, self.thumbnail_dir]:
folder.mkdir(parents=True, exist_ok=True)
print(f"Created directory: {folder}")

# Thread-local storage for database connections
self._local = threading.local()
Expand Down Expand Up @@ -61,7 +63,9 @@ def _create_tables(self):

CREATE TABLE IF NOT EXISTS VIDEO (
video_id TEXT PRIMARY KEY,
channel_id TEXT,
channel_id TEXT,
video_type TEXT,
video_url TEXT,
title TEXT,
desc TEXT,
duration TEXT,
Expand Down Expand Up @@ -97,10 +101,33 @@ def insert(self, table: str, data: Dict[str, Any]) -> int:
values = tuple(data.values())
query = f"INSERT INTO {table} ({keys}) VALUES ({placeholders})"
cursor = conn.cursor()
cursor.execute(f"SELECT * FROM {table}")
cursor.execute(query, values)
conn.commit()
return cursor.lastrowid

try:
cursor.execute(query, values)
conn.commit()
return cursor.lastrowid
except sqlite3.IntegrityError as e:
# If it's a UNIQUE constraint error, try updating instead
if "UNIQUE constraint failed" in str(e):
# Extract the primary key column name from the error or table
if table == "VIDEO":
pk_column = "video_id"
elif table == "CHANNEL":
pk_column = "channel_id"
else:
# For other tables with auto-increment primary keys, re-raise
raise

# Get the primary key value from data
if pk_column in data:
pk_value = data[pk_column]
# Update instead of insert
update_data = {k: v for k, v in data.items() if k != pk_column}
return self.update(table, update_data, f"{pk_column}=?", (pk_value,))
else:
raise
else:
raise

def fetch(self, table: str, where: Optional[str] = None, params: Tuple = ()) -> List[Dict[str, Any]]:
conn = self._get_connection()
Expand Down
Loading