Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 23 additions & 155 deletions recipe-extractor.py
Original file line number Diff line number Diff line change
@@ -1,123 +1,34 @@
import yt_dlp
import openai
import os
import sys
import argparse
import json
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs
from urllib.parse import parse_qs, urlparse
from dotenv import load_dotenv

try:
from youtube_transcript_api import YouTubeTranscriptApi
except Exception: # pragma: no cover - optional dep may not be installed
YouTubeTranscriptApi = None
import openai
import yt_dlp

from video_transcripts import (
is_youtube_url,
download_audio_with_ytdlp,
fetch_video_info,
get_youtube_transcript,
get_post_text,
get_caption_languages,
transcribe_whisper,
extract_video_transcript,
AUDIO_FILE,
)

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
print("Error: OPENAI_API_KEY not set")
sys.exit(1)
AUDIO_FILE = "audio.mp3"


def is_youtube_url(url: str) -> bool:
"""Return True if the URL points to YouTube."""
host = urlparse(url).netloc.lower()
return "youtube.com" in host or "youtu.be" in host

def download_audio_with_ytdlp(url, out_file=AUDIO_FILE):
# Remove extension from out_file since FFmpegExtractAudio will add it
base_name = out_file.rsplit('.', 1)[0] if '.' in out_file else out_file
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': base_name,
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'quiet': False,
'noplaylist': True
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])

def fetch_video_info(url):
"""Return video metadata without downloading the file."""
with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
return ydl.extract_info(url, download=False)

def get_youtube_transcript(video_id, languages=None):
"""Fetch transcript text from YouTube if available."""
if not YouTubeTranscriptApi:
print("⚠️ youtube-transcript-api not installed; skipping transcript fetch")
return None

ytt_api = YouTubeTranscriptApi()
try:
transcript_list = ytt_api.list(video_id)
except Exception as e: # pragma: no cover - network dependent
print(f"⚠️ Could not list transcripts: {e}")
return None

languages = list(languages or [])

def fetch_text(transcript):
try:
segments = transcript.fetch()
except Exception as e:
print("⚠️ Issue while getting transcripts: ", e)
return None
return " ".join(seg.text for seg in segments)

# First try preferred languages
for lang in languages:
try:
t = transcript_list.find_transcript([lang])
except Exception:
t = None
if t:
text = fetch_text(t)
if text:
return text

# Fall back to the first available transcript
for t in transcript_list:
text = fetch_text(t)
if text:
return text

return None

def get_post_text(info):
"""Return video description or caption."""
for key in ("description", "caption", "summary"):
text = info.get(key)
if text:
return text
return ""

def get_caption_languages(info):
"""Return list of caption language codes from video metadata."""
languages = []
for key in ("subtitles", "automatic_captions"):
for lang in info.get(key, {}):
if lang not in languages:
languages.append(lang)
if info.get("language") and info["language"] not in languages:
languages.append(info["language"])
return languages

def transcribe_whisper(file_path):
openai.api_key = OPENAI_API_KEY
with open(file_path, "rb") as audio_file:
transcript = openai.audio.transcriptions.create(
# model="whisper-1",
model="gpt-4o-mini-transcribe",
file=audio_file
)
return transcript.text



def extract_recipe_with_gpt(transcript, language="english"):
openai.api_key = OPENAI_API_KEY
Expand Down Expand Up @@ -252,23 +163,10 @@ def extract_recipe(url, language="english", output_format="json", save_transcrip
"""High-level helper to extract a recipe from a URL and return it as a string."""
print(f"🎯 Extracting from URL: {url}")

print("⬇️ Downloading audio...")
download_audio_with_ytdlp(url)

print("🎙️ Transcribing audio...")
transcript = transcribe_whisper(AUDIO_FILE)

if save_transcript:
with open(save_transcript, "w", encoding="utf-8") as f:
f.write(transcript)

try:
os.remove(AUDIO_FILE)
except OSError:
pass
combined = extract_video_transcript(url, save_transcript=save_transcript)

print(f"🤖 Extracting recipe using AI (language: {language})...")
structured_recipe = extract_recipe_with_gpt(transcript, language)
structured_recipe = extract_recipe_with_gpt(combined, language)

if output_format == "markdown":
return convert_to_markdown(structured_recipe, language)
Expand Down Expand Up @@ -410,39 +308,9 @@ def main():
print(f"💾 Output: {args.output or 'structured_recipe'}")
print()

info = fetch_video_info(args.url)
post_text = get_post_text(info)

transcript = None
if is_youtube_url(args.url):
caption_langs = get_caption_languages(info)
transcript = get_youtube_transcript(info.get('id'), caption_langs)
if transcript:
print("📝 Using existing YouTube transcript")

if not transcript:
print("⬇️ Downloading audio...")
download_audio_with_ytdlp(args.url)

print("🎙️ Transcribing audio...")
transcript = transcribe_whisper(AUDIO_FILE)
print(f"📏 Transcription length: {len(transcript)} characters")

combined = (post_text + "\n\n" + transcript).strip()

# Save transcription if requested
if args.save_transcript:
with open(args.save_transcript, "w", encoding="utf-8") as f:
f.write(transcript)
print(f"📝 Transcription saved to {args.save_transcript} for review")
print()

# Clean up audio file after transcription
try:
os.remove(AUDIO_FILE)
print("🧹 Audio file cleaned up.")
except OSError:
print("⚠️ Warning: Could not delete audio file.")
combined = extract_video_transcript(
args.url, save_transcript=args.save_transcript
)

print(f"🤖 Extracting recipe using AI (language: {args.language})...")
structured_recipe = extract_recipe_with_gpt(combined, args.language)
Expand Down
14 changes: 7 additions & 7 deletions tests/test_transcript_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
)
recipe_extractor = importlib.util.module_from_spec(spec)
spec.loader.exec_module(recipe_extractor)
video_transcripts = sys.modules["video_transcripts"]

def test_is_youtube_url_detection():
assert recipe_extractor.is_youtube_url("https://www.youtube.com/watch?v=abc")
Expand All @@ -28,19 +29,18 @@ def test_main_only_uses_youtube_transcripts(tmp_path, monkeypatch):
def fake_fetch_info(url):
return {"id": "abc"}

monkeypatch.setattr(recipe_extractor, "fetch_video_info", fake_fetch_info)
monkeypatch.setattr(recipe_extractor, "get_post_text", lambda info: "")
monkeypatch.setattr(recipe_extractor, "get_caption_languages", lambda info: [])
monkeypatch.setattr(video_transcripts, "fetch_video_info", fake_fetch_info)
monkeypatch.setattr(video_transcripts, "get_post_text", lambda info: "")
monkeypatch.setattr(video_transcripts, "get_caption_languages", lambda info: [])

def fake_get_transcript(video_id, langs=None):
calls["yt"] += 1
return "transcript"

monkeypatch.setattr(recipe_extractor, "get_youtube_transcript", fake_get_transcript)
monkeypatch.setattr(recipe_extractor, "download_audio_with_ytdlp", lambda url: None)
monkeypatch.setattr(recipe_extractor, "transcribe_whisper", lambda path: "audio")
monkeypatch.setattr(video_transcripts, "get_youtube_transcript", fake_get_transcript)
monkeypatch.setattr(video_transcripts, "download_audio_with_ytdlp", lambda url: None)
monkeypatch.setattr(video_transcripts, "transcribe_whisper", lambda path: "audio")
monkeypatch.setattr(recipe_extractor, "extract_recipe_with_gpt", lambda t, l: "{}")
monkeypatch.setattr(recipe_extractor.os, "remove", lambda path: None)

# YouTube URL should trigger transcript fetch
monkeypatch.setattr(sys, "argv", [
Expand Down
149 changes: 149 additions & 0 deletions video_transcripts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import yt_dlp
import openai
import os
from urllib.parse import urlparse
from dotenv import load_dotenv

try:
from youtube_transcript_api import YouTubeTranscriptApi
except Exception: # pragma: no cover - optional dependency
YouTubeTranscriptApi = None

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
AUDIO_FILE = "audio.mp3"


def is_youtube_url(url: str) -> bool:
    """Return True if *url* points at a YouTube host.

    Accepts ``youtube.com`` / ``youtu.be`` and any of their subdomains
    (``www.``, ``m.``, ...).  An exact or suffix host comparison is used
    instead of a plain substring test so that look-alike hosts such as
    ``youtube.com.evil.example`` are rejected.
    """
    host = urlparse(url).netloc.lower()
    # Drop an explicit port before comparing host names.
    host = host.split(":", 1)[0]
    return (
        host in ("youtube.com", "youtu.be")
        or host.endswith(".youtube.com")
        or host.endswith(".youtu.be")
    )


def download_audio_with_ytdlp(url: str, out_file: str = AUDIO_FILE) -> None:
    """Download a video's audio track to *out_file* as MP3 via yt-dlp.

    The FFmpegExtractAudio post-processor appends the ``.mp3`` extension
    itself, so the output template must be the target path with any
    extension stripped.
    """
    stem, dot, _ext = out_file.rpartition(".")
    template = stem if dot else out_file
    options = {
        "format": "bestaudio/best",
        "outtmpl": template,
        "noplaylist": True,
        "quiet": False,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])


def fetch_video_info(url: str) -> dict:
    """Return yt-dlp metadata for *url* without downloading any media."""
    options = {"quiet": True}
    with yt_dlp.YoutubeDL(options) as probe:
        info = probe.extract_info(url, download=False)
    return info


def get_youtube_transcript(video_id: str, languages=None) -> str | None:
    """Return transcript text for a YouTube video, or None if unavailable.

    Transcripts in the preferred *languages* are tried first, in order;
    failing that, the first listed transcript that fetches successfully is
    used.  Requires the optional youtube-transcript-api package.
    """
    if YouTubeTranscriptApi is None:
        print("⚠️ youtube-transcript-api not installed; skipping transcript fetch")
        return None

    api = YouTubeTranscriptApi()
    try:
        available = api.list(video_id)
    except Exception as exc:  # pragma: no cover - network dependent
        print(f"⚠️ Could not list transcripts: {exc}")
        return None

    def as_text(candidate):
        # Fetch one transcript and join its segments; None on any failure.
        try:
            segments = candidate.fetch()
        except Exception as exc:  # pragma: no cover - network dependent
            print("⚠️ Issue while getting transcripts: ", exc)
            return None
        return " ".join(segment.text for segment in segments)

    # Preferred languages first.
    for lang in list(languages or []):
        try:
            candidate = available.find_transcript([lang])
        except Exception:
            continue
        text = as_text(candidate)
        if text:
            return text

    # Otherwise, take whatever fetches successfully first.
    for candidate in available:
        text = as_text(candidate)
        if text:
            return text
    return None


def get_post_text(info: dict) -> str:
    """Return the first non-empty description-like field of *info*, else ''.

    Checks ``description``, ``caption`` and ``summary`` in that order.
    """
    candidates = (info.get(key) for key in ("description", "caption", "summary"))
    return next((text for text in candidates if text), "")


def get_caption_languages(info: dict) -> list:
    """Return caption language codes from video metadata, de-duplicated.

    Collects the keys of the manual and automatic caption maps (in that
    order) and finally the video's own ``language`` code, if present.
    Order of first appearance is preserved.
    """
    # dict.fromkeys de-duplicates while keeping insertion order.
    seen = dict.fromkeys(
        lang
        for key in ("subtitles", "automatic_captions")
        for lang in info.get(key, {})
    )
    primary = info.get("language")
    if primary:
        seen.setdefault(primary, None)
    return list(seen)


def transcribe_whisper(file_path: str) -> str:
    """Transcribe the audio file at *file_path* via OpenAI's transcription API."""
    openai.api_key = OPENAI_API_KEY
    with open(file_path, "rb") as audio:
        response = openai.audio.transcriptions.create(
            file=audio,
            model="gpt-4o-mini-transcribe",
        )
    return response.text


def extract_video_transcript(url: str, *, save_transcript: str | None = None) -> str:
    """Return post text plus transcript for *url*, joined by a blank line.

    For YouTube URLs an existing caption transcript is preferred; otherwise
    the audio is downloaded and transcribed with Whisper, and the temporary
    audio file is removed afterwards.

    When *save_transcript* is a path, the transcript text is written there
    regardless of how it was obtained.  (Previously the save only happened
    in the Whisper branch, so a YouTube-sourced transcript was never saved.)
    """
    info = fetch_video_info(url)
    post_text = get_post_text(info)

    transcript = None
    if is_youtube_url(url):
        caption_langs = get_caption_languages(info)
        transcript = get_youtube_transcript(info.get("id"), caption_langs)
        if transcript:
            print("📝 Using existing YouTube transcript")

    if not transcript:
        print("⬇️ Downloading audio...")
        download_audio_with_ytdlp(url)
        print("🎙️ Transcribing audio...")
        transcript = transcribe_whisper(AUDIO_FILE)
        # Best-effort cleanup of the temporary audio file; a failure to
        # delete it should not abort the extraction.
        try:
            os.remove(AUDIO_FILE)
        except OSError:
            pass

    if save_transcript:
        with open(save_transcript, "w", encoding="utf-8") as f:
            f.write(transcript)

    return (post_text + "\n\n" + transcript).strip()