From a4dca4a8ec66ec9c77eb5cae447585305899562b Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 10:25:10 -0400 Subject: [PATCH 01/19] feat(wistia): add subtitle download support --- .env.example | 5 +- README.md | 3 + thinkific_downloader/config.py | 5 +- thinkific_downloader/downloader.py | 29 ++- thinkific_downloader/wistia_downloader.py | 226 +++++++++++++++++++++- 5 files changed, 259 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index 654f1ab..32056dc 100644 --- a/.env.example +++ b/.env.example @@ -61,6 +61,9 @@ RESUME_PARTIAL=true # Enable detailed logging for troubleshooting DEBUG=false +# Download subtitles/captions when available (default: true) +SUBTITLE_DOWNLOAD_ENABLED=true + # =============================================== # ADVANCED SETTINGS # =============================================== @@ -83,4 +86,4 @@ COURSE_DATA_FILE="" # ALL_VIDEO_FORMATS=false # Log level (DEBUG, INFO, WARNING, ERROR) -# LOG_LEVEL="INFO" \ No newline at end of file +# LOG_LEVEL="INFO" diff --git a/README.md b/README.md index a05530a..925986d 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ A modern, feature-rich Python utility to download courses from Thinkific platfor | šŸ“„ **HTML Content** | āœ… Full | `downloader.py` | Clean extraction, formatting | | šŸ“š **PDF Documents** | āœ… Full | `downloader.py` | Direct download, validation | | šŸŽµ **Audio Files** | āœ… Full | `downloader.py` | MP3, M4A support | +| šŸ“ **Subtitles (Wistia)** | āœ… Full | `wistia_downloader.py` | Multi-language caption downloads | | šŸŽÆ **Quizzes** | āœ… Basic | `downloader.py` | Structure extraction | | šŸŽØ **Presentations** | āœ… Full | FFmpeg merge | Multi-slide processing | @@ -70,6 +71,7 @@ A modern, feature-rich Python utility to download courses from Thinkific platfor - **Resume Support** - Skip existing files, continue interrupted downloads - **Atomic Resume/Backup** - Status file is always safely backed up and updated, works on 
Windows, Mac, Linux - **Multiple Quality Options** - Choose video quality (720p, 1080p, etc.) +- **Subtitle Downloads** - Automatically grab Wistia caption tracks in multiple languages - **Comprehensive Logging** - Debug mode for troubleshooting ### šŸ›”ļø **Safety & Compliance** @@ -201,6 +203,7 @@ RATE_LIMIT_MB_S= # Rate limit in MB/s (empty = unlimited) VALIDATE_DOWNLOADS=true # Enable file integrity validation RESUME_PARTIAL=true # Enable resume for partial downloads DEBUG=false # Enable debug logging +SUBTITLE_DOWNLOAD_ENABLED=true # Download subtitles/captions when available # =============================================== # ADVANCED SETTINGS diff --git a/thinkific_downloader/config.py b/thinkific_downloader/config.py index c1e1286..ab75fd4 100644 --- a/thinkific_downloader/config.py +++ b/thinkific_downloader/config.py @@ -37,6 +37,7 @@ class Settings: resume_partial: bool = True debug: bool = False course_name: str = "Course" + subtitle_download_enabled: bool = True @classmethod def from_env(cls): @@ -67,6 +68,7 @@ def from_env(cls): validate_downloads = os.getenv('VALIDATE_DOWNLOADS', 'true').lower() in ('1', 'true', 'yes', 'on') resume_partial = os.getenv('RESUME_PARTIAL', 'true').lower() in ('1', 'true', 'yes', 'on') debug = os.getenv('DEBUG', 'false').lower() in ('1', 'true', 'yes', 'on') + subtitle_download_enabled = os.getenv('SUBTITLE_DOWNLOAD_ENABLED', 'true').lower() in ('1', 'true', 'yes', 'on') # Clean cookie data to remove Unicode characters that cause encoding issues if cookie_data: @@ -101,5 +103,6 @@ def from_env(cls): download_delay=download_delay, validate_downloads=validate_downloads, resume_partial=resume_partial, - debug=debug + debug=debug, + subtitle_download_enabled=subtitle_download_enabled ) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index f2c2191..a6bb286 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -386,6 +386,9 @@ def 
download_file_chunked(src_url: str, dst_name: str, chunk_mb: int = 1): def init_course(data: Dict[str, Any]): """Initialize course structure and collect ALL download tasks first.""" global COURSE_CONTENTS, ROOT_PROJECT_DIR, BASE_HOST, DOWNLOAD_TASKS + + # Ensure settings/download manager are initialized so feature flags are available + init_settings() # Initialize download tasks list DOWNLOAD_TASKS = [] @@ -417,6 +420,21 @@ def init_course(data: Dict[str, Any]): analyzed_chapters = set(cache_data.get('analyzed_chapters', [])) saved_tasks = cache_data.get('download_tasks', []) print(f"šŸ“‹ Found previous progress: {len(analyzed_chapters)} chapters analyzed, {len(saved_tasks)} tasks cached") + # If subtitle downloads are enabled but cached tasks do not contain subtitles, + # treat cache as outdated so we can regenerate tasks with captions. + if SETTINGS and SETTINGS.subtitle_download_enabled and saved_tasks: + has_subtitle_tasks = any( + (task.get('content_type') or '').lower() == 'subtitle' + for task in saved_tasks + ) + if not has_subtitle_tasks: + print("šŸ†• Subtitle support enabled — refreshing cached analysis to include captions.") + analyzed_chapters = set() + saved_tasks = [] + try: + cache_file.unlink() + except Exception: + pass except: analyzed_chapters = set() saved_tasks = [] @@ -835,9 +853,16 @@ def collect_video_task_wistia(wistia_id: str, file_name: str, dest_dir: Path): video_url = selected.get('url') if video_url: ext = '.mp4' # Default extension - resolved_name = filter_filename(file_name) + ext + resolved_name = filter_filename(file_name) + if not resolved_name.lower().endswith(ext): + resolved_name += ext print(f" šŸ“¹ Found video: {resolved_name}") add_download_task(video_url, dest_dir / resolved_name, "video") + try: + from .wistia_downloader import queue_wistia_subtitle_downloads + queue_wistia_subtitle_downloads(data.get('media') or {}, dest_dir, resolved_name) + except Exception as subtitle_error: + print(f" āš ļø Unable to queue subtitles 
for {resolved_name}: {subtitle_error}") except Exception as e: print(f" āŒ Failed to collect Wistia video {wistia_id}: {e}") @@ -1282,4 +1307,4 @@ def main(argv: List[str]): if __name__ == '__main__': - main(sys.argv) \ No newline at end of file + main(sys.argv) diff --git a/thinkific_downloader/wistia_downloader.py b/thinkific_downloader/wistia_downloader.py index f53ae65..eb4ff20 100644 --- a/thinkific_downloader/wistia_downloader.py +++ b/thinkific_downloader/wistia_downloader.py @@ -1,12 +1,14 @@ import json +import os import re -import requests import zlib -from typing import Optional, List from pathlib import Path -import os +from typing import Any, Dict, List, Optional +from urllib.parse import urlparse + +import requests + from .file_utils import filter_filename -from .download_manager import DownloadManager # Local imports inside functions to avoid circular dependency during module import # Handles video proxy and wistia direct downloads @@ -14,6 +16,218 @@ WISTIA_JSON_URL = "https://fast.wistia.com/embed/medias/{id}.json" VIDEO_PROXY_JSONP_ID_PATTERN = re.compile(r"medias/(\w+)\.jsonp") +DEFAULT_SUBTITLE_EXTENSION = "vtt" +_LANGUAGE_SANITIZE_PATTERN = re.compile(r'[^A-Za-z0-9\-]+') + + +def _normalize_wistia_track_url(url: Optional[str]) -> Optional[str]: + """Normalize Wistia caption track URLs to absolute HTTPS URLs.""" + if not url or not isinstance(url, str): + return None + + normalized = url.strip() + if not normalized: + return None + + if normalized.startswith('//'): + normalized = f"https:{normalized}" + elif normalized.startswith('/'): + normalized = f"https://fast.wistia.com{normalized}" + elif not re.match(r'^https?://', normalized, re.IGNORECASE): + normalized = f"https://fast.wistia.com/{normalized.lstrip('/')}" + + return normalized + + +def _build_caption_url(hashed_id: Optional[str], language: Optional[str], extension: Optional[str] = None) -> Optional[str]: + """Construct a Wistia caption URL when only hashedId and language are 
available.""" + if not hashed_id or not language: + return None + + ext = (extension or DEFAULT_SUBTITLE_EXTENSION).lstrip('.') or DEFAULT_SUBTITLE_EXTENSION + return f"https://fast.wistia.com/embed/captions/{hashed_id}.{ext}?language={language}" + + +def _infer_track_extension(url: str, fallback: str = DEFAULT_SUBTITLE_EXTENSION) -> str: + """Infer file extension from track URL.""" + try: + parsed = urlparse(url) + suffix = Path(parsed.path).suffix + if suffix: + return suffix.lstrip('.').lower() or fallback + except Exception: + pass + return fallback + + +def extract_wistia_subtitle_tracks(media: Dict[str, Any]) -> List[Dict[str, Optional[str]]]: + """Extract subtitle/caption track metadata from Wistia media JSON.""" + if not isinstance(media, dict): + return [] + + hashed_id = media.get('hashedId') or media.get('hashed_id') + tracks: List[Dict[str, Optional[str]]] = [] + + def add_track(url: Optional[str], language: Optional[str], label: Optional[str], ext: Optional[str]): + normalized = _normalize_wistia_track_url(url) + if not normalized and hashed_id and language: + normalized = _build_caption_url(hashed_id, language, ext) + if not normalized: + return + tracks.append({ + 'url': normalized, + 'language': language, + 'label': label, + 'ext': (ext or '').lstrip('.') or None + }) + + for track in media.get('captions') or []: + if isinstance(track, dict): + add_track( + track.get('url') or track.get('src'), + track.get('language') or track.get('lang'), + track.get('languageName') or track.get('label') or track.get('name'), + track.get('ext') + ) + + for track in media.get('text_tracks') or []: + if not isinstance(track, dict): + continue + sources = track.get('sources') or [] + if sources: + for source in sources: + if isinstance(source, dict): + add_track( + source.get('url') or source.get('src'), + track.get('language') or track.get('lang'), + track.get('name') or track.get('label'), + source.get('ext') or track.get('ext') + ) + else: + add_track( + 
track.get('url') or track.get('src'), + track.get('language') or track.get('lang'), + track.get('name') or track.get('label'), + track.get('ext') + ) + + for track in media.get('textTracks') or []: + if not isinstance(track, dict): + continue + sources = track.get('sources') or [] + if sources: + for source in sources: + if isinstance(source, dict): + add_track( + source.get('url') or source.get('src'), + track.get('language') or track.get('lang'), + track.get('name') or track.get('label') or track.get('title'), + source.get('ext') or track.get('ext') + ) + else: + add_track( + track.get('url') or track.get('src'), + track.get('language') or track.get('lang'), + track.get('name') or track.get('label') or track.get('title'), + track.get('ext') + ) + + for asset in media.get('assets') or []: + if isinstance(asset, dict): + asset_type = (asset.get('type') or '').lower() + asset_kind = (asset.get('kind') or '').lower() + if asset_type in ('caption', 'captions', 'subtitle', 'subtitles') or asset_kind in ('caption', 'captions', 'subtitle', 'subtitles'): + add_track( + asset.get('url') or asset.get('src'), + asset.get('language') or asset.get('lang'), + asset.get('display_name') or asset.get('name'), + asset.get('ext') + ) + + available_transcripts = media.get('availableTranscripts') or [] + if hashed_id and available_transcripts: + for transcript in available_transcripts: + if not isinstance(transcript, dict) or not transcript.get('hasCaptions'): + continue + language = transcript.get('language') or transcript.get('wistiaLanguageCode') or transcript.get('bcp47LanguageTag') + if not language: + continue + add_track( + _build_caption_url(hashed_id, language, DEFAULT_SUBTITLE_EXTENSION), + language, + transcript.get('name') or transcript.get('familyName') or language, + DEFAULT_SUBTITLE_EXTENSION + ) + + unique_tracks: Dict[str, Dict[str, Optional[str]]] = {} + for track in tracks: + url = track['url'] + if not url: + continue + if url not in unique_tracks: + 
unique_tracks[url] = track + else: + existing = unique_tracks[url] + # Prefer track data that includes language/label/ext + if not existing.get('language') and track.get('language'): + existing['language'] = track['language'] + if not existing.get('label') and track.get('label'): + existing['label'] = track['label'] + if not existing.get('ext') and track.get('ext'): + existing['ext'] = track['ext'] + + return list(unique_tracks.values()) + + +def queue_wistia_subtitle_downloads(media: Dict[str, Any], dest_dir: Path, video_base_name: str): + """Queue subtitle download tasks for a Wistia media object.""" + from .downloader import SETTINGS, add_download_task, init_settings + + if not isinstance(dest_dir, Path): + dest_dir = Path(dest_dir) + + init_settings() + settings = SETTINGS + if settings and hasattr(settings, 'subtitle_download_enabled') and not settings.subtitle_download_enabled: + return + + tracks = extract_wistia_subtitle_tracks(media) + if not tracks: + return + + base_name = Path(video_base_name).stem + if not base_name: + fallback_name = media.get('name') or media.get('hashedId') or 'captions' + base_name = filter_filename(str(fallback_name)) + else: + base_name = filter_filename(base_name) + + if not base_name: + base_name = "captions" + + counter = 1 + for track in tracks: + url = track.get('url') + if not url: + continue + + ext = (track.get('ext') or _infer_track_extension(url)).lstrip('.').lower() or DEFAULT_SUBTITLE_EXTENSION + language_part = track.get('language') or track.get('label') or '' + if isinstance(language_part, (list, dict)): + language_part = '' + language_part = str(language_part or '') + language_part = _LANGUAGE_SANITIZE_PATTERN.sub('-', language_part).strip('-') + + if not language_part: + language_part = 'captions' if counter == 1 else f"captions-{counter}" + + subtitle_filename = filter_filename(f"{base_name}.{language_part}.{ext}") + if not subtitle_filename: + subtitle_filename = 
filter_filename(f"{base_name}.captions-{counter}.{ext}") + + print(f" [Subs] Queued subtitles: {subtitle_filename}") + add_download_task(url, dest_dir / subtitle_filename, "subtitle") + counter += 1 def video_downloader_videoproxy(video_url: str, file_name: str, quality: str = "720p"): @@ -143,6 +357,7 @@ def infer_ext(asset: dict) -> str: return '.mp4' resolved_base = filter_filename(file_name if file_name else media.get('name') or wistia_id) + current_dir = Path.cwd() if all_formats_flag: print(f"Downloading all available Wistia assets for {resolved_base}") @@ -172,6 +387,7 @@ def infer_ext(asset: dict) -> str: DOWNLOAD_MANAGER.download_file(a_url, Path(filter_filename(out_name))) else: print("Download manager not initialized") + queue_wistia_subtitle_downloads(media, current_dir, resolved_base) return # Single quality path @@ -200,6 +416,6 @@ def infer_ext(asset: dict) -> str: # Queue video for parallel download with absolute path to current directory from .downloader import add_download_task - current_dir = Path.cwd() # Capture current working directory full_path = current_dir / resolved_name # Create absolute path add_download_task(video_url, full_path, "video") + queue_wistia_subtitle_downloads(media, current_dir, resolved_name) From f2cd39d9ce037d0aafbee768e6c322e842ffd5a9 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 12:05:20 -0400 Subject: [PATCH 02/19] Respect subtitle flag when restoring cached tasks --- thinkific_downloader/downloader.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index a6bb286..91f3771 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -449,9 +449,23 @@ def init_course(data: Dict[str, Any]): # Restore saved download tasks if saved_tasks: - print(f"šŸ“„ Restoring {len(saved_tasks)} previously collected download tasks...") - for task_data in saved_tasks: - 
add_download_task(task_data['url'], Path(task_data['dest_path']), task_data.get('content_type', 'video')) + restored_tasks = saved_tasks + if SETTINGS and hasattr(SETTINGS, 'subtitle_download_enabled') and not SETTINGS.subtitle_download_enabled: + filtered_tasks = [] + skipped_count = 0 + for task in saved_tasks: + content_type = (task.get('content_type') or 'video').lower() + if content_type == 'subtitle': + skipped_count += 1 + continue + filtered_tasks.append(task) + restored_tasks = filtered_tasks + if skipped_count: + print(f"ā­ļø Skipping {skipped_count} cached subtitle task(s) because subtitle downloads are disabled.") + if restored_tasks: + print(f"šŸ“„ Restoring {len(restored_tasks)} previously collected download tasks...") + for task_data in restored_tasks: + add_download_task(task_data['url'], Path(task_data['dest_path']), task_data.get('content_type', 'video')) collect_all_download_tasks(data, analyzed_chapters, cache_file) From 80129f80607dcf5ab56e3f6d33ac30302e6cf4ae Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 12:05:40 -0400 Subject: [PATCH 03/19] Update thinkific_downloader/downloader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- thinkific_downloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index 91f3771..69ea6a5 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -435,7 +435,7 @@ def init_course(data: Dict[str, Any]): cache_file.unlink() except Exception: pass - except: + except (json.JSONDecodeError, OSError): analyzed_chapters = set() saved_tasks = [] From 7f4a58abd5b2815b3a278b13ab07d5aefc532356 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 12:05:57 -0400 Subject: [PATCH 04/19] Update thinkific_downloader/downloader.py Co-authored-by: gemini-code-assist[bot] 
<176961590+gemini-code-assist[bot]@users.noreply.github.com> --- thinkific_downloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index 69ea6a5..5900403 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -433,7 +433,7 @@ def init_course(data: Dict[str, Any]): saved_tasks = [] try: cache_file.unlink() - except Exception: + except OSError: pass except (json.JSONDecodeError, OSError): analyzed_chapters = set() From 60f827370954899cab6d707901cab6a060691ae1 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 12:06:11 -0400 Subject: [PATCH 05/19] Update thinkific_downloader/wistia_downloader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- thinkific_downloader/wistia_downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thinkific_downloader/wistia_downloader.py b/thinkific_downloader/wistia_downloader.py index eb4ff20..b9832ce 100644 --- a/thinkific_downloader/wistia_downloader.py +++ b/thinkific_downloader/wistia_downloader.py @@ -55,7 +55,7 @@ def _infer_track_extension(url: str, fallback: str = DEFAULT_SUBTITLE_EXTENSION) suffix = Path(parsed.path).suffix if suffix: return suffix.lstrip('.').lower() or fallback - except Exception: + except (AttributeError, TypeError): pass return fallback From e02aca2d95eeca201077bc6917b20979e103de72 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 12:08:11 -0400 Subject: [PATCH 06/19] Refactor Wistia track extraction helper --- thinkific_downloader/wistia_downloader.py | 72 +++++++++++------------ 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/thinkific_downloader/wistia_downloader.py b/thinkific_downloader/wistia_downloader.py index b9832ce..7cdfc17 100644 --- a/thinkific_downloader/wistia_downloader.py +++ 
b/thinkific_downloader/wistia_downloader.py @@ -3,7 +3,7 @@ import re import zlib from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Iterable, List, Optional from urllib.parse import urlparse import requests @@ -90,47 +90,45 @@ def add_track(url: Optional[str], language: Optional[str], label: Optional[str], track.get('ext') ) - for track in media.get('text_tracks') or []: - if not isinstance(track, dict): - continue - sources = track.get('sources') or [] - if sources: - for source in sources: - if isinstance(source, dict): - add_track( - source.get('url') or source.get('src'), - track.get('language') or track.get('lang'), - track.get('name') or track.get('label'), - source.get('ext') or track.get('ext') - ) - else: - add_track( - track.get('url') or track.get('src'), - track.get('language') or track.get('lang'), - track.get('name') or track.get('label'), - track.get('ext') - ) + def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], label_keys: Iterable[str]): + if not collection: + return - for track in media.get('textTracks') or []: - if not isinstance(track, dict): - continue - sources = track.get('sources') or [] - if sources: - for source in sources: - if isinstance(source, dict): + def _get_label(track_dict: Dict[str, Any]) -> Optional[str]: + for key in label_keys: + value = track_dict.get(key) + if value: + return value + return None + + for track in collection: + if not isinstance(track, dict): + continue + + language = track.get('language') or track.get('lang') + label = _get_label(track) + sources = track.get('sources') or [] + + if sources: + for source in sources: + if not isinstance(source, dict): + continue add_track( source.get('url') or source.get('src'), - track.get('language') or track.get('lang'), - track.get('name') or track.get('label') or track.get('title'), + language, + label, source.get('ext') or track.get('ext') ) - else: - add_track( - track.get('url') or 
track.get('src'), - track.get('language') or track.get('lang'), - track.get('name') or track.get('label') or track.get('title'), - track.get('ext') - ) + else: + add_track( + track.get('url') or track.get('src'), + language, + label, + track.get('ext') + ) + + process_track_collection(media.get('text_tracks'), ('name', 'label')) + process_track_collection(media.get('textTracks'), ('name', 'label', 'title')) for asset in media.get('assets') or []: if isinstance(asset, dict): From 72573acd9ed17f1f601d2908fbcaaa6e46d7a134 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 12:28:11 -0400 Subject: [PATCH 07/19] Factor cache restore helpers --- thinkific_downloader/downloader.py | 110 +++++++++++++++++------------ 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index 5900403..d086bd7 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -382,6 +382,70 @@ def download_file_chunked(src_url: str, dst_name: str, chunk_mb: int = 1): add_download_task(src_url, dst_path, "file") +def _load_cached_progress(cache_file: Path): + """Return previously analyzed chapters and queued tasks from the resume cache.""" + analyzed_chapters = set() + saved_tasks: List[Dict[str, Any]] = [] + + if not cache_file.exists(): + return analyzed_chapters, saved_tasks + + try: + with open(cache_file, 'r', encoding='utf-8') as f: + cache_data = json.load(f) + + analyzed_chapters = set(cache_data.get('analyzed_chapters', [])) + saved_tasks = cache_data.get('download_tasks', []) + print(f"šŸ“‹ Found previous progress: {len(analyzed_chapters)} chapters analyzed, {len(saved_tasks)} tasks cached") + + # If subtitle downloads were newly enabled, invalidate cache so we can regenerate tasks. 
+ if SETTINGS and SETTINGS.subtitle_download_enabled and saved_tasks: + has_subtitle_tasks = any( + (task.get('content_type') or '').lower() == 'subtitle' + for task in saved_tasks + ) + if not has_subtitle_tasks: + print("šŸ†• Subtitle support enabled — refreshing cached analysis to include captions.") + analyzed_chapters = set() + saved_tasks = [] + try: + cache_file.unlink() + except OSError: + pass + except (json.JSONDecodeError, OSError): + analyzed_chapters = set() + saved_tasks = [] + + return analyzed_chapters, saved_tasks + + +def _restore_saved_tasks(saved_tasks: List[Dict[str, Any]]): + """Restore cached download tasks, respecting the subtitle feature flag.""" + if not saved_tasks: + return + + restored_tasks = saved_tasks + if SETTINGS and hasattr(SETTINGS, 'subtitle_download_enabled') and not SETTINGS.subtitle_download_enabled: + filtered_tasks: List[Dict[str, Any]] = [] + skipped_count = 0 + for task in saved_tasks: + content_type = (task.get('content_type') or 'video').lower() + if content_type == 'subtitle': + skipped_count += 1 + continue + filtered_tasks.append(task) + restored_tasks = filtered_tasks + if skipped_count: + print(f"ā­ļø Skipping {skipped_count} cached subtitle task(s) because subtitle downloads are disabled.") + + if not restored_tasks: + return + + print(f"šŸ“„ Restoring {len(restored_tasks)} previously collected download tasks...") + for task_data in restored_tasks: + add_download_task(task_data['url'], Path(task_data['dest_path']), task_data.get('content_type', 'video')) + + def init_course(data: Dict[str, Any]): """Initialize course structure and collect ALL download tasks first.""" @@ -412,32 +476,7 @@ def init_course(data: Dict[str, Any]): analyzed_chapters = set() saved_tasks = [] - if cache_file.exists(): - try: - import json - with open(cache_file, 'r', encoding='utf-8') as f: - cache_data = json.load(f) - analyzed_chapters = set(cache_data.get('analyzed_chapters', [])) - saved_tasks = cache_data.get('download_tasks', []) 
- print(f"šŸ“‹ Found previous progress: {len(analyzed_chapters)} chapters analyzed, {len(saved_tasks)} tasks cached") - # If subtitle downloads are enabled but cached tasks do not contain subtitles, - # treat cache as outdated so we can regenerate tasks with captions. - if SETTINGS and SETTINGS.subtitle_download_enabled and saved_tasks: - has_subtitle_tasks = any( - (task.get('content_type') or '').lower() == 'subtitle' - for task in saved_tasks - ) - if not has_subtitle_tasks: - print("šŸ†• Subtitle support enabled — refreshing cached analysis to include captions.") - analyzed_chapters = set() - saved_tasks = [] - try: - cache_file.unlink() - except OSError: - pass - except (json.JSONDecodeError, OSError): - analyzed_chapters = set() - saved_tasks = [] + analyzed_chapters, saved_tasks = _load_cached_progress(cache_file) # Derive base host from landing_page_url if available landing = data['course'].get('landing_page_url') @@ -448,24 +487,7 @@ def init_course(data: Dict[str, Any]): print("\nšŸ” Phase 1: Analyzing course content and collecting download links...") # Restore saved download tasks - if saved_tasks: - restored_tasks = saved_tasks - if SETTINGS and hasattr(SETTINGS, 'subtitle_download_enabled') and not SETTINGS.subtitle_download_enabled: - filtered_tasks = [] - skipped_count = 0 - for task in saved_tasks: - content_type = (task.get('content_type') or 'video').lower() - if content_type == 'subtitle': - skipped_count += 1 - continue - filtered_tasks.append(task) - restored_tasks = filtered_tasks - if skipped_count: - print(f"ā­ļø Skipping {skipped_count} cached subtitle task(s) because subtitle downloads are disabled.") - if restored_tasks: - print(f"šŸ“„ Restoring {len(restored_tasks)} previously collected download tasks...") - for task_data in restored_tasks: - add_download_task(task_data['url'], Path(task_data['dest_path']), task_data.get('content_type', 'video')) + _restore_saved_tasks(saved_tasks) collect_all_download_tasks(data, analyzed_chapters, 
cache_file) From bd00862dc1d2a86a0a1e606f0dcecfd11088c37a Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 12:30:28 -0400 Subject: [PATCH 08/19] Decompose Wistia track processing helpers --- thinkific_downloader/wistia_downloader.py | 57 ++++++++++------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/thinkific_downloader/wistia_downloader.py b/thinkific_downloader/wistia_downloader.py index 7cdfc17..fbf5694 100644 --- a/thinkific_downloader/wistia_downloader.py +++ b/thinkific_downloader/wistia_downloader.py @@ -90,42 +90,33 @@ def add_track(url: Optional[str], language: Optional[str], label: Optional[str], track.get('ext') ) - def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], label_keys: Iterable[str]): - if not collection: - return - - def _get_label(track_dict: Dict[str, Any]) -> Optional[str]: - for key in label_keys: - value = track_dict.get(key) - if value: - return value - return None + def iter_track_dicts(collection: Optional[Iterable[Dict[str, Any]]]): + for item in collection or []: + if isinstance(item, dict): + yield item + + def extract_label(track_dict: Dict[str, Any], label_keys: Iterable[str]) -> Optional[str]: + for key in label_keys: + value = track_dict.get(key) + if value: + return value + return None - for track in collection: - if not isinstance(track, dict): - continue + def iter_track_sources(track_dict: Dict[str, Any]): + sources = track_dict.get('sources') or [] + if not sources: + yield track_dict.get('url') or track_dict.get('src'), track_dict.get('ext') + return + for source in sources: + if isinstance(source, dict): + yield source.get('url') or source.get('src'), source.get('ext') or track_dict.get('ext') + def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], label_keys: Iterable[str]): + for track in iter_track_dicts(collection): language = track.get('language') or track.get('lang') - label = _get_label(track) - sources = 
track.get('sources') or [] - - if sources: - for source in sources: - if not isinstance(source, dict): - continue - add_track( - source.get('url') or source.get('src'), - language, - label, - source.get('ext') or track.get('ext') - ) - else: - add_track( - track.get('url') or track.get('src'), - language, - label, - track.get('ext') - ) + label = extract_label(track, label_keys) + for source_url, source_ext in iter_track_sources(track): + add_track(source_url, language, label, source_ext) process_track_collection(media.get('text_tracks'), ('name', 'label')) process_track_collection(media.get('textTracks'), ('name', 'label', 'title')) From c705cc3c4c431a85fbedb75a85b1556b8eb7dcf0 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 13:05:22 -0400 Subject: [PATCH 09/19] Update thinkific_downloader/downloader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- thinkific_downloader/downloader.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index d086bd7..870647f 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -425,17 +425,14 @@ def _restore_saved_tasks(saved_tasks: List[Dict[str, Any]]): return restored_tasks = saved_tasks - if SETTINGS and hasattr(SETTINGS, 'subtitle_download_enabled') and not SETTINGS.subtitle_download_enabled: - filtered_tasks: List[Dict[str, Any]] = [] - skipped_count = 0 - for task in saved_tasks: - content_type = (task.get('content_type') or 'video').lower() - if content_type == 'subtitle': - skipped_count += 1 - continue - filtered_tasks.append(task) - restored_tasks = filtered_tasks - if skipped_count: + if SETTINGS and not SETTINGS.subtitle_download_enabled: + all_tasks_count = len(restored_tasks) + restored_tasks = [ + task for task in restored_tasks + if (task.get('content_type') or 'video').lower() != 'subtitle' + 
] + skipped_count = all_tasks_count - len(restored_tasks) + if skipped_count > 0: print(f"ā­ļø Skipping {skipped_count} cached subtitle task(s) because subtitle downloads are disabled.") if not restored_tasks: From 08b0129d7017ae37c4f2ab23a262a9094f9f57b3 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 13:06:05 -0400 Subject: [PATCH 10/19] Update thinkific_downloader/wistia_downloader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- thinkific_downloader/wistia_downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thinkific_downloader/wistia_downloader.py b/thinkific_downloader/wistia_downloader.py index fbf5694..f64d02c 100644 --- a/thinkific_downloader/wistia_downloader.py +++ b/thinkific_downloader/wistia_downloader.py @@ -125,7 +125,7 @@ def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], lab if isinstance(asset, dict): asset_type = (asset.get('type') or '').lower() asset_kind = (asset.get('kind') or '').lower() - if asset_type in ('caption', 'captions', 'subtitle', 'subtitles') or asset_kind in ('caption', 'captions', 'subtitle', 'subtitles'): + if asset_type in {'caption', 'captions', 'subtitle', 'subtitles'} or asset_kind in {'caption', 'captions', 'subtitle', 'subtitles'}: add_track( asset.get('url') or asset.get('src'), asset.get('language') or asset.get('lang'), From 47192c43ad6251034f904ccf9f9da0adfdb1d72b Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 13:06:26 -0400 Subject: [PATCH 11/19] Update thinkific_downloader/downloader.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- thinkific_downloader/downloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index 870647f..d389817 100644 --- a/thinkific_downloader/downloader.py +++ 
b/thinkific_downloader/downloader.py @@ -410,8 +410,8 @@ def _load_cached_progress(cache_file: Path): saved_tasks = [] try: cache_file.unlink() - except OSError: - pass + except OSError as e: + print(f" āš ļø Warning: Failed to delete cache file for refresh: {e}") except (json.JSONDecodeError, OSError): analyzed_chapters = set() saved_tasks = [] From 98112af0031924e8b40c99f6cafe52eb360e4a94 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 13:13:51 -0400 Subject: [PATCH 12/19] Improve resume cache handling for subtitles --- thinkific_downloader/downloader.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index d389817..a79599d 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -410,8 +410,8 @@ def _load_cached_progress(cache_file: Path): saved_tasks = [] try: cache_file.unlink() - except OSError as e: - print(f" āš ļø Warning: Failed to delete cache file for refresh: {e}") + except OSError as exc: + print(f" āš ļø Warning: Failed to delete cache file for refresh: {exc}") except (json.JSONDecodeError, OSError): analyzed_chapters = set() saved_tasks = [] @@ -424,14 +424,14 @@ def _restore_saved_tasks(saved_tasks: List[Dict[str, Any]]): if not saved_tasks: return - restored_tasks = saved_tasks + restored_tasks = list(saved_tasks) if SETTINGS and not SETTINGS.subtitle_download_enabled: - all_tasks_count = len(restored_tasks) + total_tasks = len(restored_tasks) restored_tasks = [ task for task in restored_tasks if (task.get('content_type') or 'video').lower() != 'subtitle' ] - skipped_count = all_tasks_count - len(restored_tasks) + skipped_count = total_tasks - len(restored_tasks) if skipped_count > 0: print(f"ā­ļø Skipping {skipped_count} cached subtitle task(s) because subtitle downloads are disabled.") From 462b3b6fab583a836e0b2f22557a9e20c47974aa Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: 
Tue, 14 Oct 2025 13:13:59 -0400 Subject: [PATCH 13/19] Return Wistia subtitle tasks for callers --- thinkific_downloader/downloader.py | 12 +- thinkific_downloader/wistia_downloader.py | 139 +++++++++++++--------- 2 files changed, 93 insertions(+), 58 deletions(-) diff --git a/thinkific_downloader/downloader.py b/thinkific_downloader/downloader.py index a79599d..4b93d69 100644 --- a/thinkific_downloader/downloader.py +++ b/thinkific_downloader/downloader.py @@ -892,8 +892,16 @@ def collect_video_task_wistia(wistia_id: str, file_name: str, dest_dir: Path): print(f" šŸ“¹ Found video: {resolved_name}") add_download_task(video_url, dest_dir / resolved_name, "video") try: - from .wistia_downloader import queue_wistia_subtitle_downloads - queue_wistia_subtitle_downloads(data.get('media') or {}, dest_dir, resolved_name) + from .wistia_downloader import build_wistia_subtitle_tasks + subtitle_tasks = build_wistia_subtitle_tasks( + data.get('media') or {}, + dest_dir, + resolved_name, + SETTINGS, + ) + for task in subtitle_tasks: + print(f" [Subs] Queued subtitles: {Path(task['dest_path']).name}") + add_download_task(task['url'], Path(task['dest_path']), task.get('content_type', 'subtitle')) except Exception as subtitle_error: print(f" āš ļø Unable to queue subtitles for {resolved_name}: {subtitle_error}") except Exception as e: diff --git a/thinkific_downloader/wistia_downloader.py b/thinkific_downloader/wistia_downloader.py index f64d02c..6421f86 100644 --- a/thinkific_downloader/wistia_downloader.py +++ b/thinkific_downloader/wistia_downloader.py @@ -81,8 +81,10 @@ def add_track(url: Optional[str], language: Optional[str], label: Optional[str], 'ext': (ext or '').lstrip('.') or None }) - for track in media.get('captions') or []: - if isinstance(track, dict): + def collect_from_captions(caption_items: Optional[Iterable[Dict[str, Any]]]): + for track in caption_items or []: + if not isinstance(track, dict): + continue add_track( track.get('url') or track.get('src'), 
track.get('language') or track.get('lang'), @@ -90,42 +92,40 @@ def add_track(url: Optional[str], language: Optional[str], label: Optional[str], track.get('ext') ) - def iter_track_dicts(collection: Optional[Iterable[Dict[str, Any]]]): - for item in collection or []: - if isinstance(item, dict): - yield item - - def extract_label(track_dict: Dict[str, Any], label_keys: Iterable[str]) -> Optional[str]: - for key in label_keys: - value = track_dict.get(key) - if value: - return value - return None - - def iter_track_sources(track_dict: Dict[str, Any]): - sources = track_dict.get('sources') or [] - if not sources: - yield track_dict.get('url') or track_dict.get('src'), track_dict.get('ext') - return - for source in sources: - if isinstance(source, dict): - yield source.get('url') or source.get('src'), source.get('ext') or track_dict.get('ext') - - def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], label_keys: Iterable[str]): - for track in iter_track_dicts(collection): + def collect_from_text_tracks(track_items: Optional[Iterable[Dict[str, Any]]], label_keys: Iterable[str]): + label_key_order = tuple(label_keys) + for track in track_items or []: + if not isinstance(track, dict): + continue language = track.get('language') or track.get('lang') - label = extract_label(track, label_keys) - for source_url, source_ext in iter_track_sources(track): - add_track(source_url, language, label, source_ext) - - process_track_collection(media.get('text_tracks'), ('name', 'label')) - process_track_collection(media.get('textTracks'), ('name', 'label', 'title')) + label = next((track.get(key) for key in label_key_order if track.get(key)), None) + sources = track.get('sources') or [] + if sources: + for source in sources: + if not isinstance(source, dict): + continue + add_track( + source.get('url') or source.get('src'), + language, + label, + source.get('ext') or track.get('ext') + ) + else: + add_track( + track.get('url') or track.get('src'), + language, + 
label, + track.get('ext') + ) - for asset in media.get('assets') or []: - if isinstance(asset, dict): + def collect_from_assets(asset_items: Optional[Iterable[Dict[str, Any]]]): + subtitle_flags = {'caption', 'captions', 'subtitle', 'subtitles'} + for asset in asset_items or []: + if not isinstance(asset, dict): + continue asset_type = (asset.get('type') or '').lower() asset_kind = (asset.get('kind') or '').lower() - if asset_type in {'caption', 'captions', 'subtitle', 'subtitles'} or asset_kind in {'caption', 'captions', 'subtitle', 'subtitles'}: + if asset_type in subtitle_flags or asset_kind in subtitle_flags: add_track( asset.get('url') or asset.get('src'), asset.get('language') or asset.get('lang'), @@ -133,12 +133,17 @@ def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], lab asset.get('ext') ) - available_transcripts = media.get('availableTranscripts') or [] - if hashed_id and available_transcripts: - for transcript in available_transcripts: + def collect_from_transcripts(transcripts: Optional[Iterable[Dict[str, Any]]]): + if not hashed_id: + return + for transcript in transcripts or []: if not isinstance(transcript, dict) or not transcript.get('hasCaptions'): continue - language = transcript.get('language') or transcript.get('wistiaLanguageCode') or transcript.get('bcp47LanguageTag') + language = ( + transcript.get('language') + or transcript.get('wistiaLanguageCode') + or transcript.get('bcp47LanguageTag') + ) if not language: continue add_track( @@ -148,6 +153,12 @@ def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], lab DEFAULT_SUBTITLE_EXTENSION ) + collect_from_captions(media.get('captions')) + collect_from_text_tracks(media.get('text_tracks'), ('name', 'label')) + collect_from_text_tracks(media.get('textTracks'), ('name', 'label', 'title')) + collect_from_assets(media.get('assets')) + collect_from_transcripts(media.get('availableTranscripts')) + unique_tracks: Dict[str, Dict[str, Optional[str]]] = {} 
for track in tracks: url = track['url'] @@ -168,21 +179,22 @@ def process_track_collection(collection: Optional[Iterable[Dict[str, Any]]], lab return list(unique_tracks.values()) -def queue_wistia_subtitle_downloads(media: Dict[str, Any], dest_dir: Path, video_base_name: str): - """Queue subtitle download tasks for a Wistia media object.""" - from .downloader import SETTINGS, add_download_task, init_settings - +def build_wistia_subtitle_tasks( + media: Dict[str, Any], + dest_dir: Path, + video_base_name: str, + settings: Optional[Any] = None, +) -> List[Dict[str, Any]]: + """Construct subtitle download task dicts for a Wistia media object.""" if not isinstance(dest_dir, Path): dest_dir = Path(dest_dir) - init_settings() - settings = SETTINGS - if settings and hasattr(settings, 'subtitle_download_enabled') and not settings.subtitle_download_enabled: - return + if settings and not getattr(settings, 'subtitle_download_enabled', True): + return [] tracks = extract_wistia_subtitle_tracks(media) if not tracks: - return + return [] base_name = Path(video_base_name).stem if not base_name: @@ -194,6 +206,7 @@ def queue_wistia_subtitle_downloads(media: Dict[str, Any], dest_dir: Path, video if not base_name: base_name = "captions" + tasks: List[Dict[str, Any]] = [] counter = 1 for track in tracks: url = track.get('url') @@ -201,11 +214,11 @@ def queue_wistia_subtitle_downloads(media: Dict[str, Any], dest_dir: Path, video continue ext = (track.get('ext') or _infer_track_extension(url)).lstrip('.').lower() or DEFAULT_SUBTITLE_EXTENSION - language_part = track.get('language') or track.get('label') or '' - if isinstance(language_part, (list, dict)): + language_raw = track.get('language') or track.get('label') + if isinstance(language_raw, str): + language_part = _LANGUAGE_SANITIZE_PATTERN.sub('-', language_raw).strip('-') + else: language_part = '' - language_part = str(language_part or '') - language_part = _LANGUAGE_SANITIZE_PATTERN.sub('-', language_part).strip('-') if not 
language_part: language_part = 'captions' if counter == 1 else f"captions-{counter}" @@ -214,10 +227,17 @@ def queue_wistia_subtitle_downloads(media: Dict[str, Any], dest_dir: Path, video if not subtitle_filename: subtitle_filename = filter_filename(f"{base_name}.captions-{counter}.{ext}") - print(f" [Subs] Queued subtitles: {subtitle_filename}") - add_download_task(url, dest_dir / subtitle_filename, "subtitle") + tasks.append({ + 'url': url, + 'dest_path': dest_dir / subtitle_filename, + 'content_type': 'subtitle', + 'label': track.get('label'), + 'language': track.get('language'), + }) counter += 1 + return tasks + def video_downloader_videoproxy(video_url: str, file_name: str, quality: str = "720p"): from .downloader import http_get # delayed import @@ -376,7 +396,11 @@ def infer_ext(asset: dict) -> str: DOWNLOAD_MANAGER.download_file(a_url, Path(filter_filename(out_name))) else: print("Download manager not initialized") - queue_wistia_subtitle_downloads(media, current_dir, resolved_base) + from .downloader import SETTINGS, add_download_task + subtitle_tasks = build_wistia_subtitle_tasks(media, current_dir, resolved_base, SETTINGS) + for task in subtitle_tasks: + print(f" [Subs] Queued subtitles: {task['dest_path'].name}") + add_download_task(task['url'], task['dest_path'], task.get('content_type', 'subtitle')) return # Single quality path @@ -404,7 +428,10 @@ def infer_ext(asset: dict) -> str: print(f"URL : {video_url}\nFile Name : {resolved_name}") # Queue video for parallel download with absolute path to current directory - from .downloader import add_download_task + from .downloader import SETTINGS, add_download_task full_path = current_dir / resolved_name # Create absolute path add_download_task(video_url, full_path, "video") - queue_wistia_subtitle_downloads(media, current_dir, resolved_name) + subtitle_tasks = build_wistia_subtitle_tasks(media, current_dir, resolved_name, SETTINGS) + for task in subtitle_tasks: + print(f" [Subs] Queued subtitles: 
{task['dest_path'].name}") + add_download_task(task['url'], task['dest_path'], task.get('content_type', 'subtitle')) From 750d3614ab9c7fc6b4426210c70d7e10acdb8cb1 Mon Sep 17 00:00:00 2001 From: Oleksiy Kovyrin Date: Tue, 14 Oct 2025 17:49:38 -0400 Subject: [PATCH 14/19] Add PRD and tasks for offline course viewer --- docs/tasks/prd-local-course-viewer.md | 85 +++++++++++++++++++++ docs/tasks/tasks-prd-local-course-viewer.md | 64 ++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 docs/tasks/prd-local-course-viewer.md create mode 100644 docs/tasks/tasks-prd-local-course-viewer.md diff --git a/docs/tasks/prd-local-course-viewer.md b/docs/tasks/prd-local-course-viewer.md new file mode 100644 index 0000000..35173c8 --- /dev/null +++ b/docs/tasks/prd-local-course-viewer.md @@ -0,0 +1,85 @@ +# Local Course Viewer PRD + +## 1. Introduction / Overview +Create a Python-based generator that turns a downloaded Thinkific course into a self-contained static website for offline consumption. The script should read the provided course metadata JSON (e.g., `beginner-chess-mastery.json`) and the corresponding assets already stored under `downloads//`, validate that everything needed is present, and produce an easy-to-navigate two-pane interface. The generated site must work when opened directly from the filesystem (no server) and allow a learner to browse chapters, play videos, and read text lessons completely offline. + +## 2. Goals +- Provide a one-command workflow that accepts a Thinkific course metadata JSON file and emits an offline-ready static site in the matching `downloads//` directory. +- Mirror the course hierarchy (chapters → lessons) in a left-hand navigation tree with quick access to each lesson. +- Render lesson content appropriately in the main pane: embedded video playback (with captions) for video lessons, and readable formatted text for HTML lessons. 
+- Package all required assets (CSS, JS, fonts) locally so the experience works without network access.
+
+## 3. User Stories
+1. **As a learner traveling without reliable internet**, I want to open `downloads/<course-slug>/index.html` and continue the course offline, so I can make use of the content anywhere.
+2. **As a downloader maintainer**, I want the generator to fail fast if lesson assets are missing, so I can fix gaps before distributing the course dump.
+3. **As a learner**, I want to jump between lessons quickly using a chapter tree, so I can find specific topics without scrolling through a long page.
+4. **As a learner**, I want video lessons to include captions when available, so I can follow along in noisy environments.
+5. **As a learner**, I want links to attachments (e.g., PDFs) surfaced with each lesson, so I can access supporting materials.
+
+## 4. Functional Requirements
+1. **CLI entrypoint**
+   - Provide a Python command (e.g., `python -m thinkific_downloader.generate_site <metadata.json>`) that accepts at minimum: path to the metadata JSON, optional `--downloads-dir` override (default `downloads/`), and optional `--output-subdir` name (default the course slug).
+2. **Metadata ingestion and validation**
+   - Parse the JSON and confirm required keys exist (`course.slug`, `chapters`, `contents`).
+   - Build an in-memory course model linking chapters to lesson content via IDs.
+   - Emit actionable errors when the JSON structure is unexpected.
+3. **Asset validation**
+   - Locate the base course folder at `downloads/<course-slug>/` (configurable via CLI).
+   - For each lesson, verify the expected asset directory exists (matching lesson slug or already-downloaded folder naming).
+   - Confirm that required primary assets exist: `.mp4` for videos, `.html` for text lessons, plus optional assets (`.vtt`, PDFs, images).
+   - Surface a consolidated report of missing assets before generation.
+4. **Output structure**
+   - Generate a static site rooted at `downloads/<course-slug>/index.html`.
+ - Place shared assets under a subfolder (e.g., `downloads//site-assets/`) containing CSS, JS, icons, and fonts (if any). + - Preserve or reuse existing lesson folders; do not modify original media files. +5. **Navigation UI** + - Render an always-expanded chapter list in the left sidebar reflecting chapter order (`position`) without collapse/expand controls. + - List lessons within each chapter in order, distinguishing lesson types (video vs text) with an icon or label. + - Highlight the currently selected lesson and keep the selection in sync when switching content. +6. **Lesson rendering** + - For video lessons, embed the local `.mp4` via `