diff --git a/pyproject.toml b/pyproject.toml
index e1eaed6..43576f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ dependencies = [
     "jinja2",
     "markdown",
     "questionary",
+    "watchdog",
 ]
 
 [project.urls]
diff --git a/src/claude_code_transcripts/__init__.py b/src/claude_code_transcripts/__init__.py
index f2246a2..16e8b21 100644
--- a/src/claude_code_transcripts/__init__.py
+++ b/src/claude_code_transcripts/__init__.py
@@ -8,6 +8,8 @@
 import shutil
 import subprocess
 import tempfile
+import threading
+import time
 import webbrowser
 from datetime import datetime
 from pathlib import Path
@@ -18,6 +20,8 @@
 from jinja2 import Environment, PackageLoader
 import markdown
 import questionary
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
 
 # Set up Jinja2 environment
 _jinja_env = Environment(
@@ -380,6 +384,109 @@ def generate_batch_html(
     }
 
 
+def generate_incremental_html(
+    source_folder,
+    output_dir,
+    changed_files,
+    include_agents=False,
+    progress_callback=None,
+):
+    """Incrementally regenerate HTML for only the changed session files.
+
+    Regenerates the sessions corresponding to the changed files, rebuilds
+    the index of every project with a changed or deleted session, and
+    rebuilds the master index.
+
+    Args:
+        source_folder: Path to the Claude projects folder
+        output_dir: Path for output archive
+        changed_files: Set of Path objects for files that have changed
+        include_agents: Whether to include agent-* session files
+        progress_callback: Optional callback(project_name, session_name, current, total)
+
+    Returns statistics dict with sessions_regenerated, failed_sessions, projects_updated.
+    """
+    source_folder = Path(source_folder)
+    output_dir = Path(output_dir)
+
+    # Only session transcripts matter; skip agent files unless requested
+    session_paths = {f for f in changed_files if f.suffix == ".jsonl"}
+    if not include_agents:
+        session_paths = {f for f in session_paths if not f.name.startswith("agent-")}
+
+    # A deleted session cannot be rendered, but its project index must
+    # still be rebuilt so that it reflects the removal
+    affected_projects = {f.parent.name for f in session_paths if not f.exists()}
+
+    # Decide what to render up front so the progress total is accurate
+    sessions_to_render = []
+    for session_file in sorted(f for f in session_paths if f.exists()):
+        # Get summary and skip boring sessions
+        summary = get_session_summary(session_file)
+        if summary.lower() == "warmup" or summary == "(no summary)":
+            continue
+        sessions_to_render.append(session_file)
+
+    sessions_regenerated = 0
+    failed_sessions = []
+    total_count = len(sessions_to_render)
+
+    for processed_count, session_file in enumerate(sessions_to_render, start=1):
+        project_key = session_file.parent.name
+        project_name = get_project_display_name(project_key)
+
+        # Track this project as needing index update
+        affected_projects.add(project_key)
+
+        # Generate session HTML
+        session_name = session_file.stem
+        project_dir = output_dir / project_name
+        # parents=True because the archive root may not exist yet
+        project_dir.mkdir(parents=True, exist_ok=True)
+        session_dir = project_dir / session_name
+
+        try:
+            generate_html(session_file, session_dir)
+            sessions_regenerated += 1
+        except Exception as e:
+            failed_sessions.append(
+                {
+                    "project": project_name,
+                    "session": session_name,
+                    "error": str(e),
+                }
+            )
+
+        if progress_callback:
+            progress_callback(project_name, session_name, processed_count, total_count)
+
+    # Rebuild project indexes for affected projects only
+    # We need the full project data to generate the index
+    all_projects = find_all_sessions(source_folder, include_agents=include_agents)
+    # Match by the original folder name
+    projects_by_key = {project["path"].name: project for project in all_projects}
+
+    projects_updated = 0
+    for project_key in sorted(affected_projects):
+        project = projects_by_key.get(project_key)
+        if project is None:
+            # Not in the current listing, so there is no index to build
+            continue
+        project_dir = output_dir / project["name"]
+        project_dir.mkdir(parents=True, exist_ok=True)
+        _generate_project_index(project, project_dir)
+        projects_updated += 1
+
+    # Always regenerate master index (session counts might have changed)
+    _generate_master_index(all_projects, output_dir)
+
+    return {
+        "sessions_regenerated": sessions_regenerated,
+        "failed_sessions": failed_sessions,
+        "projects_updated": projects_updated,
+    }
+
+
 def _generate_project_index(project, output_dir):
     """Generate index.html for a single project."""
     template = get_template("project_index.html")
@@ -505,6 +612,81 @@ class CredentialsError(Exception):
     pass
 
 
+class TranscriptWatcher(FileSystemEventHandler):
+    """File system event handler for watching transcript source directory.
+
+    Events are recorded by on_any_event(), which watchdog calls on the
+    observer thread, and consumed by check_and_update(), which is polled
+    from the main thread, so the shared state is guarded by a lock.
+    """
+
+    # Event types reported when a file is only read. Regeneration reads the
+    # transcripts itself, so reacting to these would retrigger it endlessly.
+    READ_ONLY_EVENT_TYPES = ("opened", "closed_no_write")
+
+    def __init__(self, output_dir, debounce_seconds, quiet=False):
+        super().__init__()
+        self.output_dir = output_dir
+        self.debounce_seconds = debounce_seconds
+        self.quiet = quiet
+        self.last_trigger_time = 0
+        self.pending_update = False
+        self.changed_files = set()
+        self.generation_callback = None
+        self._lock = threading.Lock()
+        # Resolved once so a relative output path can be compared with
+        # resolved event paths
+        self._resolved_output_dir = Path(output_dir).resolve()
+
+    def should_process_event(self, event):
+        """Check if we should process this file system event."""
+        if event.is_directory:
+            return False
+
+        if event.event_type in self.READ_ONLY_EVENT_TYPES:
+            return False
+
+        event_path = Path(event.src_path)
+        resolved_path = event_path.resolve()
+        if (
+            resolved_path == self._resolved_output_dir
+            or self._resolved_output_dir in resolved_path.parents
+        ):
+            return False
+
+        ignore_patterns = [".tmp", ".swp", "~", ".DS_Store", "__pycache__"]
+        if any(pattern in event_path.name for pattern in ignore_patterns):
+            return False
+
+        return True
+
+    def on_any_event(self, event):
+        """Handle file system events."""
+        if not self.should_process_event(event):
+            return
+        with self._lock:
+            self.pending_update = True
+            self.changed_files.add(Path(event.src_path))
+            self.last_trigger_time = time.monotonic()
+
+    def check_and_update(self):
+        """Check if enough time has passed and trigger regeneration if needed."""
+        with self._lock:
+            if not self.pending_update:
+                return
+            time_since_last_trigger = time.monotonic() - self.last_trigger_time
+            if time_since_last_trigger < self.debounce_seconds:
+                return
+            self.pending_update = False
+            # Swap instead of copy-then-clear so no event is lost in between
+            changed, self.changed_files = self.changed_files, set()
+
+        # Call back outside the lock so a slow regeneration does not block
+        # the observer thread from recording new events
+        if self.generation_callback:
+            self.generation_callback(changed)
+
+
 def get_access_token_from_keychain():
     """Get access token from macOS keychain.
 
@@ -2030,7 +2212,20 @@ def web_cmd(
     is_flag=True,
     help="Suppress all output except errors.",
 )
-def all_cmd(source, output, include_agents, dry_run, open_browser, quiet):
+@click.option(
+    "--watch",
+    is_flag=True,
+    help="Watch for changes and regenerate automatically.",
+)
+@click.option(
+    "--debounce",
+    type=int,
+    default=60,
+    help="Seconds to wait after last change before regenerating (default: 60).",
+)
+def all_cmd(
+    source, output, include_agents, dry_run, open_browser, quiet, watch, debounce
+):
     """Convert all local Claude Code sessions to a browsable HTML archive.
 
     Creates a directory structure with:
@@ -2049,6 +2244,78 @@ def all_cmd(source, output, include_agents, dry_run, open_browser, quiet):
 
     output = Path(output)
 
+    if watch:
+        _run_watch_mode(source, output, include_agents, open_browser, quiet, debounce)
+    else:
+        _run_all_generation(
+            source, output, include_agents, dry_run, open_browser, quiet
+        )
+
+
+def _run_watch_mode(source, output, include_agents, open_browser, quiet, debounce):
+    """Watch source directory and regenerate transcripts on changes."""
+
+    def run_incremental_generation(changed_files):
+        start_time = time.monotonic()
+        if not quiet:
+            click.echo(f"\nRegenerating {len(changed_files)} changed file(s)...")
+
+        try:
+            stats = generate_incremental_html(
+                source, output, changed_files, include_agents=include_agents
+            )
+        except Exception as e:
+            # A failed regeneration must not end the watch session
+            click.echo(f"Error during regeneration: {e}", err=True)
+            return
+
+        # Failures are errors, so report them even in quiet mode
+        for failure in stats["failed_sessions"]:
+            click.echo(
+                f"Failed to regenerate {failure['project']}/{failure['session']}: "
+                f"{failure['error']}",
+                err=True,
+            )
+
+        duration = time.monotonic() - start_time
+        if not quiet:
+            click.echo(
+                f"Regenerated {stats['sessions_regenerated']} session(s) ({duration:.1f}s)"
+            )
+
+    # Initial full generation
+    if not quiet:
+        click.echo("Generating initial archive...")
+    _run_all_generation(source, output, include_agents, False, False, quiet)
+
+    if open_browser:
+        index_url = (output / "index.html").resolve().as_uri()
+        webbrowser.open(index_url)
+
+    if not quiet:
+        click.echo(f"\nWatching {source} for changes...")
+        click.echo("Press Ctrl+C to stop.")
+
+    observer = Observer()
+    handler = TranscriptWatcher(output, debounce, quiet)
+    handler.generation_callback = run_incremental_generation
+    observer.schedule(handler, str(source), recursive=True)
+    observer.start()
+
+    try:
+        while True:
+            time.sleep(1)
+            handler.check_and_update()
+    except KeyboardInterrupt:
+        pass
+    finally:
+        # Stop the observer thread however the loop ends
+        observer.stop()
+        observer.join()
+
+
+def _run_all_generation(source, output, include_agents, dry_run, open_browser, quiet):
+    """Run the all-sessions generation logic."""
     if not quiet:
         click.echo(f"Scanning {source}...")
 
diff --git a/tests/test_watch.py b/tests/test_watch.py
new file mode 100644
index 0000000..00046f9
--- /dev/null
+++ b/tests/test_watch.py
@@ -0,0 +1,301 @@
+"""Tests for watch mode functionality."""
+
+import tempfile
+import time
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from claude_code_transcripts import TranscriptWatcher, generate_batch_html
+
+
+@pytest.fixture
+def mock_projects_dir():
+    """Create a mock ~/.claude/projects structure with test sessions."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        projects_dir = Path(tmpdir)
+
+        # Create project-a with 2 sessions
+        project_a = projects_dir / "-home-user-projects-project-a"
+        project_a.mkdir(parents=True)
+
+        session_a1 = project_a / "abc123.jsonl"
+        session_a1.write_text(
+            '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Hello from project A"}}\n'
+            '{"type": "assistant", "timestamp": "2025-01-01T10:00:05.000Z", "message": {"role": "assistant", "content": [{"type": "text", "text": "Hi there!"}]}}\n'
+        )
+
+        session_a2 = project_a / "def456.jsonl"
+        session_a2.write_text(
+            '{"type": "user", "timestamp": "2025-01-02T10:00:00.000Z", "message": {"role": "user", "content": "Second session in project A"}}\n'
'{"type": "assistant", "timestamp": "2025-01-02T10:00:05.000Z", "message": {"role": "assistant", "content": [{"type": "text", "text": "Got it!"}]}}\n' + ) + + # Create project-b with 1 session + project_b = projects_dir / "-home-user-projects-project-b" + project_b.mkdir(parents=True) + + session_b1 = project_b / "ghi789.jsonl" + session_b1.write_text( + '{"type": "user", "timestamp": "2025-01-04T10:00:00.000Z", "message": {"role": "user", "content": "Hello from project B"}}\n' + '{"type": "assistant", "timestamp": "2025-01-04T10:00:05.000Z", "message": {"role": "assistant", "content": [{"type": "text", "text": "Welcome!"}]}}\n' + ) + + yield projects_dir + + +@pytest.fixture +def output_dir(): + """Create a temporary output directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + +class TestTranscriptWatcher: + """Tests for TranscriptWatcher class.""" + + def test_tracks_changed_files(self, output_dir): + """Test that TranscriptWatcher tracks which files have changed.""" + watcher = TranscriptWatcher(output_dir, debounce_seconds=0.1) + + # Simulate a file change event + mock_event = MagicMock() + mock_event.is_directory = False + mock_event.src_path = "/home/user/.claude/projects/test/session1.jsonl" + + watcher.on_any_event(mock_event) + + # Should have tracked the changed file + assert hasattr(watcher, "changed_files") + assert len(watcher.changed_files) > 0 + assert Path(mock_event.src_path) in watcher.changed_files + + def test_tracks_multiple_changed_files(self, output_dir): + """Test that multiple file changes are tracked.""" + watcher = TranscriptWatcher(output_dir, debounce_seconds=0.1) + + # Simulate multiple file change events + paths = [ + "/home/user/.claude/projects/test/session1.jsonl", + "/home/user/.claude/projects/test/session2.jsonl", + "/home/user/.claude/projects/other/session3.jsonl", + ] + + for path in paths: + mock_event = MagicMock() + mock_event.is_directory = False + mock_event.src_path = path + 
watcher.on_any_event(mock_event) + + assert len(watcher.changed_files) == 3 + + def test_clears_changed_files_after_callback(self, output_dir): + """Test that changed files are cleared after callback is invoked.""" + watcher = TranscriptWatcher(output_dir, debounce_seconds=0) + callback_args = [] + + def callback(changed_paths): + callback_args.append(changed_paths.copy()) + + watcher.generation_callback = callback + + # Simulate a file change + mock_event = MagicMock() + mock_event.is_directory = False + mock_event.src_path = "/home/user/.claude/projects/test/session1.jsonl" + + watcher.on_any_event(mock_event) + watcher.check_and_update() + + # Callback should have received the changed files + assert len(callback_args) == 1 + assert len(callback_args[0]) == 1 + + # Changed files should be cleared + assert len(watcher.changed_files) == 0 + + def test_passes_changed_files_to_callback(self, output_dir): + """Test that callback receives set of changed files.""" + watcher = TranscriptWatcher(output_dir, debounce_seconds=0) + received_changes = [] + + def callback(changed_paths): + received_changes.extend(changed_paths) + + watcher.generation_callback = callback + + # Simulate file changes + mock_event = MagicMock() + mock_event.is_directory = False + mock_event.src_path = "/home/user/.claude/projects/test/session1.jsonl" + + watcher.on_any_event(mock_event) + watcher.check_and_update() + + assert len(received_changes) == 1 + assert ( + Path("/home/user/.claude/projects/test/session1.jsonl") in received_changes + ) + + +class TestIncrementalGeneration: + """Tests for incremental HTML generation.""" + + def test_incremental_update_only_regenerates_changed_session( + self, mock_projects_dir, output_dir + ): + """Test that incremental update only regenerates the changed session.""" + # Do initial full generation + generate_batch_html(mock_projects_dir, output_dir) + + # Record initial modification times + session_a1_html = output_dir / "project-a" / "abc123" / 
"index.html" + session_a2_html = output_dir / "project-a" / "def456" / "index.html" + session_b_html = output_dir / "project-b" / "ghi789" / "index.html" + + initial_a1_mtime = session_a1_html.stat().st_mtime + initial_a2_mtime = session_a2_html.stat().st_mtime + initial_b_mtime = session_b_html.stat().st_mtime + + # Wait a bit to ensure mtime differences are detectable + time.sleep(0.1) + + # Import the incremental update function + from claude_code_transcripts import generate_incremental_html + + # Simulate only session a1 changing + changed_files = { + mock_projects_dir / "-home-user-projects-project-a" / "abc123.jsonl" + } + + # Do incremental update + stats = generate_incremental_html( + mock_projects_dir, output_dir, changed_files, include_agents=False + ) + + # Only the changed session should have been regenerated + assert stats["sessions_regenerated"] == 1 + + # session_a1 should have new mtime + assert session_a1_html.stat().st_mtime > initial_a1_mtime + + # session_a2 and session_b should have same mtime (not regenerated) + assert session_a2_html.stat().st_mtime == initial_a2_mtime + assert session_b_html.stat().st_mtime == initial_b_mtime + + def test_incremental_update_updates_affected_project_index( + self, mock_projects_dir, output_dir + ): + """Test that project index is updated when a session in it changes.""" + # Do initial full generation + generate_batch_html(mock_projects_dir, output_dir) + + project_a_index = output_dir / "project-a" / "index.html" + project_b_index = output_dir / "project-b" / "index.html" + + initial_a_index_mtime = project_a_index.stat().st_mtime + initial_b_index_mtime = project_b_index.stat().st_mtime + + time.sleep(0.1) + + from claude_code_transcripts import generate_incremental_html + + # Simulate session in project-a changing + changed_files = { + mock_projects_dir / "-home-user-projects-project-a" / "abc123.jsonl" + } + + generate_incremental_html( + mock_projects_dir, output_dir, changed_files, include_agents=False + 
) + + # project-a index should be updated + assert project_a_index.stat().st_mtime > initial_a_index_mtime + + # project-b index should NOT be updated + assert project_b_index.stat().st_mtime == initial_b_index_mtime + + def test_incremental_update_updates_master_index( + self, mock_projects_dir, output_dir + ): + """Test that master index is updated on incremental changes.""" + # Do initial full generation + generate_batch_html(mock_projects_dir, output_dir) + + master_index = output_dir / "index.html" + initial_master_mtime = master_index.stat().st_mtime + + time.sleep(0.1) + + from claude_code_transcripts import generate_incremental_html + + changed_files = { + mock_projects_dir / "-home-user-projects-project-a" / "abc123.jsonl" + } + + generate_incremental_html( + mock_projects_dir, output_dir, changed_files, include_agents=False + ) + + # Master index should be updated + assert master_index.stat().st_mtime > initial_master_mtime + + def test_incremental_update_handles_new_session( + self, mock_projects_dir, output_dir + ): + """Test that incremental update can handle a newly created session.""" + # Do initial full generation + generate_batch_html(mock_projects_dir, output_dir) + + # Create a new session file + new_session = ( + mock_projects_dir / "-home-user-projects-project-a" / "newfile.jsonl" + ) + new_session.write_text( + '{"type": "user", "timestamp": "2025-01-10T10:00:00.000Z", "message": {"role": "user", "content": "New session content"}}\n' + '{"type": "assistant", "timestamp": "2025-01-10T10:00:05.000Z", "message": {"role": "assistant", "content": [{"type": "text", "text": "New response!"}]}}\n' + ) + + from claude_code_transcripts import generate_incremental_html + + changed_files = {new_session} + + stats = generate_incremental_html( + mock_projects_dir, output_dir, changed_files, include_agents=False + ) + + # New session should have been generated + new_session_html = output_dir / "project-a" / "newfile" / "index.html" + assert 
new_session_html.exists() + assert stats["sessions_regenerated"] == 1 + + def test_incremental_update_handles_multiple_changed_sessions( + self, mock_projects_dir, output_dir + ): + """Test that multiple changed sessions are all regenerated.""" + generate_batch_html(mock_projects_dir, output_dir) + + session_a1_html = output_dir / "project-a" / "abc123" / "index.html" + session_b_html = output_dir / "project-b" / "ghi789" / "index.html" + + initial_a1_mtime = session_a1_html.stat().st_mtime + initial_b_mtime = session_b_html.stat().st_mtime + + time.sleep(0.1) + + from claude_code_transcripts import generate_incremental_html + + # Both sessions changed + changed_files = { + mock_projects_dir / "-home-user-projects-project-a" / "abc123.jsonl", + mock_projects_dir / "-home-user-projects-project-b" / "ghi789.jsonl", + } + + stats = generate_incremental_html( + mock_projects_dir, output_dir, changed_files, include_agents=False + ) + + assert stats["sessions_regenerated"] == 2 + assert session_a1_html.stat().st_mtime > initial_a1_mtime + assert session_b_html.stat().st_mtime > initial_b_mtime