diff --git a/README.md b/README.md index 46effea..209dc7c 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,8 @@ Options: - `--dry-run` - show what would be converted without creating files - `--open` - open the generated archive in your default browser - `-q, --quiet` - suppress all output except errors +- `-m, --merge` - merge with existing archive (preserve orphan sessions) +- `--prefix NAME` - prefix for sessions in index (e.g., machine name) Examples: @@ -192,6 +194,28 @@ claude-code-transcripts all -o ./my-archive claude-code-transcripts all --include-agents ``` +### Merging archives from multiple machines + +Use `-m`/`--merge` to combine sessions from different machines into a single archive: + +```bash +# Machine A: create initial archive +claude-code-transcripts all -o /shared/archive + +# Machine B: merge additional sessions +claude-code-transcripts all -o /shared/archive -m + +# With prefix (shown in index) +claude-code-transcripts all -o /shared/archive --merge --prefix=laptop +``` + +The `--merge` option: +- Regenerates all sessions found in the source directory +- Preserves "orphan" sessions in the archive that are no longer in the source +- Useful for maintaining a unified archive from multiple machines + +The `--prefix` option adds a label to sessions in the index, helping identify which machine they came from. + ## Development To contribute to this tool, first checkout the code. You can run the tests using `uv run`: diff --git a/src/claude_code_transcripts/__init__.py b/src/claude_code_transcripts/__init__.py index 6318120..e9e62bd 100644 --- a/src/claude_code_transcripts/__init__.py +++ b/src/claude_code_transcripts/__init__.py @@ -303,8 +303,179 @@ def find_all_sessions(folder, include_agents=False): return result +def find_existing_sessions(output_dir): + """Find existing sessions in an output archive directory. + + Scans the output directory structure to find already-generated sessions. + Returns a list of project dicts in the same format as find_all_sessions(). + + Args: + output_dir: Path to the archive directory (e.g., ./claude-archive) + + Returns: + List of project dicts: [{"name": str, "path": None, "sessions": [...]}] + Session dicts contain: path (session dir), summary, mtime, size + """ + output_dir = Path(output_dir) + if not output_dir.exists(): + return [] + + # Regex patterns to extract session info from project index HTML + session_pattern = re.compile( + r'\s*' + r'
\s*' + r'([^<]+)\s*' + r"]*>([^<]+)\s*" + r"
\s*" + r'
\s*' + r"]*>([^<]*)", + re.DOTALL, + ) + + projects = [] + + # Iterate over subdirectories (projects) + for project_dir in output_dir.iterdir(): + if not project_dir.is_dir(): + continue + # Skip if it's the master index.html in root + if project_dir.name == "index.html": + continue + + project_index = project_dir / "index.html" + if not project_index.exists(): + continue + + # Parse project index HTML + html_content = project_index.read_text() + sessions = [] + + for match in session_pattern.finditer(html_content): + session_name = match.group(1) + # date = match.group(2) # Not used currently + # size_str = match.group(3) # e.g., "15 KB" + summary = match.group(4).strip() + # Handle truncated summaries (ending with ...) + if summary.endswith("..."): + summary = summary[:-3] + + session_dir = project_dir / session_name + if not session_dir.exists() or not session_dir.is_dir(): + continue + + # Get mtime from session directory + try: + mtime = session_dir.stat().st_mtime + except OSError: + mtime = 0 + + # Calculate size as sum of HTML files in session + size = sum( + f.stat().st_size + for f in session_dir.iterdir() + if f.is_file() and f.suffix == ".html" + ) + + sessions.append( + { + "path": session_dir, + "summary": summary, + "mtime": mtime, + "size": size, + } + ) + + if sessions: + # Sort sessions by mtime (most recent first) + sessions.sort(key=lambda s: s["mtime"], reverse=True) + projects.append( + { + "name": project_dir.name, + "path": None, # No source path for existing sessions + "sessions": sessions, + } + ) + + # Sort projects by most recent session + projects.sort( + key=lambda p: p["sessions"][0]["mtime"] if p["sessions"] else 0, reverse=True + ) + + return projects + + +def merge_sessions(source_sessions, existing_sessions): + """Merge sessions from source with existing archive. + + All source sessions will be (re)generated. + Sessions only in existing archive are preserved in the merged index. + + Args: + source_sessions: Projects from find_all_sessions() (new/updated) + existing_sessions: Projects from find_existing_sessions() + + Returns: + merged_projects: Combined list for index generation (source + orphans) + """ + # Build lookup of existing sessions by (project_name, session_stem) + existing_lookup = {} + for project in existing_sessions: + for session in project["sessions"]: + # Get session stem (filename without extension for source, dir name for existing) + session_stem = ( + session["path"].stem if session["path"].suffix else session["path"].name + ) + existing_lookup[(project["name"], session_stem)] = session + + # Build merged projects dict + merged = {} + + # Add all source sessions (they will be regenerated) + for project in source_sessions: + project_name = project["name"] + if project_name not in merged: + merged[project_name] = { + "name": project_name, + "path": project.get("path"), + "sessions": [], + } + + for session in project["sessions"]: + session_stem = session["path"].stem + # Mark this session as "from source" so we know it's not orphan + existing_lookup.pop((project_name, session_stem), None) + merged[project_name]["sessions"].append(session) + + # Add orphaned sessions (only in existing, not in source) + for (project_name, session_stem), session in existing_lookup.items(): + if project_name not in merged: + merged[project_name] = { + "name": project_name, + "path": None, + "sessions": [], + } + merged[project_name]["sessions"].append(session) + + # Sort sessions within each project by mtime (most recent first) + for project in merged.values(): + project["sessions"].sort(key=lambda s: s["mtime"], reverse=True) + + # Convert to list and sort projects by most recent session + result = list(merged.values()) + result.sort( + key=lambda p: p["sessions"][0]["mtime"] if p["sessions"] else 0, reverse=True + ) + + return result + + def generate_batch_html( - source_folder, output_dir, include_agents=False, progress_callback=None + source_folder, + output_dir, + include_agents=False, + progress_callback=None, + merge=False, + prefix=None, ): """Generate HTML archive for all sessions in a Claude projects folder. @@ -319,6 +490,8 @@ def generate_batch_html( include_agents: Whether to include agent-* session files progress_callback: Optional callback(project_name, session_name, current, total) called after each session is processed + merge: If True, preserve orphan sessions from existing archive in the index + prefix: Optional prefix to display for sessions in the index (e.g., machine name) Returns statistics dict with total_projects, total_sessions, failed_sessions, output_dir. """ @@ -326,17 +499,30 @@ def generate_batch_html( output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) - # Find all sessions - projects = find_all_sessions(source_folder, include_agents=include_agents) + # Find all sessions from source + source_projects = find_all_sessions(source_folder, include_agents=include_agents) + + # Add prefix to source sessions if specified + if prefix: + for project in source_projects: + for session in project["sessions"]: + session["prefix"] = prefix - # Calculate total for progress tracking - total_session_count = sum(len(p["sessions"]) for p in projects) + # Determine projects for index generation + if merge and output_dir.exists(): + existing_projects = find_existing_sessions(output_dir) + projects_for_index = merge_sessions(source_projects, existing_projects) + else: + projects_for_index = source_projects + + # Calculate total for progress tracking (only source sessions are generated) + total_session_count = sum(len(p["sessions"]) for p in source_projects) processed_count = 0 successful_sessions = 0 failed_sessions = [] - # Process each project - for project in projects: + # Process each source project (generate HTML for source sessions only) + for project in source_projects: project_dir = output_dir / project["name"] project_dir.mkdir(exist_ok=True) @@ -366,14 +552,17 @@ def generate_batch_html( project["name"], session_name, processed_count, total_session_count ) - # Generate project index + # Generate project indexes (using merged projects if applicable) + for project in projects_for_index: + project_dir = output_dir / project["name"] + project_dir.mkdir(exist_ok=True) _generate_project_index(project, project_dir) # Generate master index - _generate_master_index(projects, output_dir) + _generate_master_index(projects_for_index, output_dir) return { - "total_projects": len(projects), + "total_projects": len(projects_for_index), "total_sessions": successful_sessions, "failed_sessions": failed_sessions, "output_dir": output_dir, @@ -388,14 +577,20 @@ def _generate_project_index(project, output_dir): sessions_data = [] for session in project["sessions"]: mod_time = datetime.fromtimestamp(session["mtime"]) - sessions_data.append( - { - "name": session["path"].stem, - "summary": session["summary"], - "date": mod_time.strftime("%Y-%m-%d %H:%M"), - "size_kb": session["size"] / 1024, - } + # Get session name: stem for files, name for directories + session_name = ( + session["path"].stem if session["path"].suffix else session["path"].name ) + session_entry = { + "name": session_name, + "summary": session["summary"], + "date": mod_time.strftime("%Y-%m-%d %H:%M"), + "size_kb": session["size"] / 1024, + } + # Add prefix if present + if "prefix" in session: + session_entry["prefix"] = session["prefix"] + sessions_data.append(session_entry) html_content = template.render( project_name=project["name"], @@ -1927,7 +2122,21 @@ def web_cmd( is_flag=True, help="Suppress all output except errors.", ) -def all_cmd(source, output, include_agents, dry_run, open_browser, quiet): +@click.option( + "-m", + "--merge", + is_flag=True, + help="Merge with existing archive: regenerate source sessions, preserve orphans in index.", +) +@click.option( + "--prefix", + type=str, + default=None, + help="Prefix for sessions in index (e.g., --prefix=laptop).", +) +def all_cmd( + source, output, include_agents, dry_run, open_browser, quiet, merge, prefix +): """Convert all local Claude Code sessions to a browsable HTML archive. Creates a directory structure with: @@ -1993,6 +2202,8 @@ def on_progress(project_name, session_name, current, total): output, include_agents=include_agents, progress_callback=on_progress, + merge=merge, + prefix=prefix, ) # Report any failures diff --git a/src/claude_code_transcripts/templates/project_index.html b/src/claude_code_transcripts/templates/project_index.html index 3fc5750..6dbf01d 100644 --- a/src/claude_code_transcripts/templates/project_index.html +++ b/src/claude_code_transcripts/templates/project_index.html @@ -10,7 +10,7 @@

Claud
- {{ session.date }} + {% if session.prefix %}[{{ session.prefix }}] {% endif %}{{ session.date }} {{ "%.0f"|format(session.size_kb) }} KB
diff --git a/tests/test_all.py b/tests/test_all.py index 2a9c9e1..eaf4064 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -9,6 +9,8 @@ from claude_code_transcripts import ( cli, find_all_sessions, + find_existing_sessions, + merge_sessions, get_project_display_name, generate_batch_html, ) @@ -413,3 +415,452 @@ def test_all_quiet_with_dry_run(self, mock_projects_dir, output_dir): assert "project-a" not in result.output # Should not create any files assert not (output_dir / "index.html").exists() + + +@pytest.fixture +def mock_existing_archive(): + """Create a mock existing archive with project index HTML files.""" + with tempfile.TemporaryDirectory() as tmpdir: + archive_dir = Path(tmpdir) + + # Create project-a with 2 sessions + project_a = archive_dir / "project-a" + project_a.mkdir(parents=True) + + # Create project index HTML + project_a_index = """ + +project-a - Claude Code Archive + + + + +""" + (project_a / "index.html").write_text(project_a_index) + + # Create session directories with index.html + (project_a / "abc123").mkdir() + (project_a / "abc123" / "index.html").write_text("Session abc123") + (project_a / "def456").mkdir() + (project_a / "def456" / "index.html").write_text("Session def456") + + # Create project-b with 1 session + project_b = archive_dir / "project-b" + project_b.mkdir(parents=True) + + project_b_index = """ + +project-b - Claude Code Archive + + + +""" + (project_b / "index.html").write_text(project_b_index) + + (project_b / "ghi789").mkdir() + (project_b / "ghi789" / "index.html").write_text("Session ghi789") + + # Create master index (not parsed, but should exist) + (archive_dir / "index.html").write_text("Master index") + + yield archive_dir + + +class TestFindExistingSessions: + """Tests for find_existing_sessions function.""" + + def test_finds_existing_projects(self, mock_existing_archive): + """Test that existing projects are discovered.""" + result = find_existing_sessions(mock_existing_archive) + + assert len(result) == 2 + project_names = [p["name"] for p in result] + assert "project-a" in project_names + assert "project-b" in project_names + + def test_finds_existing_sessions(self, mock_existing_archive): + """Test that sessions within projects are discovered.""" + result = find_existing_sessions(mock_existing_archive) + + project_a = next(p for p in result if p["name"] == "project-a") + assert len(project_a["sessions"]) == 2 + + session_names = [s["path"].name for s in project_a["sessions"]] + assert "abc123" in session_names + assert "def456" in session_names + + def test_extracts_session_metadata(self, mock_existing_archive): + """Test that session summary is extracted from HTML.""" + result = find_existing_sessions(mock_existing_archive) + + project_a = next(p for p in result if p["name"] == "project-a") + session = next(s for s in project_a["sessions"] if s["path"].name == "abc123") + + assert session["summary"] == "Hello from project A" + assert "mtime" in session + assert "size" in session + + def test_returns_empty_for_nonexistent_dir(self): + """Test handling of non-existent output directory.""" + result = find_existing_sessions(Path("/nonexistent/path")) + assert result == [] + + def test_returns_empty_for_empty_dir(self, output_dir): + """Test handling of empty output directory.""" + result = find_existing_sessions(output_dir) + assert result == [] + + +class TestMergeSessions: + """Tests for merge_sessions function.""" + + def test_merge_includes_all_source_sessions(self): + """Test that all source sessions are included in merged result.""" + source = [ + { + "name": "project-a", + "path": Path("/src/project-a"), + "sessions": [ + { + "path": Path("/src/abc.jsonl"), + "summary": "Session A", + "mtime": 100, + "size": 1000, + }, + ], + } + ] + existing = [] + + result = merge_sessions(source, existing) + + assert len(result) == 1 + assert result[0]["name"] == "project-a" + assert len(result[0]["sessions"]) == 1 + + def test_merge_preserves_orphaned_sessions(self): + """Test that sessions only in archive are preserved in merged result.""" + source = [ + { + "name": "project-a", + "path": Path("/src/project-a"), + "sessions": [ + { + "path": Path("/src/abc.jsonl"), + "summary": "New session", + "mtime": 200, + "size": 1000, + }, + ], + } + ] + existing = [ + { + "name": "project-a", + "path": None, + "sessions": [ + { + "path": Path("/archive/project-a/orphan"), + "summary": "Orphan session", + "mtime": 100, + "size": 500, + }, + ], + } + ] + + result = merge_sessions(source, existing) + + # Should have 1 project with 2 sessions + assert len(result) == 1 + project_a = result[0] + assert len(project_a["sessions"]) == 2 + + # Both sessions should be present + summaries = [s["summary"] for s in project_a["sessions"]] + assert "New session" in summaries + assert "Orphan session" in summaries + + def test_merge_combines_projects(self): + """Test that projects from both sources are combined.""" + source = [ + { + "name": "project-a", + "path": Path("/src/project-a"), + "sessions": [ + { + "path": Path("/src/abc.jsonl"), + "summary": "Session A", + "mtime": 100, + "size": 1000, + }, + ], + } + ] + existing = [ + { + "name": "project-b", + "path": None, + "sessions": [ + { + "path": Path("/archive/project-b/xyz"), + "summary": "Session B", + "mtime": 50, + "size": 500, + }, + ], + } + ] + + result = merge_sessions(source, existing) + + # Should have 2 projects + assert len(result) == 2 + project_names = [p["name"] for p in result] + assert "project-a" in project_names + assert "project-b" in project_names + + def test_merge_source_overwrites_existing_with_same_name(self): + """Test that source session replaces existing session with same name.""" + source = [ + { + "name": "project-a", + "path": Path("/src/project-a"), + "sessions": [ + { + "path": Path("/src/abc123.jsonl"), + "summary": "Updated session", + "mtime": 200, + "size": 2000, + }, + ], + } + ] + existing = [ + { + "name": "project-a", + "path": None, + "sessions": [ + { + "path": Path("/archive/project-a/abc123"), + "summary": "Old session", + "mtime": 100, + "size": 1000, + }, + ], + } + ] + + result = merge_sessions(source, existing) + + # Should have 1 project with 1 session (source wins) + assert len(result) == 1 + assert len(result[0]["sessions"]) == 1 + # The source session should be present (identified by stem) + assert result[0]["sessions"][0]["summary"] == "Updated session" + + +class TestGenerateBatchHtmlMerge: + """Tests for generate_batch_html with merge functionality.""" + + def test_merge_mode_regenerates_source_sessions( + self, mock_projects_dir, output_dir + ): + """Test that source sessions are regenerated in merge mode.""" + # First run: create initial archive + generate_batch_html(mock_projects_dir, output_dir) + assert (output_dir / "project-a" / "abc123" / "index.html").exists() + + # Get initial mtime + initial_mtime = ( + (output_dir / "project-a" / "abc123" / "index.html").stat().st_mtime + ) + + # Wait a tiny bit to ensure mtime changes + import time + + time.sleep(0.01) + + # Second run with merge: should regenerate + generate_batch_html(mock_projects_dir, output_dir, merge=True) + + # File should still exist and be regenerated (mtime updated) + assert (output_dir / "project-a" / "abc123" / "index.html").exists() + new_mtime = (output_dir / "project-a" / "abc123" / "index.html").stat().st_mtime + assert new_mtime >= initial_mtime + + def test_merge_mode_preserves_orphans_in_index(self, output_dir): + """Test that orphan sessions are preserved in the index when merging.""" + # Create initial archive with a session + with tempfile.TemporaryDirectory() as tmpdir: + source1 = Path(tmpdir) + project = source1 / "-home-user-projects-project-a" + project.mkdir(parents=True) + + session = project / "original.jsonl" + session.write_text( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Original session"}}\n' + ) + + generate_batch_html(source1, output_dir) + + # Verify original session exists + assert (output_dir / "project-a" / "original" / "index.html").exists() + + # Create a new source without the original session + with tempfile.TemporaryDirectory() as tmpdir: + source2 = Path(tmpdir) + project = source2 / "-home-user-projects-project-a" + project.mkdir(parents=True) + + session = project / "new.jsonl" + session.write_text( + '{"type": "user", "timestamp": "2025-01-02T10:00:00.000Z", "message": {"role": "user", "content": "New session"}}\n' + ) + + # Merge: should add new session AND preserve orphan in index + generate_batch_html(source2, output_dir, merge=True) + + # Both sessions should exist + assert (output_dir / "project-a" / "original" / "index.html").exists() + assert (output_dir / "project-a" / "new" / "index.html").exists() + + # Project index should list both sessions + project_index = (output_dir / "project-a" / "index.html").read_text() + assert "original" in project_index + assert "new" in project_index + + def test_prefix_in_session_index(self, mock_projects_dir, output_dir): + """Test that prefix is shown in project index.""" + generate_batch_html(mock_projects_dir, output_dir, prefix="laptop") + + # Check project index contains prefix + project_index = (output_dir / "project-a" / "index.html").read_text() + assert "laptop" in project_index + + +class TestAllMergeCommand: + """Tests for the all command with --merge and --prefix options.""" + + def test_merge_option_exists(self): + """Test that --merge option is recognized.""" + runner = CliRunner() + result = runner.invoke(cli, ["all", "--help"]) + assert result.exit_code == 0 + assert "--merge" in result.output or "-m" in result.output + + def test_prefix_option_exists(self): + """Test that --prefix option is recognized.""" + runner = CliRunner() + result = runner.invoke(cli, ["all", "--help"]) + assert result.exit_code == 0 + assert "--prefix" in result.output + + def test_merge_preserves_orphan_sessions(self, output_dir): + """Test that --merge preserves orphan sessions in the index.""" + runner = CliRunner() + + # Create initial archive with a session + with tempfile.TemporaryDirectory() as tmpdir: + source1 = Path(tmpdir) + project = source1 / "-home-user-projects-project-a" + project.mkdir(parents=True) + + session = project / "original.jsonl" + session.write_text( + '{"type": "user", "timestamp": "2025-01-01T10:00:00.000Z", "message": {"role": "user", "content": "Original session"}}\n' + ) + + result = runner.invoke( + cli, + ["all", "--source", str(source1), "--output", str(output_dir)], + ) + assert result.exit_code == 0 + + # Verify original session exists + assert (output_dir / "project-a" / "original" / "index.html").exists() + + # Create a new source without the original session + with tempfile.TemporaryDirectory() as tmpdir: + source2 = Path(tmpdir) + project = source2 / "-home-user-projects-project-a" + project.mkdir(parents=True) + + session = project / "new.jsonl" + session.write_text( + '{"type": "user", "timestamp": "2025-01-02T10:00:00.000Z", "message": {"role": "user", "content": "New session"}}\n' + ) + + # Merge: should add new session AND preserve orphan in index + result = runner.invoke( + cli, + [ + "all", + "--source", + str(source2), + "--output", + str(output_dir), + "--merge", + ], + ) + assert result.exit_code == 0 + + # Both sessions should exist + assert (output_dir / "project-a" / "original" / "index.html").exists() + assert (output_dir / "project-a" / "new" / "index.html").exists() + + # Project index should list both sessions + project_index = (output_dir / "project-a" / "index.html").read_text() + assert "original" in project_index + assert "new" in project_index + + def test_prefix_in_cli(self, mock_projects_dir, output_dir): + """Test that --prefix adds prefix to sessions in index.""" + runner = CliRunner() + result = runner.invoke( + cli, + [ + "all", + "--source", + str(mock_projects_dir), + "--output", + str(output_dir), + "--prefix", + "workstation", + ], + ) + assert result.exit_code == 0 + + # Check project index contains prefix + project_index = (output_dir / "project-a" / "index.html").read_text() + assert "workstation" in project_index