|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Generate llms-full.txt by using gitingest instead of docusaurus-plugin-llms |
| 4 | +""" |
| 5 | + |
| 6 | +import asyncio |
| 7 | +import sys |
| 8 | +import textwrap |
| 9 | +from pathlib import Path |
| 10 | + |
# gitingest is a third-party dependency; if it is missing, explain how to
# install it and stop with a non-zero exit status instead of a raw traceback.
try:
    from gitingest import ingest_async
except ImportError:
    install_help = (
        "❌ gitingest not found. Please install it first:",
        " pip install --user gitingest",
        " # or",
        " pipx install gitingest",
        "",
        "For GitHub Actions, this should be installed automatically in the workflow.",
    )
    for help_line in install_help:
        print(help_line)
    sys.exit(1)
| 21 | + |
| 22 | + |
async def generate_llms_full():
    """Generate ``build/llms-full.txt`` for the repository by using gitingest.

    The repository root is taken to be the parent of the directory that
    contains this script. Files matching the include patterns are ingested,
    the summary, tree, and content returned by gitingest are prefixed with a
    fixed header, and the result is written to ``<repo>/build/llms-full.txt``.

    Any exception raised along the way is printed and the process exits with
    status 1.
    """
    try:
        print("Generating llms-full.txt by using gitingest...")

        # Resolve to an absolute path before walking upwards. If ``__file__``
        # is a bare relative name (for example, the script is launched from
        # its own directory on an interpreter that does not absolutize it),
        # ``Path("script.py").parent.parent`` collapses to "." and would point
        # at the script's directory rather than the repository root.
        repo_path = Path(__file__).resolve().parent.parent
        build_dir = repo_path / "build"
        build_dir.mkdir(exist_ok=True)

        # Configure the gitingest parameters.
        include_patterns = {
            "docs/*.mdx",
            "docs/**/*.mdx",
            "src/components/en-us/*.mdx",
            "src/components/en-us/**/*.mdx",
        }

        exclude_patterns = {
            "node_modules/*",
            ".git/*",
            "build/*",
            "*.log",
            ".next/*",
            "dist/*",
            ".docusaurus/*",
        }

        # Generate content by using gitingest.
        summary, tree, content = await ingest_async(
            str(repo_path),
            max_file_size=100000,  # 100 KB max file size
            include_patterns=include_patterns,
            exclude_patterns=exclude_patterns,
            include_gitignored=False,
        )

        # Fixed header; it ends with one blank line before the summary.
        header = textwrap.dedent("""\
            # ScalarDB Documentation - Full Repository Context
            # Generated by using GitIngest for AI/LLM consumption
            # Cloud-native universal transaction manager
            # Website: https://scalardb.scalar-labs.com

            """)

        # Combine all sections.
        full_content = header + summary + "\n\n" + tree + "\n\n" + content

        # Write to the build directory.
        output_path = build_dir / "llms-full.txt"
        output_path.write_text(full_content, encoding="utf-8")

        print(f"✅ llms-full.txt generated successfully at {output_path}")
        # NOTE: the "estimated tokens" figure is a whitespace-separated word
        # count, used only as a rough proxy; no tokenizer is involved.
        print(f"📊 Summary: {len(full_content)} characters, estimated tokens: {len(full_content.split())}")

    except Exception as error:
        # Top-level boundary of the script: report the failure and signal it
        # to the caller (e.g. CI) through the exit status.
        print(f"❌ Error generating llms-full.txt: {error}")
        sys.exit(1)
| 75 | + |
# Script entry point: run the async generator to completion on a fresh event loop.
if __name__ == "__main__":
    main_coroutine = generate_llms_full()
    asyncio.run(main_coroutine)
0 commit comments