|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Script to generate CLI documentation from CocoIndex Click commands. |
| 4 | +
|
| 5 | +This script uses md-click as the foundation but generates enhanced markdown |
| 6 | +documentation that's suitable for inclusion in the CocoIndex documentation site. |
| 7 | +""" |
| 8 | + |
| 9 | +import sys |
| 10 | +import os |
| 11 | +from pathlib import Path |
| 12 | +import re |
| 13 | +from typing import Dict, List, Any |
| 14 | + |
# Add the cocoindex python directory to the path
# project_root is the parent of the directory containing this script; the
# "python" directory under it is put first on sys.path (presumably so the
# in-repo `cocoindex` package is the one imported below -- confirm).
project_root = Path(__file__).parent.parent
python_path = project_root / "python"
sys.path.insert(0, str(python_path))

# These imports must run after the sys.path change above. If either module is
# missing, print a hint and exit with status 1 instead of showing a traceback.
try:
    import md_click
    from cocoindex.cli import cli
except ImportError as e:
    print(f"Error importing required modules: {e}")
    print("Make sure to run this script from the project root and install dependencies")
    sys.exit(1)
| 27 | + |
| 28 | + |
def clean_usage_line(usage: str) -> str:
    """Rewrite a Click usage line so it reads as a ``cocoindex`` invocation.

    The leading ``Usage:`` label is dropped and the program name that follows
    it (``cli`` or ``cocoindex``) becomes ``cocoindex``.

    Args:
        usage: Usage text, e.g. ``"Usage: cli update [OPTIONS]"``.

    Returns:
        The usage text starting with ``cocoindex``. Text that does not begin
        with ``Usage: cli`` or ``Usage: cocoindex`` is returned unchanged.
    """
    # Anchor at the start so only the real prefix is rewritten (never a later
    # occurrence inside the text), and accept the bare form "Usage: cli" that
    # has nothing after the program name.
    return re.sub(r"^Usage: (?:cli|cocoindex)(?=\s|$)", "cocoindex", usage, count=1)
| 37 | + |
| 38 | + |
def escape_html_tags(text: str) -> str:
    """Make help text safe for MDX by neutralising HTML-like angle brackets.

    URLs are first wrapped in inline code spans (backticks). Then every ``<``
    and ``>`` that is *outside* an inline code span is replaced with the
    ``&lt;`` / ``&gt;`` entities. Text inside backticks is left untouched.

    Args:
        text: Plain help text, possibly containing placeholders such as
            ``<name>`` and URLs.

    Returns:
        The text with URLs wrapped in backticks and angle brackets outside
        code spans escaped.
    """
    # URLs that contain a <placeholder> are wrapped whole so the placeholder
    # stays readable inside the code span.
    text = re.sub(r"http://localhost:<([^>]+)>", r"`http://localhost:<\1>`", text)
    text = re.sub(r"https://([^<\s]+)<([^>]+)>", r"`https://\1<\2>`", text)

    # Comma-separated URL examples (e.g. "https://site1.com,http://localhost:3000")
    # are wrapped as a single code span.
    text = re.sub(r"(?<!`)(\bhttps?://[^\s,`]+,https?://[^\s`]+)(?!`)", r"`\1`", text)

    # Standalone URLs not already inside backticks. The trailing (?!\S) forces
    # the match to run up to whitespace or end of text; without it the regex
    # engine could backtrack and wrap a truncated URL when the URL is directly
    # followed by a comma or a backtick.
    text = re.sub(r"(?<!`)(?<!,)(\bhttps?://[^\s,`]+)(?!\S)", r"`\1`", text)

    # re.split with a capturing group alternates plain text (even indices) and
    # inline code spans (odd indices); only the plain text is escaped.
    parts = re.split(r"(`[^`]*`)", text)
    return "".join(
        part if index % 2 else part.replace("<", "&lt;").replace(">", "&gt;")
        for index, part in enumerate(parts)
    )
| 67 | + |
| 68 | + |
def _format_option_row(option: str, description_parts: List[str]) -> str:
    """Render one option and its collected description lines as a table row."""
    desc = escape_html_tags(" ".join(description_parts).strip())
    # A literal pipe ends a markdown table cell even inside a code span
    # (e.g. Click choices like "[json|yaml]"), so it has to be backslash-escaped.
    safe_option = option.replace("|", "\\|")
    safe_desc = desc.replace("|", "\\|")
    return f"| `{safe_option}` | {safe_desc} |"


def format_options_section(help_text: str) -> str:
    """Extract the ``Options:`` section of Click help text as a markdown table.

    Args:
        help_text: Full help output of a command.

    Returns:
        A two-column markdown table (``Option`` / ``Description``) ending with
        a newline, or ``""`` when there is no ``Options:`` section or it
        contains no options.
    """
    lines = help_text.split("\n")
    options_start = None
    commands_start = None

    for i, line in enumerate(lines):
        heading = line.strip()
        if heading == "Options:":
            options_start = i
        elif heading == "Commands:":
            # Options end where the commands listing begins.
            commands_start = i
            break

    if options_start is None:
        return ""

    end_idx = len(lines) if commands_start is None else commands_start
    options_lines = lines[options_start + 1 : end_idx]  # Skip "Options:" header

    formatted_options = []
    current_option = None
    current_description = []

    for line in options_lines:
        if not line.strip():
            continue

        # A new option starts with exactly two spaces followed by a dash;
        # anything else is a wrapped continuation of the previous description.
        if line.startswith("  -"):
            if current_option is not None:
                formatted_options.append(
                    _format_option_row(current_option, current_description)
                )

            content = line[2:]  # Drop the two-space indent

            # The description is separated from the flags by a run of two or
            # more whitespace characters; it may also start on the next line.
            match = re.search(r"\s{2,}", content)
            if match:
                current_option = content[: match.start()].strip()
                desc_part = content[match.end() :].strip()
                current_description = [desc_part] if desc_part else []
            else:
                current_option = content.strip()
                current_description = []
        elif current_option is not None:
            current_description.append(line.strip())

    # Flush the final option, which has no following option line to trigger it.
    if current_option is not None:
        formatted_options.append(
            _format_option_row(current_option, current_description)
        )

    if not formatted_options:
        return ""

    header = "| Option | Description |\n|--------|-------------|"
    return f"{header}\n" + "\n".join(formatted_options) + "\n"
| 137 | + |
| 138 | + |
def format_commands_section(help_text: str) -> str:
    """Extract the ``Commands:`` section of Click help text as a markdown table.

    Each command line is expected to be two spaces, the command name, a run of
    at least two whitespace characters, then the short description. Lines that
    do not match (blank lines, wrapped text) are ignored. Descriptions longer
    than 80 characters are cut to 77 characters plus ``...``.

    Args:
        help_text: Full help output of a command group.

    Returns:
        A two-column markdown table (``Command`` / ``Description``) ending
        with a newline, or ``""`` when no commands are found.
    """
    lines = help_text.split("\n")
    commands_start = next(
        (i for i, line in enumerate(lines) if line.strip() == "Commands:"), None
    )
    if commands_start is None:
        return ""

    # Command names may contain hyphens (e.g. "show-flow"), which \w alone
    # does not match.
    command_line = re.compile(r"^  ([\w-]+)\s{2,}(.+)$")

    formatted_commands = []
    for line in lines[commands_start + 1 :]:
        match = command_line.match(line)
        if not match:
            continue

        command = match.group(1)
        description = match.group(2).strip()
        if len(description) > 80:
            description = description[:77] + "..."
        # Escape after truncating so an escape sequence is never cut in half;
        # a bare pipe would otherwise end the table cell.
        description = description.replace("|", "\\|")
        formatted_commands.append(f"| `{command}` | {description} |")

    if not formatted_commands:
        return ""

    header = "| Command | Description |\n|---------|-------------|"
    return f"{header}\n" + "\n".join(formatted_commands) + "\n"
| 177 | + |
| 178 | + |
def extract_description(help_text: str) -> str:
    """Extract the main description from help text.

    The description is every non-empty line found after the ``Usage:`` line
    and before the first ``Options:`` or ``Commands:`` heading. Each line is
    stripped, the lines are joined with blank lines between them, and the
    result is passed through ``escape_html_tags``.

    Args:
        help_text: Full help output of a command.

    Returns:
        The escaped description, or ``""`` when there is none.
    """
    description_lines = []
    in_description = False
    in_usage = False  # True while still inside a (possibly wrapped) usage line

    for line in help_text.split("\n"):
        stripped = line.strip()

        if line.startswith("Usage:"):
            in_description = True
            in_usage = True
            continue
        if stripped in ("Options:", "Commands:"):
            break
        if not stripped:
            # A blank line ends the usage block.
            in_usage = False
            continue
        if in_usage and line[0].isspace():
            # Indented line directly under "Usage:": a wrapped continuation of
            # a long usage line, not part of the description.
            continue

        in_usage = False
        if in_description:
            description_lines.append(stripped)

    return escape_html_tags("\n\n".join(description_lines))
| 198 | + |
| 199 | + |
def generate_command_docs(docs: List[Dict[str, Any]]) -> str:
    """Generate markdown documentation for all subcommands.

    Entries with an empty or missing ``"parent"`` (the root CLI) are skipped.
    The remaining entries are sorted by command name and each one gets a
    ``##`` heading, its description, a usage code block, an options table when
    it has options, and a ``---`` separator.

    Args:
        docs: One dict per command. The keys read here are ``"parent"``,
            ``"command"`` (an object with a ``name`` attribute), ``"help"``
            and ``"usage"``.

    Returns:
        The markdown document, starting with a ``# CLI Commands`` heading and
        ending with a newline.
    """
    subcommands = [doc for doc in docs if doc.get("parent", "")]

    # Top-level heading to satisfy the MD041 linting rule.
    markdown_content = ["# CLI Commands", ""]

    for doc in sorted(subcommands, key=lambda d: d["command"].name):
        command_name = doc["command"].name
        help_text = doc["help"]
        usage = clean_usage_line(doc["usage"])
        description = extract_description(help_text)

        markdown_content.extend([f"## `{command_name}`", ""])

        if description:
            markdown_content.extend([description, ""])

        markdown_content.extend(["**Usage:**", "", "```bash", usage, "```", ""])

        # The options table already ends with a newline, which becomes the
        # blank line before the separator once everything is joined.
        options_section = format_options_section(help_text)
        if options_section:
            markdown_content.extend(["**Options:**", "", options_section])

        markdown_content.extend(["---", ""])

    return "\n".join(markdown_content)
| 253 | + |
| 254 | + |
def main():
    """Build the CLI reference and write it to ``docs/docs/core/cli-commands.md``.

    The file is rewritten only when the generated markdown differs from what
    is already on disk. Any exception is reported with a traceback and the
    process exits with status 1.
    """
    print("Generating CocoIndex CLI documentation...")

    try:
        # Collect help data for every command via md-click
        docs = list(md_click.main.recursive_help(cli))
        print(f"Found {len(docs)} CLI commands to document")

        markdown_content = generate_command_docs(docs)

        # Output location under the project root; create it if needed
        docs_dir = project_root / "docs" / "docs" / "core"
        output_file = docs_dir / "cli-commands.md"
        docs_dir.mkdir(parents=True, exist_ok=True)

        # Skip the write when the existing file already matches
        if (
            output_file.exists()
            and output_file.read_text(encoding="utf-8") == markdown_content
        ):
            print(f"CLI documentation is up to date at: {output_file}")
            return

        output_file.write_text(markdown_content, encoding="utf-8")

        print(f"CLI documentation generated successfully at: {output_file}")
        print(
            f"Generated {len(markdown_content.splitlines())} lines of documentation"
        )

    except Exception as e:
        print(f"Error generating documentation: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
| 300 | + |
| 301 | + |
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments