|
#!/usr/bin/env python3
"""
Script to generate CLI documentation from CocoIndex Click commands.

This script uses md-click as the foundation but generates enhanced markdown
documentation that's suitable for inclusion in the CocoIndex documentation site.
"""

import re
import sys
from pathlib import Path

import click

# Put the in-repo ``python/`` directory at the front of sys.path *before*
# importing cocoindex: an insertion performed after the import statement
# cannot influence which cocoindex package gets loaded.
project_root = Path(__file__).parent.parent
python_path = project_root / "python"
sys.path.insert(0, str(python_path))

from cocoindex.cli import cli  # noqa: E402  (must follow the sys.path setup)
| 19 | + |
| 20 | + |
def clean_usage_line(usage: str) -> str:
    """Rewrite a Click usage line as a plain ``cocoindex ...`` invocation.

    A leading ``Usage: cli `` or ``Usage: cocoindex `` is replaced by
    ``cocoindex ``. Only the start of the string is examined, so the same
    text appearing later in the line (for example inside a quoted argument)
    is left untouched.

    Args:
        usage: The usage line, e.g. ``"Usage: cli ls [OPTIONS]"``.

    Returns:
        The rewritten line, or ``usage`` unchanged when it starts with
        neither prefix.
    """
    for prefix in ("Usage: cli ", "Usage: cocoindex "):
        if usage.startswith(prefix):
            return "cocoindex " + usage[len(prefix):]
    return usage
| 29 | + |
| 30 | + |
def escape_html_tags(text: str) -> str:
    """Escape ``<`` and ``>`` outside inline code so MDX does not see tags.

    URLs are first wrapped in backticks (turning them into inline code), then
    every angle bracket that is not inside a backtick-delimited span is
    replaced by its HTML entity. Text inside inline code is returned as-is.

    Args:
        text: Help text that may contain ``<placeholder>``-style fragments.

    Returns:
        The text with URLs wrapped in backticks and all remaining ``<``/``>``
        outside inline code replaced by ``&lt;``/``&gt;``.
    """
    # URLs that embed a <placeholder> become inline code so the placeholder
    # survives verbatim.
    text = re.sub(r"http://localhost:<([^>]+)>", r"`http://localhost:<\1>`", text)
    text = re.sub(r"https://([^<\s]+)<([^>]+)>", r"`https://\1<\2>`", text)

    # Comma-separated URL lists (e.g. "https://site1.com,http://localhost:3000")
    # are wrapped as a single unit.
    text = re.sub(r"(?<!`)(\bhttps?://[^\s,`]+,https?://[^\s`]+)(?!`)", r"`\1`", text)

    # Remaining standalone URLs that are not already next to a backtick.
    text = re.sub(r"(?<!`)(?<!,)(\bhttps?://[^\s,`]+)(?!`)(?!,)", r"`\1`", text)

    # Splitting on a capturing group keeps the inline-code spans: they land on
    # the odd indices, with plain text on the even ones.
    parts = re.split(r"(`[^`]*`)", text)

    # The replacement must be the HTML entity; substituting the bracket with
    # itself would leave the text unescaped.
    return "".join(
        part if index % 2 else part.replace("<", "&lt;").replace(">", "&gt;")
        for index, part in enumerate(parts)
    )
| 59 | + |
| 60 | + |
def _option_table_row(option: str, description_parts: list) -> str:
    """Render one option and its collected description fragments as a table row."""
    description = escape_html_tags(" ".join(description_parts).strip())
    return f"| `{option}` | {description} |"


def format_options_section(help_text: str) -> str:
    """Extract the ``Options:`` section of help text as a markdown table.

    The section runs from the ``Options:`` heading to the ``Commands:``
    heading, or to the end of the text when there is none. A line starting
    with two spaces followed by ``-`` opens a new option; the option text and
    its description are separated by the first run of two or more whitespace
    characters. Any other non-blank line is treated as a continuation of the
    current option's description.

    Args:
        help_text: Full help output of a command.

    Returns:
        A two-column markdown table (header plus one row per option) ending
        in a newline, or ``""`` when there is no ``Options:`` heading or no
        option could be parsed.
    """
    lines = help_text.split("\n")
    options_start = None
    commands_start = None

    for index, line in enumerate(lines):
        heading = line.strip()
        if heading == "Options:":
            options_start = index
        elif heading == "Commands:":
            commands_start = index
            break

    if options_start is None:
        return ""

    # Skip the "Options:" heading itself; stop before "Commands:" if present.
    end_idx = len(lines) if commands_start is None else commands_start

    rows = []
    current_option = None
    current_description = []

    for line in lines[options_start + 1 : end_idx]:
        if not line.strip():
            continue

        # An option entry is indented by exactly two spaces and then starts
        # with a dash; deeper-indented lines are wrapped description text.
        if line.startswith("  -"):
            if current_option is not None:
                rows.append(_option_table_row(current_option, current_description))

            content = line[2:]  # drop the two-space indent
            gap = re.search(r"\s{2,}", content)
            if gap:
                current_option = content[: gap.start()].strip()
                first_fragment = content[gap.end() :].strip()
                current_description = [first_fragment] if first_fragment else []
            else:
                # Option text only; its description starts on the next line.
                current_option = content.strip()
                current_description = []
        elif current_option is not None:
            current_description.append(line.strip())

    # Flush the option still being collected when the section ended.
    if current_option is not None:
        rows.append(_option_table_row(current_option, current_description))

    if not rows:
        return ""

    header = "| Option | Description |\n|--------|-------------|"
    return f"{header}\n" + "\n".join(rows) + "\n"
| 129 | + |
| 130 | + |
def format_commands_section(help_text: str) -> str:
    """Extract the ``Commands:`` section of help text as a markdown table.

    Every line after the ``Commands:`` heading that consists of a two-space
    indent, a command name (word characters and hyphens), at least two
    whitespace characters and a description becomes one table row. Lines that
    do not have this shape (blank lines, wrapped description text) are
    ignored. Descriptions longer than 80 characters are cut to 77 characters
    followed by ``...``.

    Args:
        help_text: Full help output of a command group.

    Returns:
        A two-column markdown table ending in a newline, or ``""`` when there
        is no ``Commands:`` heading or no command line matched.
    """
    lines = help_text.split("\n")

    commands_start = None
    for index, line in enumerate(lines):
        if line.strip() == "Commands:":
            commands_start = index
            break

    if commands_start is None:
        return ""

    # Two-space indent, then a name that starts with a word character and may
    # contain hyphens (e.g. "show-flow"), then the description column.
    command_line = re.compile(r"^  (\w[\w-]*)\s{2,}(.+)$")

    rows = []
    for line in lines[commands_start + 1 :]:
        if not line.strip():
            continue

        match = command_line.match(line)
        if not match:
            continue

        command = match.group(1)
        description = match.group(2).strip()
        if len(description) > 80:
            description = description[:77] + "..."
        rows.append(f"| `{command}` | {description} |")

    if not rows:
        return ""

    header = "| Command | Description |\n|---------|-------------|"
    return f"{header}\n" + "\n".join(rows) + "\n"
| 169 | + |
| 170 | + |
def extract_description(help_text: str) -> str:
    """Return the free-text description found in a command's help output.

    Collection starts after the first line beginning with ``Usage:`` and
    stops at the first ``Options:`` or ``Commands:`` heading. Consecutive
    non-blank lines are stripped and joined with single spaces into one
    paragraph; blank lines separate paragraphs, which are joined with a
    blank line between them. The result is passed through
    ``escape_html_tags`` before being returned.
    """
    paragraphs = []
    pending = []  # stripped lines of the paragraph currently being built
    seen_usage = False

    for raw_line in help_text.split("\n"):
        if raw_line.startswith("Usage:"):
            seen_usage = True
            continue

        stripped = raw_line.strip()
        if stripped in ("Options:", "Commands:"):
            break
        if not seen_usage:
            continue

        if stripped:
            pending.append(stripped)
        elif pending:
            # A blank line closes the paragraph in progress; repeated or
            # leading blank lines have nothing to close.
            paragraphs.append(" ".join(pending))
            pending = []

    if pending:
        paragraphs.append(" ".join(pending))

    return escape_html_tags("\n\n".join(paragraphs))
| 217 | + |
| 218 | + |
def generate_command_docs(cmd: click.Group) -> str:
    """Build the markdown reference for every subcommand of ``cmd``.

    The output starts with a ``## Subcommands Reference`` heading. Each
    subcommand, in order of its name, then contributes a ``###`` heading, its
    description (when non-empty), a fenced ``bash`` usage block, an options
    table (when one can be produced) and a ``---`` separator.

    Args:
        cmd: The Click group whose registered commands are documented.

    Returns:
        The whole document as one newline-joined string.
    """
    root_ctx = click.core.Context(cmd, info_name=cmd.name)
    doc_lines = ["## Subcommands Reference", ""]

    ordered = sorted(cmd.commands.values(), key=lambda c: c.name or "")
    for sub in ordered:
        # Parenting the context on the group's makes the usage line include
        # the group's name ahead of the subcommand's.
        ctx = click.core.Context(sub, info_name=sub.name, parent=root_ctx)
        help_text = sub.get_help(ctx)
        usage = clean_usage_line(sub.get_usage(ctx))
        description = extract_description(help_text)

        doc_lines += [f"### `{sub.name}`", ""]

        if description:
            doc_lines += [description, ""]

        doc_lines += ["**Usage:**", "", "```bash", usage, "```", ""]

        options_table = format_options_section(help_text)
        if options_table:
            doc_lines += ["**Options:**", "", options_table]

        doc_lines += ["---", ""]

    return "\n".join(doc_lines)
| 264 | + |
| 265 | + |
def main() -> None:
    """Generate the CLI reference and write it under the docs tree.

    The markdown is written to ``docs/docs/core/cli-commands.md`` below
    ``project_root``; the directory is created when missing. The file is only
    rewritten when its current content differs from the freshly generated
    text. Any exception is reported with a traceback and the process exits
    with status 1.
    """
    print("Generating CocoIndex CLI documentation...")

    try:
        rendered = generate_command_docs(cli)

        target_dir = project_root / "docs" / "docs" / "core"
        target = target_dir / "cli-commands.md"
        target_dir.mkdir(parents=True, exist_ok=True)

        # Leave an identical file alone so unchanged docs are not rewritten.
        if target.exists() and target.read_text(encoding="utf-8") == rendered:
            print(f"CLI documentation is up to date at: {target}")
        else:
            target.write_text(rendered, encoding="utf-8")

            print(f"CLI documentation generated successfully at: {target}")
            print(
                f"Generated {len(rendered.splitlines())} lines of documentation"
            )

    except Exception as e:
        print(f"Error generating documentation: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
0 commit comments