Skip to content

Commit c4ba813

Browse files
authored
generate full CLI docs from click's metadata and also added it in precommit hooks (#1096)
* generate full CLI docs from click's metadata and also added it in precommit hooks * fixed the escaping * refactored cli commands to directly import in cli.mdx and also refactored cli generation to fix markdownlinter issues
1 parent 661f8d7 commit c4ba813

File tree

5 files changed

+560
-20
lines changed

5 files changed

+560
-20
lines changed

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@ repos:
2626

2727
- repo: local
2828
hooks:
29+
- id: generate-cli-docs
30+
name: generate CLI documentation
31+
entry: python dev/generate_cli_docs.py
32+
language: system
33+
files: ^(python/cocoindex/cli\.py|dev/generate_cli_docs\.py)$
34+
pass_filenames: false
35+
2936
- id: maturin-develop
3037
name: maturin develop
3138
entry: maturin develop -E all,dev

dev/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Development Scripts
2+
3+
This directory contains development and maintenance scripts for the CocoIndex project.
4+
5+
## Scripts
6+
7+
### `generate_cli_docs.py`
8+
9+
Automatically generates CLI documentation from the CocoIndex Click commands.
10+
11+
**Usage:**
12+
13+
```bash
14+
python dev/generate_cli_docs.py
15+
```
16+
17+
**What it does:**
18+
19+
- Extracts help messages from all Click commands in `python/cocoindex/cli.py`
20+
- Generates comprehensive Markdown documentation with properly formatted tables
21+
- Saves the output to `docs/docs/core/cli-commands.md` for direct import into CLI documentation
22+
- Only updates the file if content has changed (avoids unnecessary git diffs)
23+
- Automatically escapes HTML-like tags to prevent MDX parsing issues
24+
- Wraps URLs with placeholders in code blocks for proper rendering
25+
26+
**Integration:**
27+
28+
- Runs automatically as a pre-commit hook when `python/cocoindex/cli.py` or `dev/generate_cli_docs.py` is modified
29+
- The generated documentation is directly imported into `docs/docs/core/cli.mdx` via MDX import
30+
- Provides a seamless single-page CLI documentation experience without separate reference pages
31+
32+
**Dependencies:**
33+
34+
- `md-click` package for extracting Click help information
35+
- `cocoindex` package must be importable (the CLI module)
36+
37+
This ensures that CLI documentation is always kept in sync with the actual command-line interface.

dev/generate_cli_docs.py

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to generate CLI documentation from CocoIndex Click commands.
4+
5+
This script uses md-click as the foundation but generates enhanced markdown
6+
documentation that's suitable for inclusion in the CocoIndex documentation site.
7+
"""
8+
9+
import sys
10+
import os
11+
from pathlib import Path
12+
import re
13+
from typing import Dict, List, Any
14+
15+
# Put the repository's in-tree ``python/`` directory at the front of the import
# path; the ``cocoindex.cli`` import below depends on it being importable.
project_root = Path(__file__).parent.parent
python_path = project_root / "python"
sys.path.insert(0, str(python_path))

# Both imports are required; if either is missing, report it and exit with a
# non-zero status instead of failing later with a NameError.
try:
    import md_click
    from cocoindex.cli import cli
except ImportError as import_error:
    print(f"Error importing required modules: {import_error}")
    print("Make sure to run this script from the project root and install dependencies")
    sys.exit(1)
27+
28+
29+
def clean_usage_line(usage: str) -> str:
    """Turn a Click usage line into a plain ``cocoindex ...`` invocation.

    ``"Usage: cli "`` is rewritten to ``"cocoindex "``. If the result still
    starts with ``"Usage: cocoindex "`` (the program name was already
    correct), that prefix text is reduced to ``"cocoindex "`` as well.

    Args:
        usage: The usage string as produced for a Click command.

    Returns:
        The usage string with the ``Usage:`` label and ``cli`` name replaced.
    """
    already_named = "Usage: cocoindex "
    result = usage.replace("Usage: cli ", "cocoindex ")
    if not result.startswith(already_named):
        return result
    return result.replace(already_named, "cocoindex ")
37+
38+
39+
def escape_html_tags(text: str) -> str:
    """Make help text safe for MDX by neutralising ``<...>`` outside code spans.

    The text is processed in two phases:

    1. URLs are wrapped in backticks so that placeholders such as ``<port>``
       inside them survive verbatim: ``http://localhost:<x>``,
       ``https://...<x>``, comma-separated URL lists, and finally any
       remaining standalone URL.
    2. The text is split on inline code spans (`` `...` ``); ``<`` and ``>``
       are replaced by ``&lt;`` / ``&gt;`` everywhere except inside those
       spans.

    Args:
        text: Raw help text (a description or an option's help string).

    Returns:
        The text with URLs wrapped in inline code and stray angle brackets
        HTML-escaped.
    """
    # URLs with an angle-bracket placeholder become inline code as a whole.
    text = re.sub(r"http://localhost:<([^>]+)>", r"`http://localhost:<\1>`", text)
    text = re.sub(r"https://([^<\s]+)<([^>]+)>", r"`https://\1<\2>`", text)

    # Comma-separated URL lists (e.g. "https://site1.com,http://localhost:3000").
    # The trailing lookahead is positive on purpose: a negative lookahead after
    # a greedy class lets the regex backtrack and wrap a truncated URL.
    text = re.sub(
        r"(?<!`)(\bhttps?://[^\s,`]+,https?://[^\s`]+)(?=\s|\Z)", r"`\1`", text
    )

    # Standalone URLs not already inside backticks or a wrapped comma list.
    # The match must end at whitespace, a comma, or end of text, so a URL
    # followed by "," is wrapped whole with the comma left outside the span.
    text = re.sub(r"(?<![`,])(\bhttps?://[^\s,`]+)(?=[\s,]|\Z)", r"`\1`", text)

    # re.split with a capturing group alternates plain text (even indices)
    # and inline code spans (odd indices).
    parts = re.split(r"(`[^`]*`)", text)
    return "".join(
        part if index % 2 else part.replace("<", "&lt;").replace(">", "&gt;")
        for index, part in enumerate(parts)
    )
67+
68+
69+
def format_options_section(help_text: str) -> str:
    """Render the ``Options:`` block of Click help text as a Markdown table.

    The block starts after the last ``Options:`` heading seen before any
    ``Commands:`` heading and runs until ``Commands:``, the first unindented
    line, or the end of the text. A line that begins with exactly two spaces
    followed by ``-`` starts a new option; the option spec and its help are
    separated by the first run of two or more whitespace characters. Any
    other indented line continues the current option's description.

    Args:
        help_text: Full help output of a Click command.

    Returns:
        A two-column Markdown table (header, one row per option, trailing
        newline), or ``""`` when there is no options block or it is empty.
    """
    lines = help_text.split("\n")
    options_start = None
    commands_start = None

    for i, line in enumerate(lines):
        heading = line.strip()
        if heading == "Options:":
            options_start = i
        elif heading == "Commands:":
            commands_start = i
            break

    if options_start is None:
        return ""

    # Compare against None explicitly: an index of 0 is falsy but valid.
    end_idx = len(lines) if commands_start is None else commands_start

    def render_row(option: str, description_parts: List[str]) -> str:
        """Build one table row; escape HTML for MDX and ``|`` for the table."""
        desc = escape_html_tags(" ".join(description_parts).strip())
        # An unescaped pipe (e.g. a choice metavar "[a|b]") would split the cell.
        return "| `{}` | {} |".format(
            option.replace("|", "\\|"), desc.replace("|", "\\|")
        )

    rows = []
    current_option = None
    current_description = []

    for line in lines[options_start + 1 : end_idx]:  # Skip "Options:" header
        if not line.strip():
            continue

        # Option rows and their continuations are always indented; unindented
        # text belongs to something else (another section, an epilog).
        if not line[0].isspace():
            break

        # Exactly two spaces then a dash. The explicit {2} quantifier keeps
        # the indent width visible even if whitespace in this file is mangled.
        if re.match(r" {2}-", line):
            if current_option is not None:
                rows.append(render_row(current_option, current_description))

            content = line[2:]
            # The first run of 2+ whitespace separates the spec from its help.
            gap = re.search(r"\s{2,}", content)
            if gap:
                current_option = content[: gap.start()].strip()
                first_part = content[gap.end() :].strip()
                current_description = [first_part] if first_part else []
            else:
                # Help text (if any) follows on the continuation lines.
                current_option = content.strip()
                current_description = []
        elif current_option is not None:
            current_description.append(line.strip())

    if current_option is not None:
        rows.append(render_row(current_option, current_description))

    if not rows:
        return ""

    header = "| Option | Description |\n|--------|-------------|"
    return f"{header}\n" + "\n".join(rows) + "\n"
137+
138+
139+
def format_commands_section(help_text: str) -> str:
    """Render the ``Commands:`` block of Click help text as a Markdown table.

    Every line after the ``Commands:`` heading that consists of exactly two
    spaces of indent, a command name (word characters or hyphens), at least
    two whitespace characters, and a description becomes one table row.
    Descriptions longer than 80 characters are cut to 77 characters plus
    ``...``. Lines that do not match this shape are ignored.

    Args:
        help_text: Full help output of a Click group.

    Returns:
        A two-column Markdown table (header, one row per command, trailing
        newline), or ``""`` when there is no commands block or no row matched.
    """
    lines = help_text.split("\n")

    commands_start = None
    for i, line in enumerate(lines):
        if line.strip() == "Commands:":
            commands_start = i
            break

    if commands_start is None:
        return ""

    # Explicit {2} keeps the two-space indent visible in the pattern;
    # [\w-] also accepts hyphenated command names such as "show-flow".
    row_pattern = re.compile(r"^ {2}([\w-]+)\s{2,}(.+)$")

    formatted_commands = []
    for line in lines[commands_start + 1 :]:
        match = row_pattern.match(line)
        if not match:
            # Blank lines and deeper-indented continuation lines end up here.
            continue

        command = match.group(1)
        description = match.group(2).strip()
        # Truncate long descriptions so the table stays readable.
        if len(description) > 80:
            description = description[:77] + "..."
        formatted_commands.append(f"| `{command}` | {description} |")

    if not formatted_commands:
        return ""

    header = "| Command | Description |\n|---------|-------------|"
    return f"{header}\n" + "\n".join(formatted_commands) + "\n"
177+
178+
179+
def extract_description(help_text: str) -> str:
    """Pull the free-text description out of Click help output.

    Collects every non-blank line that appears after a line starting with
    ``Usage:`` and before the first ``Options:`` or ``Commands:`` heading.
    Each collected line is stripped and the lines are joined with a blank
    line between them.

    Args:
        help_text: Full help output of a Click command.

    Returns:
        The description with HTML-like tags escaped for MDX, or ``""`` when
        no description lines were found.
    """
    collected = []
    seen_usage = False

    for raw_line in help_text.split("\n"):
        if raw_line.startswith("Usage:"):
            seen_usage = True
            continue

        stripped = raw_line.strip()
        if stripped in ("Options:", "Commands:"):
            break
        if seen_usage and stripped:
            collected.append(stripped)

    # Joining an empty list yields "", matching the "no description" case.
    return escape_html_tags("\n\n".join(collected))
198+
199+
200+
def generate_command_docs(docs: List[Dict[str, Any]]) -> str:
    """Generate the Markdown reference for every CLI subcommand.

    Entries with an empty or missing ``"parent"`` (the root CLI group) are
    left out; the remaining entries are emitted in order of command name.
    Each subcommand gets a ``##`` heading, its description (if any), a
    fenced usage block, an options table (if any), and a ``---`` separator.

    Args:
        docs: One dict per command. The keys read here are ``"parent"``,
            ``"command"`` (an object with a ``name`` attribute), ``"help"``
            and ``"usage"``.

    Returns:
        The Markdown document as one string, starting with a top-level
        ``# CLI Commands`` heading.
    """
    # Only subcommands are documented; the root group is skipped.
    subcommands = [doc for doc in docs if doc.get("parent", "")]

    # Top-level heading satisfies the MD041 linting rule.
    markdown_content = ["# CLI Commands", ""]

    for doc in sorted(subcommands, key=lambda x: x["command"].name):
        command_name = doc["command"].name
        help_text = doc["help"]
        usage = clean_usage_line(doc["usage"])
        description = extract_description(help_text)

        markdown_content += [f"## `{command_name}`", ""]

        if description:
            markdown_content += [description, ""]

        markdown_content += ["**Usage:**", "", "```bash", usage, "```", ""]

        options_section = format_options_section(help_text)
        if options_section:
            # The table already ends with a newline, which provides the blank
            # line before the separator once the list is joined.
            markdown_content += ["**Options:**", "", options_section]

        markdown_content += ["---", ""]

    return "\n".join(markdown_content)
253+
254+
255+
def main():
    """Generate the CLI reference and write it to the docs tree.

    Collects help data for the CLI with ``md_click``, renders it with
    ``generate_command_docs`` and stores the result in
    ``docs/docs/core/cli-commands.md`` under the project root. The file is
    rewritten only when its content differs from the new output. On any
    exception the error and traceback are printed and the process exits with
    status 1.
    """
    print("Generating CocoIndex CLI documentation...")

    try:
        # md-click yields one entry per command.
        docs = list(md_click.main.recursive_help(cli))
        print(f"Found {len(docs)} CLI commands to document")

        markdown_content = generate_command_docs(docs)

        docs_dir = project_root / "docs" / "docs" / "core"
        output_file = docs_dir / "cli-commands.md"
        docs_dir.mkdir(parents=True, exist_ok=True)

        # Leave the file alone when nothing changed to avoid needless rewrites.
        is_current = (
            output_file.exists()
            and output_file.read_text(encoding="utf-8") == markdown_content
        )

        if is_current:
            print(f"CLI documentation is up to date at: {output_file}")
        else:
            output_file.write_text(markdown_content, encoding="utf-8")
            print(f"CLI documentation generated successfully at: {output_file}")
            print(
                f"Generated {len(markdown_content.splitlines())} lines of documentation"
            )

    except Exception as e:
        print(f"Error generating documentation: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)