Skip to content

Commit 448c92c

Browse files
committed
Merge branch 'main' into doc1
2 parents 9e1d621 + 81ec671 commit 448c92c

File tree

30 files changed

+1094
-140
lines changed

30 files changed

+1094
-140
lines changed

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ repos:
3333
files: ^(python/|src/|Cargo\.toml|pyproject\.toml)
3434
pass_filenames: false
3535

36+
- id: generate-cli-docs
37+
name: generate CLI documentation
38+
entry: python dev/generate_cli_docs.py
39+
language: system
40+
files: ^(python/cocoindex/cli\.py|dev/generate_cli_docs\.py)$
41+
pass_filenames: false
42+
3643
- id: cargo-fmt
3744
name: cargo fmt
3845
entry: cargo fmt

dev/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Development Scripts
2+
3+
This directory contains development and maintenance scripts for the CocoIndex project.
4+
5+
## Scripts
6+
7+
### `generate_cli_docs.py`
8+
9+
Automatically generates CLI documentation from the CocoIndex Click commands.
10+
11+
**Usage:**
12+
13+
```bash
14+
python dev/generate_cli_docs.py
15+
```
16+
17+
**What it does:**
18+
19+
- Extracts help messages from all Click commands in `python/cocoindex/cli.py`
20+
- Generates comprehensive Markdown documentation with properly formatted tables
21+
- Saves the output to `docs/docs/core/cli-commands.md` for direct import into CLI documentation
22+
- Only updates the file if content has changed (avoids unnecessary git diffs)
23+
- Automatically escapes HTML-like tags to prevent MDX parsing issues
24+
- Wraps URLs (including ones containing `<placeholder>` segments) in inline code so they render literally
25+
26+
**Integration:**
27+
28+
- Runs automatically as a pre-commit hook when `python/cocoindex/cli.py` or `dev/generate_cli_docs.py` is modified
29+
- The generated documentation is directly imported into `docs/docs/core/cli.mdx` via MDX import
30+
- Provides seamless single-page CLI documentation experience without separate reference pages
31+
32+
**Dependencies:**
33+
34+
- `md-click` package for extracting Click help information
35+
- `cocoindex` package must be importable (the CLI module)
36+
37+
This ensures that CLI documentation is always kept in sync with the actual command-line interface.

dev/generate_cli_docs.py

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to generate CLI documentation from CocoIndex Click commands.
4+
5+
This script uses md-click as the foundation but generates enhanced markdown
6+
documentation that's suitable for inclusion in the CocoIndex documentation site.
7+
"""
8+
9+
import sys
10+
from pathlib import Path
11+
import re
12+
import click
13+
from cocoindex.cli import cli
14+
15+
# Add the cocoindex python directory to the path.
# NOTE(review): `from cocoindex.cli import cli` above has already executed by
# the time these statements run, so this sys.path entry cannot influence that
# import; it only affects imports performed afterwards. Confirm whether the
# path tweak was meant to precede the cocoindex import.
# `project_root` is the repository root (the parent of this script's directory)
# and is reused by main() to locate the docs output directory.
project_root = Path(__file__).parent.parent
python_path = project_root / "python"
sys.path.insert(0, str(python_path))
19+
20+
21+
def clean_usage_line(usage: str) -> str:
    """Turn a Click usage line into a generic ``cocoindex ...`` invocation.

    The ``Usage:`` prefix is dropped and the internal program name ``cli`` is
    replaced by ``cocoindex``. Usage lines for any other program name are
    returned unchanged.

    Args:
        usage: Usage string as returned by ``Command.get_usage``.

    Returns:
        The rewritten usage line.
    """
    already_named = "Usage: cocoindex "
    result = usage.replace("Usage: cli ", "cocoindex ")
    if not result.startswith(already_named):
        return result
    # The usage line already carried the public program name; only the
    # "Usage:" prefix needs to go.
    return result.replace(already_named, "cocoindex ")
29+
30+
31+
def escape_html_tags(text: str) -> str:
    """Escape HTML-like tags for MDX while leaving inline code spans untouched.

    The text is processed in two phases:

    1. URLs are wrapped in backticks so that MDX renders them literally. This
       covers URLs containing a ``<placeholder>``, comma-separated URL lists,
       and standalone URLs that are followed by whitespace or end of text.
    2. The text is split on inline code spans; ``<`` and ``>`` outside those
       spans are replaced with ``&lt;`` / ``&gt;``.

    Args:
        text: Raw help text (description or option help).

    Returns:
        The text with URLs wrapped in inline code and remaining angle brackets
        escaped.
    """
    # URLs with placeholders (e.g. http://localhost:<port>) must become inline
    # code, otherwise the placeholder would be escaped or parsed as a JSX tag.
    text = re.sub(r"http://localhost:<([^>]+)>", r"`http://localhost:<\1>`", text)
    text = re.sub(r"https://([^<\s]+)<([^>]+)>", r"`https://\1<\2>`", text)

    # Comma-separated URL examples (e.g. "https://site1.com,http://localhost:3000")
    # are wrapped as one unit. The trailing `(?!\S)` requires the match to end
    # at whitespace or end of text; a plain "not followed by backtick" check
    # lets the regex engine backtrack and wrap a truncated URL instead.
    text = re.sub(
        r"(?<!`)(\bhttps?://[^\s,`]+,https?://[^\s`]+)(?!\S)", r"`\1`", text
    )

    # Standalone URLs not already inside backticks and not part of a
    # comma-separated list. `(?!\S)` again prevents backtracking from wrapping
    # only a prefix of a URL that is followed by "," or "`".
    text = re.sub(r"(?<![`,])(\bhttps?://[^\s,`]+)(?!\S)", r"`\1`", text)

    # re.split with a capturing group alternates: even indices are regular
    # text, odd indices are the `inline code` spans, which are kept verbatim.
    parts = re.split(r"(`[^`]*`)", text)
    return "".join(
        part if index % 2 else part.replace("<", "&lt;").replace(">", "&gt;")
        for index, part in enumerate(parts)
    )
59+
60+
61+
def format_options_section(help_text: str) -> str:
    """Render the ``Options:`` section of Click help text as a Markdown table.

    The section runs from the ``Options:`` header up to the ``Commands:``
    header (or the end of the text). Each option line starts with exactly two
    spaces followed by ``-``; any other non-empty line is treated as a wrapped
    continuation of the previous option's description.

    Args:
        help_text: Full help output of a command.

    Returns:
        A two-column Markdown table (header plus one row per option) ending
        with a newline, or an empty string when there is no ``Options:``
        section or it contains no options.
    """
    lines = help_text.split("\n")
    options_start = None
    commands_start = None

    for index, line in enumerate(lines):
        stripped = line.strip()
        if stripped == "Options:":
            options_start = index
        elif stripped == "Commands:":
            commands_start = index
            break

    if options_start is None:
        return ""

    end_idx = len(lines) if commands_start is None else commands_start
    options_lines = lines[options_start + 1 : end_idx]  # Skip "Options:" header

    rows = []
    current_option = None
    current_description = []

    def flush_current() -> None:
        """Append the table row for the option collected so far, if any."""
        if current_option is None:
            return
        desc = escape_html_tags(" ".join(current_description).strip())
        # A literal "|" (e.g. a "[a|b]" choice metavar) would split the table
        # cell, even inside an inline code span, so it must be backslash-escaped.
        option_cell = current_option.replace("|", "\\|")
        desc_cell = desc.replace("|", "\\|")
        rows.append(f"| `{option_cell}` | {desc_cell} |")

    for line in options_lines:
        if not line.strip():
            continue

        # A new option starts with exactly two spaces and then a dash. The
        # explicit `{2}` quantifier keeps the required indent unambiguous.
        if re.match(r" {2}-", line):
            flush_current()
            content = line[2:]
            # The option spec and its description are separated by a run of
            # two or more whitespace characters.
            gap = re.search(r"\s{2,}", content)
            if gap:
                current_option = content[: gap.start()].strip()
                first_part = content[gap.end() :].strip()
                current_description = [first_part] if first_part else []
            else:
                # No description on this line, just the option itself.
                current_option = content.strip()
                current_description = []
        elif current_option is not None:
            # Continuation line of the current option's description.
            current_description.append(line.strip())

    flush_current()

    if not rows:
        return ""

    header = "| Option | Description |\n|--------|-------------|"
    return f"{header}\n" + "\n".join(rows) + "\n"
129+
130+
131+
def format_commands_section(help_text: str) -> str:
    """Render the ``Commands:`` section of Click help text as a Markdown table.

    A command line is indented by exactly two spaces, followed by the command
    name (word characters and hyphens), at least two whitespace characters,
    and the short description. Lines that do not have this shape are ignored.
    Descriptions longer than 80 characters are cut to 77 characters plus
    ``...``.

    Args:
        help_text: Full help output of a command group.

    Returns:
        A two-column Markdown table ending with a newline, or an empty string
        when there is no ``Commands:`` section or no command line matched.
    """
    lines = help_text.split("\n")

    commands_start = None
    for index, line in enumerate(lines):
        if line.strip() == "Commands:":
            commands_start = index
            break

    if commands_start is None:
        return ""

    # `{2}` pins the two-space indent explicitly; `[\w-]+` also accepts
    # hyphenated command names such as "show-flow".
    command_line = re.compile(r"^ {2}([\w-]+)\s{2,}(.+)$")

    rows = []
    for line in lines[commands_start + 1 :]:
        match = command_line.match(line)
        if match is None:
            continue
        command = match.group(1)
        description = match.group(2).strip()
        # Truncate long descriptions
        if len(description) > 80:
            description = description[:77] + "..."
        rows.append(f"| `{command}` | {description} |")

    if not rows:
        return ""

    header = "| Command | Description |\n|---------|-------------|"
    return f"{header}\n" + "\n".join(rows) + "\n"
169+
170+
171+
def extract_description(help_text: str) -> str:
    """Pull the free-text description out of a command's help output.

    The description is every non-empty line between the ``Usage:`` line and
    the first ``Options:`` / ``Commands:`` header. Each collected line is
    stripped and the lines are joined with blank lines in between.

    Args:
        help_text: Full help output of a command.

    Returns:
        The description with HTML-like tags escaped for MDX, or an empty
        string when no description lines are found.
    """
    collected = []
    seen_usage = False

    for raw_line in help_text.split("\n"):
        text = raw_line.strip()
        if raw_line.startswith("Usage:"):
            # Description lines only count once the usage line has been seen.
            seen_usage = True
        elif text in ("Options:", "Commands:"):
            break
        elif seen_usage and text:
            collected.append(text)

    # Escape HTML tags for MDX compatibility
    return escape_html_tags("\n\n".join(collected))
190+
191+
192+
def generate_command_docs(cmd: click.Group) -> str:
    """Build the Markdown reference for every subcommand of ``cmd``.

    Subcommands are emitted in name order. Each one gets a level-3 heading,
    its description (if any), a fenced usage block, an options table (if any)
    and a horizontal rule.

    Args:
        cmd: The Click group whose subcommands are documented.

    Returns:
        The complete Markdown document as a single string.
    """
    # The generated file is imported into another page, so it intentionally
    # starts with a level-2 heading; MD041 ("first line in file should be a
    # top level heading") is disabled for it.
    out = [
        "<!-- markdownlint-disable MD041 -->",
        "",
        "## Subcommands Reference",
        "",
    ]

    root_ctx = click.core.Context(cmd, info_name=cmd.name)
    ordered = sorted(cmd.commands.values(), key=lambda sub: sub.name or "")

    # Generate only the per-command details section.
    for sub_cmd in ordered:
        sub_ctx = click.core.Context(sub_cmd, info_name=sub_cmd.name, parent=root_ctx)
        help_text = sub_cmd.get_help(sub_ctx)
        usage = clean_usage_line(sub_cmd.get_usage(sub_ctx))
        description = extract_description(help_text)

        out += [f"### `{sub_cmd.name}`", ""]

        if description:
            out += [description, ""]

        out += ["**Usage:**", "", "```bash", usage, "```", ""]

        options_table = format_options_section(help_text)
        if options_table:
            out += ["**Options:**", "", options_table]

        out += ["---", ""]

    return "\n".join(out)
242+
243+
244+
def main() -> None:
    """Generate the CLI reference and write it to the docs tree.

    The output goes to ``docs/docs/core/cli-commands.md`` under the project
    root. The file is rewritten only when its content differs from what is
    already on disk. Any exception is reported with a traceback and the
    process exits with status 1.
    """
    print("Generating CocoIndex CLI documentation...")

    try:
        markdown_content = generate_command_docs(cli)

        docs_dir = project_root / "docs" / "docs" / "core"
        output_file = docs_dir / "cli-commands.md"
        docs_dir.mkdir(parents=True, exist_ok=True)

        # Skip the write when the file already holds identical content.
        up_to_date = (
            output_file.exists()
            and output_file.read_text(encoding="utf-8") == markdown_content
        )
        if up_to_date:
            print(f"CLI documentation is up to date at: {output_file}")
            return

        output_file.write_text(markdown_content, encoding="utf-8")
        print(f"CLI documentation generated successfully at: {output_file}")
        print(
            f"Generated {len(markdown_content.splitlines())} lines of documentation"
        )

    except Exception as e:
        print(f"Error generating documentation: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)