Skip to content

Commit fa9f444

Browse files
committed
docs(cli): improve docstring formatting for --help and markdown
- Add backticks around parameters, paths, and module names
- Fix paragraph formatting in generate_cli_docs.py
- Eliminate redundant blank lines in generated markdown
- Ensure consistent rendering in terminal and documentation

Fixes #1108
1 parent 0dc1a48 commit fa9f444

File tree

3 files changed

+501
-25
lines changed

3 files changed

+501
-25
lines changed

dev/generate_cli_docs.py

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to generate CLI documentation from CocoIndex Click commands.
4+
5+
This script uses md-click as the foundation but generates enhanced markdown
6+
documentation that's suitable for inclusion in the CocoIndex documentation site.
7+
"""
8+
9+
import re
import sys
from pathlib import Path

import click

# Locate the in-repo Python sources (<repo>/python) relative to this script.
project_root = Path(__file__).parent.parent
python_path = project_root / "python"

try:
    from cocoindex.cli import cli
except ImportError:
    # cocoindex is not importable from the current environment, so fall back
    # to the in-repo sources. The directory has to be on sys.path *before* the
    # import is attempted; adding it afterwards (as this script used to do)
    # cannot affect how the import is resolved.
    sys.path.insert(0, str(python_path))
    from cocoindex.cli import cli
19+
20+
21+
def clean_usage_line(usage: str) -> str:
    """Return a Click usage line rewritten as a plain ``cocoindex ...`` command.

    Every ``"Usage: cli "`` occurrence is replaced by ``"cocoindex "``. If the
    result still begins with ``"Usage: cocoindex "``, that prefix text is
    reduced to ``"cocoindex "`` as well. Any other input is returned unchanged.
    """
    already_named_prefix = "Usage: cocoindex "

    rewritten = usage.replace("Usage: cli ", "cocoindex ")
    if not rewritten.startswith(already_named_prefix):
        return rewritten

    # The program name was already "cocoindex"; only the "Usage:" label remains.
    return rewritten.replace(already_named_prefix, "cocoindex ")
29+
30+
31+
# An inline code span as this module understands it: two backticks with no
# backtick in between. Kept as a capturing group so it can be told apart from a
# URL match in an alternation and so that ``re.split`` keeps the spans.
_INLINE_CODE = r"(`[^`]*`)"
_INLINE_CODE_RE = re.compile(_INLINE_CODE)

# URL shapes that are rendered as inline code, in the order they are applied.
# Each pattern is alternated with ``_INLINE_CODE`` so that text which is already
# inside a code span is consumed whole and never re-wrapped.
_URL_PATTERNS = tuple(
    re.compile(_INLINE_CODE + "|" + url_pattern)
    for url_pattern in (
        # URLs with a ``<placeholder>`` (which may contain spaces or commas).
        r"http://localhost:<[^>]+>",
        r"https://[^<\s`]+<[^>]+>",
        # Comma-separated URL lists, e.g. "https://site1.com,http://localhost:3000".
        r"\bhttps?://[^\s,`]+,https?://[^\s`]+",
        # Any remaining standalone URL.
        r"\bhttps?://[^\s,`]+",
    )
)


def _wrap_url_in_code(match: "re.Match[str]") -> str:
    """Return existing code spans untouched and wrap URL matches in backticks."""
    existing_code_span = match.group(1)
    if existing_code_span is not None:
        return existing_code_span
    return f"`{match.group(0)}`"


def escape_html_tags(text: str) -> str:
    """Escape ``<``/``>`` outside inline code so MDX does not parse them as tags.

    URLs are first wrapped in backticks (turned into inline code); afterwards
    every ``<`` and ``>`` that is not inside an inline code span is replaced by
    ``&lt;`` / ``&gt;``. Text that is already inside a code span is left
    exactly as it is.

    The URL patterns deliberately carry no trailing negative lookahead. The
    earlier ``(?!`)(?!,)`` guards let the regex engine backtrack by one
    character and wrap a truncated URL: ``https://x.io, foo`` became
    `` `https://x.i`o, foo ``, and a URL inside an existing code span was split
    in two.

    Args:
        text: Help text fragment to make MDX-safe.

    Returns:
        The text with URLs rendered as inline code and stray angle brackets
        HTML-escaped.
    """
    for pattern in _URL_PATTERNS:
        text = pattern.sub(_wrap_url_in_code, text)

    # re.split with a capturing group alternates plain text (even indices) and
    # code spans (odd indices); only the plain text is escaped.
    segments = _INLINE_CODE_RE.split(text)
    return "".join(
        segment if index % 2 else segment.replace("<", "&lt;").replace(">", "&gt;")
        for index, segment in enumerate(segments)
    )
59+
60+
61+
# A new option entry: exactly two spaces of indentation followed by a dash.
# Written with an explicit repeat count so the amount of indentation is
# unambiguous.
_OPTION_LINE_RE = re.compile(r"^ {2}-")
# The gap of two or more whitespace characters between an option and its text.
_COLUMN_GAP_RE = re.compile(r"\s{2,}")


def _escape_table_pipes(cell: str) -> str:
    """Escape ``|`` so the text cannot terminate a markdown table cell early."""
    return cell.replace("|", "\\|")


def format_options_section(help_text: str) -> str:
    """Render the ``Options:`` section of a help text as a markdown table.

    The section runs from the line after ``Options:`` up to a following
    ``Commands:`` line, or to the end of the text. A line indented by exactly
    two spaces and starting with ``-`` opens a new option; the option and its
    description are separated by the first run of two or more whitespace
    characters. Any other non-empty line is treated as a continuation of the
    current option's description.

    The option-line check used to be
    ``startswith(" -") and not startswith(" ")``, which can never be true, so
    no option was ever recognised.

    Args:
        help_text: Full help text of a command.

    Returns:
        A two-column markdown table terminated by a newline, or ``""`` when the
        text has no ``Options:`` section or the section lists no options.
    """
    lines = help_text.split("\n")

    options_start = None
    options_end = len(lines)
    for index, line in enumerate(lines):
        heading = line.strip()
        if heading == "Options:":
            options_start = index
        elif heading == "Commands:":
            options_end = index
            break

    if options_start is None:
        return ""

    # Each entry is (option text, list of description fragments).
    entries = []
    for line in lines[options_start + 1 : options_end]:
        stripped = line.strip()
        if not stripped:
            continue

        if _OPTION_LINE_RE.match(line):
            content = line[2:]  # drop the two-space indentation
            gap = _COLUMN_GAP_RE.search(content)
            if gap:
                option = content[: gap.start()].strip()
                first_fragment = content[gap.end() :].strip()
                entries.append((option, [first_fragment] if first_fragment else []))
            else:
                # Option alone on its line; its description follows on the
                # continuation lines.
                entries.append((content.strip(), []))
        elif entries:
            entries[-1][1].append(stripped)

    if not entries:
        return ""

    rows = []
    for option, fragments in entries:
        # Escape HTML-like tags for MDX first, then pipes for the table.
        description = escape_html_tags(" ".join(fragments).strip())
        rows.append(
            f"| `{_escape_table_pipes(option)}` | {_escape_table_pipes(description)} |"
        )

    header = "| Option | Description |\n|--------|-------------|"
    return f"{header}\n" + "\n".join(rows) + "\n"
129+
130+
131+
# A command entry: exactly two spaces of indentation, the command name (which
# may contain hyphens), a gap of two or more whitespace characters, then the
# description.
_COMMAND_LINE_RE = re.compile(r"^ {2}([\w-]+)\s{2,}(.+)$")


def format_commands_section(help_text: str) -> str:
    """Render the ``Commands:`` section of a help text as a markdown table.

    Every line after the ``Commands:`` heading that looks like
    ``"  name   description"`` becomes one table row. Lines that do not match
    (blank lines, or entries without a description) are skipped. Descriptions
    longer than 80 characters are cut to 77 characters plus ``"..."``.

    The pattern used to allow only a single leading space and only ``\\w``
    characters in the name, so two-space indented entries and hyphenated
    command names were never recognised.

    Args:
        help_text: Full help text of a command group.

    Returns:
        A two-column markdown table terminated by a newline, or ``""`` when the
        text has no ``Commands:`` section or no parsable command lines.
    """
    lines = help_text.split("\n")

    commands_start = None
    for index, line in enumerate(lines):
        if line.strip() == "Commands:":
            commands_start = index
            break

    if commands_start is None:
        return ""

    rows = []
    for line in lines[commands_start + 1 :]:
        match = _COMMAND_LINE_RE.match(line)
        if match is None:
            continue

        command = match.group(1)
        description = match.group(2).strip()
        # Keep the table compact: truncate long descriptions.
        if len(description) > 80:
            description = description[:77] + "..."
        rows.append(f"| `{command}` | {description} |")

    if not rows:
        return ""

    header = "| Command | Description |\n|---------|-------------|"
    return f"{header}\n" + "\n".join(rows) + "\n"
169+
170+
171+
def extract_description(help_text: str) -> str:
    """Extract the free-text description from a command's help text.

    The description is the text between the usage block and the first
    ``Options:`` / ``Commands:`` heading. Consecutive non-empty lines are
    joined with a single space into one paragraph, and paragraphs (separated
    by one or more blank lines in the input) are joined with a blank line.

    A long usage line can be wrapped over several lines. Everything from the
    ``Usage:`` line up to the next blank line is therefore treated as part of
    the usage block and skipped, so wrapped usage fragments do not leak into
    the description.

    Args:
        help_text: Full help text of a command.

    Returns:
        The description with HTML-like tags escaped for MDX, or ``""`` when
        the help text has no description.
    """
    paragraphs = []
    current_paragraph = []
    seen_usage = False
    in_usage_block = False

    for line in help_text.split("\n"):
        stripped = line.strip()

        if line.startswith("Usage:"):
            seen_usage = True
            in_usage_block = True
            continue
        if stripped in ("Options:", "Commands:"):
            break
        if not seen_usage:
            continue

        if in_usage_block:
            # Non-empty lines here are wrapped usage continuations; the first
            # blank line ends the usage block.
            if not stripped:
                in_usage_block = False
            continue

        if stripped:
            current_paragraph.append(stripped)
        elif current_paragraph:
            # Blank line: close the paragraph collected so far. Repeated blank
            # lines are no-ops because the paragraph buffer is already empty.
            paragraphs.append(" ".join(current_paragraph))
            current_paragraph = []

    if current_paragraph:
        paragraphs.append(" ".join(current_paragraph))

    # Escape HTML tags for MDX compatibility.
    return escape_html_tags("\n\n".join(paragraphs))
217+
218+
219+
def generate_command_docs(cmd: click.Group) -> str:
    """Build the markdown reference for every subcommand of ``cmd``.

    Subcommands are emitted in name order. Each one gets a ``###`` heading,
    its description (when there is one), a fenced usage block, an options
    table (when the command has options) and a closing horizontal rule.

    Args:
        cmd: The Click group whose direct subcommands are documented.

    Returns:
        The complete markdown document as a single string.
    """
    root_ctx = click.core.Context(cmd, info_name=cmd.name)

    # The document opens with a heading (added to satisfy the MD041 lint rule).
    doc_lines = ["## Subcommands Reference", ""]

    ordered_commands = sorted(cmd.commands.values(), key=lambda c: c.name or "")
    for sub_cmd in ordered_commands:
        sub_ctx = click.core.Context(sub_cmd, info_name=sub_cmd.name, parent=root_ctx)
        help_text = sub_cmd.get_help(sub_ctx)
        usage = clean_usage_line(sub_cmd.get_usage(sub_ctx))
        description = extract_description(help_text)

        doc_lines += [f"### `{sub_cmd.name}`", ""]

        if description:
            doc_lines += [description, ""]

        doc_lines += ["**Usage:**", "", "```bash", usage, "```", ""]

        # The options table already ends with a newline, which yields the
        # blank line before the separator once everything is joined.
        options_table = format_options_section(help_text)
        if options_table:
            doc_lines += ["**Options:**", "", options_table]

        doc_lines += ["---", ""]

    return "\n".join(doc_lines)
264+
265+
266+
def main() -> None:
    """Generate the CLI reference and write it to the docs tree.

    The markdown is written to ``docs/docs/core/cli-commands.md`` under the
    project root. The file is only rewritten when its content differs from the
    freshly generated markdown. Any exception is reported with a traceback and
    the process exits with status 1.
    """
    print("Generating CocoIndex CLI documentation...")

    try:
        markdown_content = generate_command_docs(cli)

        docs_dir = project_root / "docs" / "docs" / "core"
        output_file = docs_dir / "cli-commands.md"
        docs_dir.mkdir(parents=True, exist_ok=True)

        # Skip the write when the file on disk already matches.
        is_up_to_date = (
            output_file.exists()
            and output_file.read_text(encoding="utf-8") == markdown_content
        )

        if is_up_to_date:
            print(f"CLI documentation is up to date at: {output_file}")
        else:
            output_file.write_text(markdown_content, encoding="utf-8")

            print(f"CLI documentation generated successfully at: {output_file}")
            line_count = len(markdown_content.splitlines())
            print(f"Generated {line_count} lines of documentation")

    except Exception as e:
        print(f"Error generating documentation: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)