Skip to content

Commit eae336e

Browse files
authored
Updating docstring output format to use Google doc style (#49)
1 parent a26db74 commit eae336e

File tree

7 files changed

+314
-118
lines changed

7 files changed

+314
-118
lines changed

pybind11_mkdoc/mkdoc_lib.py

Lines changed: 138 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# Extract documentation from C++ header files to use it in Python bindings
66
#
77

8+
from __future__ import annotations
9+
810
import contextlib
911
import ctypes.util
1012
import os
@@ -108,6 +110,13 @@ def sanitize_name(name):
108110
return "mkd_doc_" + name
109111

110112

113+
param_re = re.compile(r"[\\@]param\s+([\w:]+)\s*(.*)")
114+
t_param_re = re.compile(r"[\\@]tparam\s+([\w:]+)\s*(.*)")
115+
return_re = re.compile(r"[\\@]returns?\s+(.*)")
116+
raises_re = re.compile(r"[\\@](?:exception|throws?)\s+([\w:]+)(.*)")
117+
any_dox_re = re.compile(r"[\\@].*")
118+
119+
111120
def process_comment(comment):
112121
result = ""
113122

@@ -135,7 +144,6 @@ def process_comment(comment):
135144

136145
# Doxygen tags
137146
cpp_group = r"([^\s]+)"
138-
param_group = r"([\[\w:,\]]+)"
139147

140148
s = result
141149
s = re.sub(rf"[\\@][cp]\s+{cpp_group}", r"``\1``", s)
@@ -144,15 +152,74 @@ def process_comment(comment):
144152
s = re.sub(rf"[\\@]em\s+{cpp_group}", r"*\1*", s)
145153
s = re.sub(rf"[\\@]b\s+{cpp_group}", r"**\1**", s)
146154
s = re.sub(rf"[\\@]ingroup\s+{cpp_group}", r"", s)
147-
s = re.sub(rf"[\\@]param{param_group}?\s+{cpp_group}", r"\n\n$Parameter ``\2``:\n\n", s)
148-
s = re.sub(rf"[\\@]tparam{param_group}?\s+{cpp_group}", r"\n\n$Template parameter ``\2``:\n\n", s)
155+
156+
# Add arguments, return type, and exceptions
157+
lines = s.splitlines()
158+
rm_lines = []
159+
params = {}
160+
t_params = {}
161+
raises = {}
162+
ret = []
163+
add_to = None
164+
for k, line in enumerate(lines):
165+
if m := param_re.match(line):
166+
name, text = m.groups()
167+
params[name] = text.strip()
168+
rm_lines.append(k)
169+
add_to = (params, name)
170+
elif m := t_param_re.match(line):
171+
name, text = m.groups()
172+
t_params[name] = text.strip()
173+
rm_lines.append(k)
174+
add_to = (t_params, name)
175+
elif m := return_re.match(line):
176+
text, = m.groups()
177+
ret.append(text.strip())
178+
add_to = (ret, len(ret) - 1)
179+
rm_lines.append(k)
180+
elif m := raises_re.match(line):
181+
name, text = m.groups()
182+
raises[name] = text.strip()
183+
add_to = (raises, name)
184+
rm_lines.append(k)
185+
elif m := any_dox_re.match(line):
186+
add_to = None
187+
elif add_to is not None:
188+
add_to[0][add_to[1]] += " " + line.strip()
189+
rm_lines.append(k)
190+
191+
# If we had any hits, then remove the old lines, fill with the new lines, and convert back to s
192+
if rm_lines:
193+
rm_lines.sort(reverse=True)
194+
for k in rm_lines:
195+
lines.pop(k)
196+
197+
new_lines = []
198+
if params:
199+
new_lines.append("Args:")
200+
new_lines += [f" {name}: {text}" for name, text in params.items()]
201+
new_lines.append("")
202+
if t_params:
203+
new_lines.append("Template Args:")
204+
new_lines += [f" {name}: {text}" for name, text in t_params.items()]
205+
new_lines.append("")
206+
if ret:
207+
new_lines.append("Returns:")
208+
new_lines += [f" {text}" for text in ret]
209+
new_lines.append("")
210+
if raises:
211+
new_lines.append("Raises:")
212+
new_lines += [f" {name}: {text}" for name, text in raises.items()]
213+
new_lines.append("")
214+
215+
idx = rm_lines[-1]
216+
lines = [*lines[0:idx], *new_lines, *lines[idx:]]
217+
s = "\n".join(lines)
149218

150219
# Remove class and struct tags
151220
s = re.sub(r"[\\@](class|struct)\s+.*", "", s)
152221

153222
for in_, out_ in {
154-
"returns": "Returns",
155-
"return": "Returns",
156223
"authors": "Authors",
157224
"author": "Author",
158225
"copyright": "Copyright",
@@ -161,9 +228,6 @@ def process_comment(comment):
161228
"sa": "See also",
162229
"see": "See also",
163230
"extends": "Extends",
164-
"exception": "Throws",
165-
"throws": "Throws",
166-
"throw": "Throws",
167231
}.items():
168232
s = re.sub(rf"[\\@]{in_}\s*", rf"\n\n${out_}:\n\n", s)
169233

@@ -214,15 +278,70 @@ def process_comment(comment):
214278
elif in_code_segment:
215279
result += x.strip()
216280
else:
217-
for y in re.split(r"(?: *\n *){2,}", x):
218-
wrapped = wrapper.fill(re.sub(r"\s+", " ", y).strip())
219-
if len(wrapped) > 0 and wrapped[0] == "$":
220-
result += wrapped[1:] + "\n"
221-
wrapper.initial_indent = wrapper.subsequent_indent = " " * 4
281+
wrapped = []
282+
paragraph = []
283+
284+
def get_prefix_and_indent(line) -> tuple[str | None, str]:
    """Split a list-marker or label prefix off the start of ``line``.

    Args:
        line: A single line of text, including its leading whitespace.

    Returns:
        A ``(prefix, indent)`` pair. When the line starts (after its
        indentation) with a bullet (``*``, ``-``, ``•``), a numbered-list
        marker (``1.``, ``2)``, ``(3)``) or a ``word:`` label, ``prefix``
        is the leading whitespace plus that marker and ``indent`` is a run
        of spaces of the same length, suitable as a hanging indent.
        Otherwise ``prefix`` is ``None`` and ``indent`` holds one space per
        leading whitespace character of ``line``.
    """
    # Leading whitespace is matched with ``\s*`` instead of being rebuilt as
    # literal spaces, so tab-indented markers are recognised too.
    # The lookahead after ``word:`` keeps C++ scope operators (``std::vector``)
    # and URL schemes (``http://``) from being mistaken for a label.
    m = re.match(
        r"\s*(?:"
        r"[*\-•]\s|\(?\d+[.)]\s|\w+:(?!:|//)\s*"
        r")",
        line,
    )
    if m:
        g = m.group(0)
        return g, " " * len(g)
    return None, " " * (len(line) - len(line.lstrip()))
297+
298+
def flush_paragraph(paragraph=paragraph, wrapped=wrapped):
299+
if not paragraph:
300+
return
301+
302+
# Detect bullet/number from first line
303+
first_line = paragraph[0]
304+
prefix, indent_str = get_prefix_and_indent(first_line)
305+
306+
# Combine paragraph into single string (replace internal line breaks with space)
307+
para_text = " ".join(line.strip() for line in paragraph)
308+
309+
if prefix:
310+
content = para_text[len(prefix.lstrip()) :]
311+
wrapper.initial_indent = prefix
312+
wrapper.subsequent_indent = indent_str
313+
if content == "":
314+
# This paragraph is just the prefix
315+
wrapped.append(prefix)
316+
paragraph.clear()
317+
return
222318
else:
223-
if len(wrapped) > 0:
224-
result += wrapped + "\n\n"
225-
wrapper.initial_indent = wrapper.subsequent_indent = ""
319+
content = para_text.lstrip()
320+
wrapper.initial_indent = indent_str
321+
wrapper.subsequent_indent = indent_str
322+
323+
wrapped.append(wrapper.fill(content))
324+
paragraph.clear()
325+
326+
current_prefix = None
327+
current_indent = ""
328+
for line in x.splitlines():
329+
if not line.strip():
330+
flush_paragraph()
331+
wrapped.append(line) # preserve blank lines
332+
continue
333+
334+
prefix, indent = get_prefix_and_indent(line)
335+
if paragraph and ((indent != current_indent) or (prefix and prefix != current_prefix)):
336+
# Prefix/indent changed → start new paragraph
337+
flush_paragraph()
338+
339+
paragraph.append(line)
340+
current_prefix = prefix
341+
current_indent = indent
342+
343+
flush_paragraph()
344+
result += "\n".join(wrapped)
226345
return result.rstrip().lstrip("\n")
227346

228347

@@ -300,10 +419,7 @@ def read_args(args):
300419
if os.path.isfile(library_file):
301420
cindex.Config.set_library_file(library_file)
302421
else:
303-
msg = (
304-
"Failed to find libclang.dll! "
305-
"Set the LIBCLANG_PATH environment variable to provide a path to it."
306-
)
422+
msg = "Failed to find libclang.dll! Set the LIBCLANG_PATH environment variable to provide a path to it."
307423
raise FileNotFoundError(msg)
308424
else:
309425
library_file = ctypes.util.find_library("libclang.dll")
@@ -423,6 +539,7 @@ def write_header(comments, out_file=sys.stdout):
423539
#define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3
424540
#define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4
425541
#define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5
542+
#define MKD_DOC6(n1, n2, n3, n4, n5, n6) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
426543
#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
427544
#define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
428545
@@ -439,7 +556,7 @@ def write_header(comments, out_file=sys.stdout):
439556
for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
440557
if name == name_prev:
441558
name_ctr += 1
442-
name = name + "_%i" % name_ctr
559+
name = name + f"_{name_ctr}"
443560
else:
444561
name_prev = name
445562
name_ctr = 1

tests/cli_test.py

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,13 @@
1-
import sys
21
import subprocess
2+
import sys
33
from pathlib import Path
44

55
import pytest
66

77
DIR = Path(__file__).resolve().parent
88

9-
expected = """\
10-
/*
11-
This file contains docstrings for use in the Python bindings.
12-
Do not edit! They were automatically extracted by pybind11_mkdoc.
13-
*/
14-
15-
#define MKD_EXPAND(x) x
16-
#define MKD_COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
17-
#define MKD_VA_SIZE(...) MKD_EXPAND(MKD_COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0))
18-
#define MKD_CAT1(a, b) a ## b
19-
#define MKD_CAT2(a, b) MKD_CAT1(a, b)
20-
#define MKD_DOC1(n1) mkd_doc_##n1
21-
#define MKD_DOC2(n1, n2) mkd_doc_##n1##_##n2
22-
#define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3
23-
#define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4
24-
#define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5
25-
#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
26-
#define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
27-
28-
#if defined(__GNUG__)
29-
#pragma GCC diagnostic push
30-
#pragma GCC diagnostic ignored "-Wunused-variable"
31-
#endif
32-
33-
34-
static const char *mkd_doc_RootLevelSymbol =
35-
R"doc(Root-level symbol. Magna fermentum iaculis eu non diam phasellus
36-
vestibulum.)doc";
37-
38-
static const char *mkd_doc_drake_MidLevelSymbol =
39-
R"doc(1. Begin first ordered list element. Rutrum quisque non tellus orci ac
40-
auctor. End first ordered list element. 2. Begin second ordered list
41-
element. Ipsum faucibus vitae aliquet nec. Ligula ullamcorper
42-
malesuada proin libero. End second ordered list element. 3. Begin
43-
third ordered list element. Dictum sit amet justo donec enim. Pharetra
44-
convallis posuere morbi leo urna molestie. End third ordered list
45-
element.
46-
47-
Senectus et netus et malesuada fames ac. Tincidunt lobortis feugiat
48-
vivamus at augue eget arcu dictum varius.)doc";
49-
50-
#if defined(__GNUG__)
51-
#pragma GCC diagnostic pop
52-
#endif
53-
54-
"""
9+
with open(DIR / "sample_header_docs" / "sample_header_truth.h") as f:
10+
expected = f.read()
5511

5612

5713
@pytest.mark.parametrize(

tests/long_parameter_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ def test_long_parameter(capsys):
1212

1313
res = capsys.readouterr()
1414
expected = """\
15-
Parameter ``x``:
16-
- Begin first parameter description. Senectus et netus et
17-
malesuada fames ac. End first parameter description.)doc";
15+
Args:
16+
x: - Begin first parameter description. Senectus et netus et
17+
malesuada fames ac. End first parameter description.)doc";
1818
"""
1919

2020
assert expected in res.out
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#pragma once
2+
3+
#include <map>
4+
#include <string>
5+
#include <vector>
6+
7+
/**
8+
* @class Base
9+
* @brief A simple base class.
10+
*/
11+
class Base {
12+
public:
13+
/**
14+
* @brief Description for method1.
15+
*
16+
* This is the extended description for method1.
17+
*
18+
* @param p1 I am the first parameter.
19+
* @param p2 I am the second parameter.
20+
* @return An integer is what I return.
21+
*
22+
* @throws runtime_error Throws runtime error if p1 is empty.
23+
*/
24+
int method1(std::vector<float> p1, std::map<std::string,std::string> p2);
25+
26+
/**
27+
* @brief Description for method1.
28+
*
29+
* This is the extended description for method1.
30+
*
31+
* @param p1 I am a very long description for parameter 1. Let's ensure that this gets wrapped properly.
32+
* @param p2 I am a very long description for parameter 2.
33+
* However, I'm broken out onto two lines. Will this be parsed correctly?
34+
*
35+
* @return An integer is what I return.
36+
*
37+
* @throw runtime_error Throws runtime error if p1 is 0.
38+
* @exception invalid_argument Throws invalid_argument error if p2 is 0.
39+
*/
40+
void method2(int p1, int p2);
41+
};

0 commit comments

Comments
 (0)