diff --git a/pybind11_mkdoc/mkdoc_lib.py b/pybind11_mkdoc/mkdoc_lib.py index c34c82a..6f57054 100755 --- a/pybind11_mkdoc/mkdoc_lib.py +++ b/pybind11_mkdoc/mkdoc_lib.py @@ -5,6 +5,8 @@ # Extract documentation from C++ header files to use it in Python bindings # +from __future__ import annotations + import contextlib import ctypes.util import os @@ -108,6 +110,13 @@ def sanitize_name(name): return "mkd_doc_" + name +param_re = re.compile(r"[\\@]param\s+([\w:]+)\s*(.*)") +t_param_re = re.compile(r"[\\@]tparam\s+([\w:]+)\s*(.*)") +return_re = re.compile(r"[\\@]returns?\s+(.*)") +raises_re = re.compile(r"[\\@](?:exception|throws?)\s+([\w:]+)(.*)") +any_dox_re = re.compile(r"[\\@].*") + + def process_comment(comment): result = "" @@ -135,7 +144,6 @@ def process_comment(comment): # Doxygen tags cpp_group = r"([^\s]+)" - param_group = r"([\[\w:,\]]+)" s = result s = re.sub(rf"[\\@][cp]\s+{cpp_group}", r"``\1``", s) @@ -144,15 +152,74 @@ def process_comment(comment): s = re.sub(rf"[\\@]em\s+{cpp_group}", r"*\1*", s) s = re.sub(rf"[\\@]b\s+{cpp_group}", r"**\1**", s) s = re.sub(rf"[\\@]ingroup\s+{cpp_group}", r"", s) - s = re.sub(rf"[\\@]param{param_group}?\s+{cpp_group}", r"\n\n$Parameter ``\2``:\n\n", s) - s = re.sub(rf"[\\@]tparam{param_group}?\s+{cpp_group}", r"\n\n$Template parameter ``\2``:\n\n", s) + + # Add arguments, return type, and exceptions + lines = s.splitlines() + rm_lines = [] + params = {} + t_params = {} + raises = {} + ret = [] + add_to = None + for k, line in enumerate(lines): + if m := param_re.match(line): + name, text = m.groups() + params[name] = text.strip() + rm_lines.append(k) + add_to = (params, name) + elif m := t_param_re.match(line): + name, text = m.groups() + t_params[name] = text.strip() + rm_lines.append(k) + add_to = (t_params, name) + elif m := return_re.match(line): + text, = m.groups() + ret.append(text.strip()) + add_to = (ret, len(ret) - 1) + rm_lines.append(k) + elif m := raises_re.match(line): + name, text = m.groups() + raises[name] = text.strip() + add_to = (raises, name) + rm_lines.append(k) + elif m := any_dox_re.match(line): + add_to = None + elif add_to is not None: + add_to[0][add_to[1]] += " " + line.strip() + rm_lines.append(k) + + # If we had any hits, then remove the old lines, fill with the new lines, and convert back to s + if rm_lines: + rm_lines.sort(reverse=True) + for k in rm_lines: + lines.pop(k) + + new_lines = [] + if params: + new_lines.append("Args:") + new_lines += [f" {name}: {text}" for name, text in params.items()] + new_lines.append("") + if t_params: + new_lines.append("Template Args:") + new_lines += [f" {name}: {text}" for name, text in t_params.items()] + new_lines.append("") + if ret: + new_lines.append("Returns:") + new_lines += [f" {text}" for text in ret] + new_lines.append("") + if raises: + new_lines.append("Raises:") + new_lines += [f" {name}: {text}" for name, text in raises.items()] + new_lines.append("") + + idx = rm_lines[-1] + lines = [*lines[0:idx], *new_lines, *lines[idx:]] + s = "\n".join(lines) # Remove class and struct tags s = re.sub(r"[\\@](class|struct)\s+.*", "", s) for in_, out_ in { - "returns": "Returns", - "return": "Returns", "authors": "Authors", "author": "Author", "copyright": "Copyright", @@ -161,9 +228,6 @@ def process_comment(comment): "sa": "See also", "see": "See also", "extends": "Extends", - "exception": "Throws", - "throws": "Throws", - "throw": "Throws", }.items(): s = re.sub(rf"[\\@]{in_}\s*", rf"\n\n${out_}:\n\n", s) @@ -214,15 +278,70 @@ def process_comment(comment): elif in_code_segment: result += x.strip() else: - for y in re.split(r"(?: *\n *){2,}", x): - wrapped = wrapper.fill(re.sub(r"\s+", " ", y).strip()) - if len(wrapped) > 0 and wrapped[0] == "$": - result += wrapped[1:] + "\n" - wrapper.initial_indent = wrapper.subsequent_indent = " " * 4 + wrapped = [] + paragraph = [] + + def get_prefix_and_indent(line) -> tuple[str | None, str]: + indent = len(line) - len(line.lstrip()) + indent_str = " " * indent + m = re.match( + rf"{indent_str}(" + r"(?:[*\-•]\s)|(?:\(?\d+[\.)]\s)|(?:\w+:)" + r"\s*)", + line, + ) + if m: + g = m.group(0) + return g, " " * len(g) + return None, indent_str + + def flush_paragraph(paragraph=paragraph, wrapped=wrapped): + if not paragraph: + return + + # Detect bullet/number from first line + first_line = paragraph[0] + prefix, indent_str = get_prefix_and_indent(first_line) + + # Combine paragraph into single string (replace internal line breaks with space) + para_text = " ".join(line.strip() for line in paragraph) + + if prefix: + content = para_text[len(prefix.lstrip()) :] + wrapper.initial_indent = prefix + wrapper.subsequent_indent = indent_str + if content == "": + # This paragraph is just the prefix + wrapped.append(prefix) + paragraph.clear() + return else: - if len(wrapped) > 0: - result += wrapped + "\n\n" - wrapper.initial_indent = wrapper.subsequent_indent = "" + content = para_text.lstrip() + wrapper.initial_indent = indent_str + wrapper.subsequent_indent = indent_str + + wrapped.append(wrapper.fill(content)) + paragraph.clear() + + current_prefix = None + current_indent = "" + for line in x.splitlines(): + if not line.strip(): + flush_paragraph() + wrapped.append(line) # preserve blank lines + continue + + prefix, indent = get_prefix_and_indent(line) + if paragraph and ((indent != current_indent) or (prefix and prefix != current_prefix)): + # Prefix/indent changed → start new paragraph + flush_paragraph() + + paragraph.append(line) + current_prefix = prefix + current_indent = indent + + flush_paragraph() + result += "\n".join(wrapped) return result.rstrip().lstrip("\n") @@ -300,10 +419,7 @@ def read_args(args): if os.path.isfile(library_file): cindex.Config.set_library_file(library_file) else: - msg = ( - "Failed to find libclang.dll! " - "Set the LIBCLANG_PATH environment variable to provide a path to it." - ) + msg = "Failed to find libclang.dll! Set the LIBCLANG_PATH environment variable to provide a path to it." raise FileNotFoundError(msg) else: library_file = ctypes.util.find_library("libclang.dll") @@ -423,6 +539,7 @@ def write_header(comments, out_file=sys.stdout): #define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3 #define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4 #define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5 +#define MKD_DOC6(n1, n2, n3, n4, n5, n6) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6 #define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 #define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) @@ -439,7 +556,7 @@ def write_header(comments, out_file=sys.stdout): for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])): if name == name_prev: name_ctr += 1 - name = name + "_%i" % name_ctr + name = name + f"_{name_ctr}" else: name_prev = name name_ctr = 1 diff --git a/tests/cli_test.py b/tests/cli_test.py index 6829cd5..491479e 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -1,57 +1,13 @@ -import sys import subprocess +import sys from pathlib import Path import pytest DIR = Path(__file__).resolve().parent -expected = """\ -/* - This file contains docstrings for use in the Python bindings. - Do not edit! They were automatically extracted by pybind11_mkdoc. - */ - -#define MKD_EXPAND(x) x -#define MKD_COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT -#define MKD_VA_SIZE(...) MKD_EXPAND(MKD_COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0)) -#define MKD_CAT1(a, b) a ## b -#define MKD_CAT2(a, b) MKD_CAT1(a, b) -#define MKD_DOC1(n1) mkd_doc_##n1 -#define MKD_DOC2(n1, n2) mkd_doc_##n1##_##n2 -#define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3 -#define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4 -#define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5 -#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 -#define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) - -#if defined(__GNUG__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif - - -static const char *mkd_doc_RootLevelSymbol = -R"doc(Root-level symbol. Magna fermentum iaculis eu non diam phasellus -vestibulum.)doc"; - -static const char *mkd_doc_drake_MidLevelSymbol = -R"doc(1. Begin first ordered list element. Rutrum quisque non tellus orci ac -auctor. End first ordered list element. 2. Begin second ordered list -element. Ipsum faucibus vitae aliquet nec. Ligula ullamcorper -malesuada proin libero. End second ordered list element. 3. Begin -third ordered list element. Dictum sit amet justo donec enim. Pharetra -convallis posuere morbi leo urna molestie. End third ordered list -element. - -Senectus et netus et malesuada fames ac. Tincidunt lobortis feugiat -vivamus at augue eget arcu dictum varius.)doc"; - -#if defined(__GNUG__) -#pragma GCC diagnostic pop -#endif - -""" +with open(DIR / "sample_header_docs" / "sample_header_truth.h") as f: + expected = f.read() @pytest.mark.parametrize( diff --git a/tests/long_parameter_test.py b/tests/long_parameter_test.py index 5b93992..530fc37 100644 --- a/tests/long_parameter_test.py +++ b/tests/long_parameter_test.py @@ -12,9 +12,9 @@ def test_long_parameter(capsys): res = capsys.readouterr() expected = """\ -Parameter ``x``: - - Begin first parameter description. Senectus et netus et - malesuada fames ac. End first parameter description.)doc"; +Args: + x: - Begin first parameter description. Senectus et netus et + malesuada fames ac. End first parameter description.)doc"; """ assert expected in res.out diff --git a/tests/sample_header_docs/sample_header_2.h b/tests/sample_header_docs/sample_header_2.h new file mode 100644 index 0000000..c62cb80 --- /dev/null +++ b/tests/sample_header_docs/sample_header_2.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +/** + * @class Base + * @brief A simple base class. + */ +class Base { + public: + /** + * @brief Description for method1. + * + * This is the extended description for method1. + * + * @param p1 I am the first parameter. + * @param p2 I am the second parameter. + * @return An integer is what I return. + * + * @throws runtime_error Throws runtime error if p1 is empty. + */ + int method1(std::vector p1, std::map p2); + + /** + * @brief Description for method1. + * + * This is the extended description for method1. + * + * @param p1 I am a very long description for parameter 1. Let's ensure that this gets wrapped properly. + * @param p2 I am a very long description for parameter 2. + * However, I'm broken out onto two lines. Will this be parsed correctly? + * + * @return An integer is what I return. + * + * @throw runtime_error Throws runtime error if p1 is 0. + * @exception invalid_argument Throws invalid_argument error if p2 is 0. + */ + void method2(int p1, int p2); +}; diff --git a/tests/sample_header_docs/sample_header_2_truth.h b/tests/sample_header_docs/sample_header_2_truth.h new file mode 100644 index 0000000..c514b61 --- /dev/null +++ b/tests/sample_header_docs/sample_header_2_truth.h @@ -0,0 +1,64 @@ +/* + This file contains docstrings for use in the Python bindings. + Do not edit! They were automatically extracted by pybind11_mkdoc. + */ + +#define MKD_EXPAND(x) x +#define MKD_COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT +#define MKD_VA_SIZE(...) MKD_EXPAND(MKD_COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0)) +#define MKD_CAT1(a, b) a ## b +#define MKD_CAT2(a, b) MKD_CAT1(a, b) +#define MKD_DOC1(n1) mkd_doc_##n1 +#define MKD_DOC2(n1, n2) mkd_doc_##n1##_##n2 +#define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3 +#define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4 +#define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5 +#define MKD_DOC6(n1, n2, n3, n4, n5, n6) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6 +#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 +#define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) + +#if defined(__GNUG__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + + +static const char *mkd_doc_Base = R"doc(A simple base class.)doc"; + +static const char *mkd_doc_Base_method1 = +R"doc(Description for method1. + +This is the extended description for method1. + +Args: + p1: I am the first parameter. + p2: I am the second parameter. + +Returns: + An integer is what I return. + +Raises: + runtime_error: Throws runtime error if p1 is empty.)doc"; + +static const char *mkd_doc_Base_method2 = +R"doc(Description for method1. + +This is the extended description for method1. + +Args: + p1: I am a very long description for parameter 1. Let's ensure + that this gets wrapped properly. + p2: I am a very long description for parameter 2. However, I'm + broken out onto two lines. Will this be parsed correctly? + +Returns: + An integer is what I return. + +Raises: + runtime_error: Throws runtime error if p1 is 0. + invalid_argument: Throws invalid_argument error if p2 is 0.)doc"; + +#if defined(__GNUG__) +#pragma GCC diagnostic pop +#endif + diff --git a/tests/sample_header_docs/sample_header_truth.h b/tests/sample_header_docs/sample_header_truth.h new file mode 100644 index 0000000..d752cc5 --- /dev/null +++ b/tests/sample_header_docs/sample_header_truth.h @@ -0,0 +1,46 @@ +/* + This file contains docstrings for use in the Python bindings. + Do not edit! They were automatically extracted by pybind11_mkdoc. + */ + +#define MKD_EXPAND(x) x +#define MKD_COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT +#define MKD_VA_SIZE(...) MKD_EXPAND(MKD_COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0)) +#define MKD_CAT1(a, b) a ## b +#define MKD_CAT2(a, b) MKD_CAT1(a, b) +#define MKD_DOC1(n1) mkd_doc_##n1 +#define MKD_DOC2(n1, n2) mkd_doc_##n1##_##n2 +#define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3 +#define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4 +#define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5 +#define MKD_DOC6(n1, n2, n3, n4, n5, n6) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6 +#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 +#define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) + +#if defined(__GNUG__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + + +static const char *mkd_doc_RootLevelSymbol = +R"doc(Root-level symbol. Magna fermentum iaculis eu non diam phasellus +vestibulum.)doc"; + +static const char *mkd_doc_drake_MidLevelSymbol = +R"doc(1. Begin first ordered list element. Rutrum quisque non tellus orci ac + auctor. End first ordered list element. +2. Begin second ordered list element. Ipsum faucibus vitae aliquet + nec. Ligula ullamcorper malesuada proin libero. End second ordered + list element. +3. Begin third ordered list element. Dictum sit amet justo donec enim. + Pharetra convallis posuere morbi leo urna molestie. End third + ordered list element. + +Senectus et netus et malesuada fames ac. Tincidunt lobortis feugiat +vivamus at augue eget arcu dictum varius.)doc"; + +#if defined(__GNUG__) +#pragma GCC diagnostic pop +#endif + diff --git a/tests/sample_header_test.py b/tests/sample_header_test.py index 5b58d6e..64f992f 100644 --- a/tests/sample_header_test.py +++ b/tests/sample_header_test.py @@ -1,13 +1,16 @@ -import os +from pathlib import Path import pybind11_mkdoc -DIR = os.path.abspath(os.path.dirname(__file__)) +DIR = Path(__file__).resolve().parent def test_generate_headers(capsys, tmp_path): - comments = pybind11_mkdoc.mkdoc_lib.extract_all([os.path.join(DIR, "sample_header_docs", "sample_header.h")]) - assert ["mkd_doc_RootLevelSymbol", "mkd_doc_drake_MidLevelSymbol"] == [c[0] for c in comments] + with open(DIR / "sample_header_docs" / "sample_header_truth.h") as f: + expected = f.read() + + comments = pybind11_mkdoc.mkdoc_lib.extract_all([str(DIR / "sample_header_docs" / "sample_header.h")]) + assert [c[0] for c in comments] == ["mkd_doc_RootLevelSymbol", "mkd_doc_drake_MidLevelSymbol"] output = tmp_path / "docs.h" with output.open("w") as fd: @@ -17,52 +20,21 @@ def test_generate_headers(capsys, tmp_path): assert "warning" not in res.err assert "error" not in res.err - assert ( - output.read_text() - == """\ -/* - This file contains docstrings for use in the Python bindings. - Do not edit! They were automatically extracted by pybind11_mkdoc. - */ - -#define MKD_EXPAND(x) x -#define MKD_COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT -#define MKD_VA_SIZE(...) MKD_EXPAND(MKD_COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0)) -#define MKD_CAT1(a, b) a ## b -#define MKD_CAT2(a, b) MKD_CAT1(a, b) -#define MKD_DOC1(n1) mkd_doc_##n1 -#define MKD_DOC2(n1, n2) mkd_doc_##n1##_##n2 -#define MKD_DOC3(n1, n2, n3) mkd_doc_##n1##_##n2##_##n3 -#define MKD_DOC4(n1, n2, n3, n4) mkd_doc_##n1##_##n2##_##n3##_##n4 -#define MKD_DOC5(n1, n2, n3, n4, n5) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5 -#define MKD_DOC7(n1, n2, n3, n4, n5, n6, n7) mkd_doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 -#define DOC(...) MKD_EXPAND(MKD_EXPAND(MKD_CAT2(MKD_DOC, MKD_VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) - -#if defined(__GNUG__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif + assert output.read_text() == expected -static const char *mkd_doc_RootLevelSymbol = -R"doc(Root-level symbol. Magna fermentum iaculis eu non diam phasellus -vestibulum.)doc"; +def test_generate_headers_2(capsys, tmp_path): + with open(DIR / "sample_header_docs" / "sample_header_2_truth.h") as f: + expected = f.read() -static const char *mkd_doc_drake_MidLevelSymbol = -R"doc(1. Begin first ordered list element. Rutrum quisque non tellus orci ac -auctor. End first ordered list element. 2. Begin second ordered list -element. Ipsum faucibus vitae aliquet nec. Ligula ullamcorper -malesuada proin libero. End second ordered list element. 3. Begin -third ordered list element. Dictum sit amet justo donec enim. Pharetra -convallis posuere morbi leo urna molestie. End third ordered list -element. + comments = pybind11_mkdoc.mkdoc_lib.extract_all([str(DIR / "sample_header_docs" / "sample_header_2.h")]) -Senectus et netus et malesuada fames ac. Tincidunt lobortis feugiat -vivamus at augue eget arcu dictum varius.)doc"; + output = tmp_path / "docs.h" + with output.open("w") as fd: + pybind11_mkdoc.mkdoc_lib.write_header(comments, fd) -#if defined(__GNUG__) -#pragma GCC diagnostic pop -#endif + res = capsys.readouterr() -""" - ) + assert "warning" not in res.err + assert "error" not in res.err + assert output.read_text() == expected