Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .changelog/_unreleased.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
[[entries]]
id = "5be79248-7b86-465d-953c-d0c69ab64e8a"
type = "improvement"
description = "Implement support for NumPy-style docstrings"
author = "celsiusnarhwal"
pr = "https://github.com/NiklasRosenstein/pydoc-markdown/pull/279"
issues = [
"https://github.com/celsiusnarhwal/pydoc-markdown/issues/251",
]

[[entries]]
id = "4409675c-ea67-4c56-be5a-a7310f779c15"
type = "improvement"
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ tomli = "^2.0.0"
tomli_w = "^1.0.0"
yapf = ">=0.30.0"
watchdog = "*"
numpydoc = "^1.5.0"

[tool.poetry.dev-dependencies]
pytest = "*"
Expand Down
4 changes: 2 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ If you plan on using the [Novella][] integration, you may want to install it as:

### Features 🌟

* Understands multiple documentation styles (Sphinx, Google, Pydoc-Markdown specific) and converts them to properly
formatted Markdown
* Understands multiple documentation styles (Sphinx, Google, NumPy, Pydoc-Markdown specific) and converts them to
properly formatted Markdown
* Can parse docstrings for variables thanks to [docspec][] (`#:` block before or string literal after the statement)
* Generates links to other API objects per the documentation syntax (e.g. `#OtherClass` for the Pydoc-Markdown style)
* Configure the output using a YAML file or `pyproject.toml`, then you're only one command away from generating the
Expand Down
255 changes: 255 additions & 0 deletions src/pydoc_markdown/contrib/processors/numpy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
# -*- coding: utf8 -*-
# Copyright (c) 2019 Niklas Rosenstein
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

from __future__ import annotations

import dataclasses
import itertools
import re
import typing as t
import warnings
from contextlib import contextmanager

import docspec
from numpydoc.docscrape import NumpyDocString, Parameter # type: ignore[import]
from numpydoc.validate import validate # type: ignore[import]

from pydoc_markdown.interfaces import Processor, Resolver


@contextmanager
def _filter_numpydoc_warnings(action: warnings._ActionKind):
warnings.filterwarnings(action, module="numpydoc.docscrape")
yield
warnings.resetwarnings()


class _DocstringWrapper:
    # Wraps docstrings for use with numpydoc.validate.validate().
    # Intentionally has no class docstring: check_docstring_format() assigns the
    # docstring under test to __doc__ before each validation call.
    # NOTE(review): validate() appears to resolve this dotted name to locate the
    # object, so __qualname__ must spell out the full import path — confirm
    # against numpydoc.validate's lookup behavior.
    __qualname__ = "pydoc_markdown.contrib.processors.numpy._DocstringWrapper"


@dataclasses.dataclass
class NumpyProcessor(Processor):
    # numpydoc doesn't like when a heading appears twice in the same docstring so we have to use <span> tags to
    # keep numpydoc from recognizing the example headings. This also means the example code block has to be
    # delineated with HTML tags instead of Markdown syntax.
    """
    This processor parses NumPy-style docstrings and converts them to Markdown syntax.

    References
    ----------
    - https://numpydoc.readthedocs.io/en/latest/format.html

    Examples
    --------
    <pre>
    <code>
    <span>Parameters</span>
    ----------
    arg: str
        This argument should be a string.

    <span>Raises</span>
    ------
    ValueError
        If *arg* is not a string.

    <span>Returns</span>
    -------
    int
        The length of the string.
    </code>
    </pre>

    Renders as:

    Parameters
    ----------
    arg : str
        This argument should be a string.

    Raises
    ------
    ValueError
        If *arg* is not a string.

    Returns
    -------
    int
        The length of the string.

    @doc:fmt:numpy
    """

    # Maps each rendered output section title to the numpydoc section names whose
    # parsed contents feed into it. ClassVar so the dataclass machinery does not
    # treat it as an instance field.
    _SECTION_MAP: t.ClassVar[t.Dict[str, t.List[str]]] = {
        "Summary": ["Summary", "Extended Summary"],
        "Arguments": ["Parameters", "Other Parameters"],
        "Returns": ["Returns"],
        "Yields": ["Yields"],
        "Receives": ["Receives"],
        "Attributes": ["Attributes"],
        "Methods": ["Methods"],
        "Raises": ["Raises"],
        "Warns": ["Warns"],
        "Warnings": ["Warnings"],
        "See Also": ["See Also"],
        "Notes": ["Notes"],
        "References": ["References"],
        "Examples": ["Examples"],
    }

    @staticmethod
    def check_docstring_format(docstring: str) -> bool:
        """Return True if *docstring* validates cleanly as a NumPy-style docstring."""
        _DocstringWrapper.__doc__ = docstring

        # Escalate numpydoc's parser warnings to errors so a malformed docstring is
        # reported as "not NumPy style" rather than silently half-parsed.
        with _filter_numpydoc_warnings("error"):
            try:
                # validate() returns a report dict; an empty "Errors" list means the
                # docstring is well-formed.
                return not validate(_DocstringWrapper.__qualname__).get("Errors")
            except Warning:
                return False

    def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
        """Convert the NumPy-style docstrings of all API objects in *modules* to Markdown."""
        docspec.visit(modules, self._process)

    def _process(self, node: docspec.ApiObject) -> None:
        """Rewrite *node*'s docstring content in place as Markdown. No-op without a docstring."""
        if not node.docstring:
            return

        docstring = NumpyDocString(node.docstring.content)
        lines: t.List[str] = []

        # Filter self._SECTION_MAP to only include sections used in the docstring
        active_sections = {k: v for k, v in self._SECTION_MAP.items() if any(docstring.get(sec) for sec in v)}

        # numpydoc is opinionated when it comes to section order so we have to preserve the order of the original
        # docstring ourselves

        # First, we create a regex pattern to match all section headings in the docstring. A heading is the
        # keyword followed by a line of hyphens of the same length.
        keyword_regex = re.compile(
            "|".join(
                rf"{keyword}(?:\r?\n)-{{{len(keyword)}}}" for keyword in itertools.chain(*active_sections.values())
            )
        )

        # Second, we strip each pattern match of hyphens and whitespace, leaving just the keyword
        keyword_matches = [match.replace("-", "").strip() for match in keyword_regex.findall(node.docstring.content)]

        # Third, we determine the section order in the eventual output based on the order of the headings in the
        # original docstring (but always starting with the summary, which has no heading of its own)
        section_order = [
            "Summary",
            *[next(key for key, value in active_sections.items() if keyword in value) for keyword in keyword_matches],
        ]

        # Finally, we sort active_sections according to the section order we just determined
        for section, keywords in sorted(active_sections.items(), key=lambda x: section_order.index(x[0])):
            lines.extend(self._get_section_contents(docstring, section, keywords))

        node.docstring.content = "\n".join(lines)

    def _get_section_contents(self, docstring: NumpyDocString, section: str, keywords: list) -> list[str]:
        """Render the output *section* from the parsed numpydoc sections named in *keywords*."""
        # One entry per numpydoc section; each entry is that section's parsed content.
        # (The original wrapped this in a single-argument itertools.chain(), which is a no-op.)
        contents = [docstring.get(sec) for sec in keywords]

        if section == "Summary":
            return self._parse_summary(contents)
        else:
            # contents needs to be flattened for all sections aside from Summary
            contents = list(itertools.chain(*contents))
            if section in ["Notes", "References"]:
                return self._parse_notes_and_references(section, contents)
            elif section == "Examples":
                return self._parse_examples(contents)
            elif section == "See Also":
                return self._parse_see_also(contents)
            elif any(isinstance(item, Parameter) for item in contents):
                return self._parse_parameters(section, contents)
            else:
                return [f"\n**{section}**\n", *contents] if contents else []

    @staticmethod
    def _parse_summary(contents: list[str]) -> list[str]:
        """Join the Summary and (optional) Extended Summary sections with a blank line."""
        summary, extended = contents
        return [*summary, "", *extended] if extended else [*summary]

    @staticmethod
    def _parse_parameters(section: str, parameters: list[Parameter]) -> list[str]:
        """Render a parameter-style section (Parameters, Returns, Raises, ...) as a Markdown bullet list.

        Each numpydoc Parameter is a (name, type, description-lines) triple; any of
        the three may be empty, so every combination gets its own bullet format.
        """
        lines = []

        for param in parameters:
            name, cls, desc = param
            desc = "\n".join(desc)

            if name and cls and desc:
                lines.append(f"* **{name}** (`{cls}`): {desc}")
            elif name and cls:
                lines.append(f"* **{name}** (`{cls}`)")
            elif name and desc:
                lines.append(f"* **{name}**: {desc}")
            elif cls and desc:
                lines.append(f"* `{cls}`: {desc}")
            elif name:
                lines.append(f"* **{name}**")
            elif cls:
                lines.append(f"* `{cls}`")
            elif desc:
                lines.append(f"* {desc}")

        return [f"\n**{section}**\n", *lines] if lines else []

    @staticmethod
    def _parse_notes_and_references(section: str, contents: list[str]) -> list[str]:
        """Convert reST citation markers ([id]_ in Notes, .. [id] in References) to Markdown."""
        content_string = "\n".join(contents)
        # Raw string: the original pattern used a plain string with "\." / "\[" / "\w",
        # which are invalid escape sequences in ordinary string literals.
        citations = re.compile(r"(\.\. )?\[(?P<ref_id>\w+)][_ ]?")

        # Notes render citations as superscripts; References render them as a numbered list.
        replacements = {"Notes": "<sup>{ref_id}</sup>", "References": "{ref_id}. "}

        for match in citations.finditer(content_string):
            ref_id = match.group("ref_id")
            content_string = content_string.replace(match.group(0), replacements[section].format(ref_id=ref_id))

        return [f"\n**{section}**\n", *content_string.splitlines()]

    @staticmethod
    def _parse_examples(contents: list[str]) -> list[str]:
        # Wraps doctests in Python codeblocks and leaves all other content as is
        doctests = re.compile(r"(>>>(?:.+(?:\r?\n|$))+)", flags=re.MULTILINE)
        # Raw replacement string: "\g<0>" in a plain string literal is an invalid
        # escape sequence. re.sub expands \n and \g<0> in raw replacement templates.
        return [
            "\n**Examples**\n",
            *doctests.sub(r"```python\n\g<0>\n```", "\n".join(contents)).splitlines(),
        ]

    @staticmethod
    def _parse_see_also(contents: list[tuple]) -> list[str]:
        """Render See Also entries as bullets of reST-style roles with optional descriptions."""
        lines = []

        for group in contents:
            sublines = []
            # Each group is (list of (name, role) pairs, description-lines).
            objs, desc = group

            sublines.append("* " + ", ".join([f":{obj[1]}:`{obj[0]}`" if obj[1] else f"{obj[0]}" for obj in objs]))

            if desc:
                sublines[-1] += ": " + "\n".join(desc)

            lines.extend(sublines)

        return ["\n**See Also**\n", *lines]
2 changes: 1 addition & 1 deletion src/pydoc_markdown/contrib/processors/pydocmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class PydocmdProcessor(Processor):
def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
docspec.visit(modules, self._process)

def _process(self, node: docspec.ApiObject):
def _process(self, node: docspec.ApiObject) -> None:
if not node.docstring:
return
lines = []
Expand Down
49 changes: 42 additions & 7 deletions src/pydoc_markdown/contrib/processors/smart.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,42 @@
# IN THE SOFTWARE.

import dataclasses
import logging
import typing as t

import docspec
from typing_extensions import Protocol

from pydoc_markdown.contrib.processors.google import GoogleProcessor
from pydoc_markdown.contrib.processors.numpy import NumpyProcessor
from pydoc_markdown.contrib.processors.pydocmd import PydocmdProcessor
from pydoc_markdown.contrib.processors.sphinx import SphinxProcessor
from pydoc_markdown.interfaces import Processor, Resolver

logger = logging.getLogger(__name__)


class DelegatableProcessor(Protocol):
def _process(self, node: docspec.ApiObject) -> None:
...


class CheckCapableProcessor(DelegatableProcessor, Protocol):
def check_docstring_format(self, docstring: str) -> bool:
...


@dataclasses.dataclass
class SmartProcessor(Processor):
"""
This processor picks the #GoogleProcessor, #SphinxProcessor or #PydocmdProcessor after
This processor picks the #GoogleProcessor, #SphinxProcessor, #PydocmdProcessor, or #NumpyProcessor after
guessing which is appropriate from the syntax it finds in the docstring.
"""

google: GoogleProcessor = dataclasses.field(default_factory=GoogleProcessor)
pydocmd: PydocmdProcessor = dataclasses.field(default_factory=PydocmdProcessor)
sphinx: SphinxProcessor = dataclasses.field(default_factory=SphinxProcessor)
numpy: NumpyProcessor = dataclasses.field(default_factory=NumpyProcessor)

def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
docspec.visit(modules, self._process)
Expand All @@ -48,14 +64,33 @@ def _process(self, obj: docspec.ApiObject):
if not obj.docstring:
return None

for name in ("google", "pydocmd", "sphinx"):
object_name = ".".join(x.name for x in obj.path)
object_type = type(obj).__name__

processors: t.List[t.Tuple[str, DelegatableProcessor]] = [
("sphinx", self.sphinx),
("google", self.google),
("numpy", self.numpy),
("pydocmd", self.pydocmd),
]

checkable_processors: t.List[t.Tuple[str, CheckCapableProcessor]] = [
("sphinx", self.sphinx),
("google", self.google),
("numpy", self.numpy),
]

for name, processor in processors:
indicator = "@doc:fmt:" + name
if indicator in obj.docstring.content:
logger.info("Using `%s` processor for %s `%s` (explicit)", name, object_type, object_name)
obj.docstring.content = obj.docstring.content.replace(indicator, "")
return getattr(self, name)._process(obj)
return processor._process(obj)

for name, processor in checkable_processors:
if processor.check_docstring_format(obj.docstring.content):
logger.info("Using `%s` processor for %s `%s` (detected)", name, object_type, object_name)
return processor._process(obj)

if self.sphinx.check_docstring_format(obj.docstring.content):
return self.sphinx._process(obj)
if self.google.check_docstring_format(obj.docstring.content):
return self.google._process(obj)
logger.info("Using `pydocmd` processor for %s `%s` (default)", name, object_type, object_name)
return self.pydocmd._process(obj)
1 change: 1 addition & 0 deletions test/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ def assert_processor_result(processor, docstring, expected_output):
)
processor.process([module], None)
assert module.docstring
print(module.docstring.content)
assert_text_equals(module.docstring.content, textwrap.dedent(expected_output))
Loading