Skip to content

Commit 2e000e9

Browse files
authored
docs: add Google-style docstring parser (#283)
Replaces the default `pydoc-markdown` shell script with a custom Python script calling the `pydoc-markdown` API directly. A custom patch of `GoogleProcessor` allows us to parse the Google-style docstrings and render the parameter comments with the actual parameters.
1 parent 8fcff20 commit 2e000e9

File tree

7 files changed

+273
-36
lines changed

7 files changed

+273
-36
lines changed

website/build_api_reference.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sed_no_backup() {
1111
}
1212

1313
# Create docspec dump of this package's source code through pydoc-markdown
14-
pydoc-markdown --quiet --dump > docspec-dump.jsonl
14+
python ./pydoc-markdown/generate_ast.py > docspec-dump.jsonl
1515
sed_no_backup "s#${PWD}/..#REPO_ROOT_PLACEHOLDER#g" docspec-dump.jsonl
1616

1717
# Create docspec dump from the right version of the apify-shared package

website/docusaurus.config.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ module.exports = {
6666
rehypePlugins: [externalLinkProcessor],
6767
editUrl: 'https://github.com/apify/apify-sdk-python/edit/master/website/',
6868
},
69+
theme: {
70+
customCss: require.resolve('./src/css/custom.css'),
71+
},
6972
}),
7073
],
7174
]),

website/pydoc-markdown/__init__.py

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""
2+
Replaces the default pydoc-markdown shell script with a custom Python script calling the pydoc-markdown API directly.
3+
4+
This script generates an AST from the Python source code in the `src` directory and prints it as a JSON array of docspec module dumps.
5+
"""
6+
7+
from pydoc_markdown.interfaces import Context
8+
from pydoc_markdown.contrib.loaders.python import PythonLoader
9+
from pydoc_markdown.contrib.processors.filter import FilterProcessor
10+
from pydoc_markdown.contrib.processors.crossref import CrossrefProcessor
11+
from pydoc_markdown.contrib.renderers.markdown import MarkdownReferenceResolver
12+
from google_docstring_processor import ApifyGoogleProcessor
13+
from docspec import dump_module
14+
15+
import json
16+
import os
17+
18+
# The documented package sources live in `src`, two directory levels above this script.
script_dir = os.path.dirname(os.path.realpath(__file__))
project_path = os.path.join(script_dir, '../../src')

context = Context(directory='.')
loader = PythonLoader(search_path=[project_path])

filter_processor = FilterProcessor(documented_only=False, skip_empty_modules=False)
crossref_processor = CrossrefProcessor()
google_processor = ApifyGoogleProcessor()

# Order in which the processors are applied to the loaded modules.
processors = [filter_processor, google_processor, crossref_processor]

# The loader and every processor are initialized with the same context before any module is loaded.
for component in (loader, *processors):
    component.init(context)

modules = list(loader.load())

for processor in processors:
    # No reference resolver is supplied.
    processor.process(modules, None)

# Emit all module dumps as a single JSON array on stdout.
print(json.dumps([dump_module(module) for module in modules], indent=4))
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
# -*- coding: utf8 -*-
2+
# Copyright (c) 2019 Niklas Rosenstein
3+
# !!! Modified 2024 Jindřich Bär
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to
7+
# deal in the Software without restriction, including without limitation the
8+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9+
# sell copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21+
# IN THE SOFTWARE.
22+
23+
import dataclasses
24+
import re
25+
import typing as t
26+
27+
import docspec
28+
29+
from pydoc_markdown.contrib.processors.sphinx import generate_sections_markdown
30+
from pydoc_markdown.interfaces import Processor, Resolver
31+
32+
import json
33+
34+
35+
@dataclasses.dataclass
36+
class ApifyGoogleProcessor(Processor):
37+
"""
38+
This class implements the preprocessor for Google and PEP 257 docstrings. It converts
39+
docstrings formatted in the Google docstyle to Markdown syntax.
40+
41+
References:
42+
43+
* https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
44+
* https://www.python.org/dev/peps/pep-0257/
45+
46+
Example:
47+
48+
```
49+
Attributes:
50+
module_level_variable1 (int): Module level variables may be documented in
51+
either the ``Attributes`` section of the module docstring, or in an
52+
inline docstring immediately following the variable.
53+
54+
Either form is acceptable, but the two should not be mixed. Choose
55+
one convention to document module level variables and be consistent
56+
with it.
57+
58+
Todo:
59+
* For module TODOs
60+
* You have to also use ``sphinx.ext.todo`` extension
61+
```
62+
63+
Renders as:
64+
65+
Attributes:
66+
module_level_variable1 (int): Module level variables may be documented in
67+
either the ``Attributes`` section of the module docstring, or in an
68+
inline docstring immediately following the variable.
69+
70+
Either form is acceptable, but the two should not be mixed. Choose
71+
one convention to document module level variables and be consistent
72+
with it.
73+
74+
Todo:
75+
* For module TODOs
76+
* You have to also use ``sphinx.ext.todo`` extension
77+
78+
@doc:fmt:google
79+
"""
80+
81+
_param_res = [
82+
re.compile(r"^(?P<param>\S+):\s+(?P<desc>.+)$"),
83+
re.compile(r"^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$"),
84+
re.compile(r"^(?P<param>\S+)\s+--\s+(?P<desc>.+)$"),
85+
re.compile(r"^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$"),
86+
re.compile(r"^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$"),
87+
]
88+
89+
_keywords_map = {
90+
"Args:": "Arguments",
91+
"Arguments:": "Arguments",
92+
"Attributes:": "Attributes",
93+
"Example:": "Example",
94+
"Examples:": "Examples",
95+
"Keyword Args:": "Arguments",
96+
"Keyword Arguments:": "Arguments",
97+
"Methods:": "Methods",
98+
"Note:": "Notes",
99+
"Notes:": "Notes",
100+
"Other Parameters:": "Arguments",
101+
"Parameters:": "Arguments",
102+
"Return:": "Returns",
103+
"Returns:": "Returns",
104+
"Raises:": "Raises",
105+
"References:": "References",
106+
"See Also:": "See Also",
107+
"Todo:": "Todo",
108+
"Warning:": "Warnings",
109+
"Warnings:": "Warnings",
110+
"Warns:": "Warns",
111+
"Yield:": "Yields",
112+
"Yields:": "Yields",
113+
}
114+
115+
def check_docstring_format(self, docstring: str) -> bool:
116+
for section_name in self._keywords_map:
117+
if section_name in docstring:
118+
return True
119+
return False
120+
121+
def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
122+
docspec.visit(modules, self._process)
123+
124+
def _process(self, node: docspec.ApiObject):
125+
if not node.docstring:
126+
return
127+
128+
lines = []
129+
sections = []
130+
current_lines: t.List[str] = []
131+
in_codeblock = False
132+
keyword = None
133+
multiline_argument_offset = -1
134+
135+
def _commit():
136+
if keyword:
137+
sections.append({keyword: list(current_lines)})
138+
else:
139+
lines.extend(current_lines)
140+
current_lines.clear()
141+
142+
for line in node.docstring.content.split("\n"):
143+
multiline_argument_offset += 1
144+
if line.lstrip().startswith("```"):
145+
in_codeblock = not in_codeblock
146+
current_lines.append(line)
147+
continue
148+
149+
if in_codeblock:
150+
current_lines.append(line)
151+
continue
152+
153+
line = line.strip()
154+
if line in self._keywords_map:
155+
_commit()
156+
keyword = self._keywords_map[line]
157+
continue
158+
159+
if keyword is None:
160+
lines.append(line)
161+
continue
162+
163+
for param_re in self._param_res:
164+
param_match = param_re.match(line)
165+
if param_match:
166+
current_lines.append(param_match.groupdict())
167+
multiline_argument_offset = 0
168+
break
169+
170+
if not param_match:
171+
if multiline_argument_offset == 1:
172+
current_lines[-1]["desc"] += "\n" + line
173+
multiline_argument_offset = 0
174+
else:
175+
current_lines.append(line)
176+
177+
_commit()
178+
node.docstring.content = json.dumps({
179+
"text": "\n".join(lines),
180+
"sections": sections,
181+
}, indent=None)
182+
183+

website/src/css/custom.css

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* Vertical spacing between the individual entries of a parameter list */
.tsd-parameters li {
    margin-bottom: 16px;
}

/* The "parameters" and "returns" headings share the same font size */
.tsd-parameters-title,
.tsd-returns-title {
    font-size: 16px;
}

/* Spacing below the "parameters" heading */
.tsd-parameters-title {
    margin-bottom: 16px !important;
}

website/transformDocs.js

Lines changed: 28 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -134,27 +134,6 @@ function sortChildren(typedocMember) {
134134
typedocMember.groups.sort((a, b) => groupSort(a.title, b.title));
135135
}
136136

137-
// Parses the arguments and return value description of a method from its docstring
138-
function extractArgsAndReturns(docstring) {
139-
const parameters = (docstring
140-
.split('Args:')[1] ?? '').split('Returns:')[0] // Get the part between Args: and Returns:
141-
.split(/(^|\n)\s*([\w]+)\s*\(.*?\)\s*:\s*/) // Magic regex which splits the arguments into an array, and removes the argument types
142-
.filter(x => x.length > 1) // Remove empty strings
143-
.reduce((acc, curr, idx, arr) => { // Collect the argument names and types into an object
144-
if(idx % 2 === 0){
145-
return {...acc, [curr]: arr[idx+1]} // If the index is even, the current string is an argument name, and the next string is its type
146-
}
147-
return acc;
148-
}, {});
149-
150-
const returns = (docstring
151-
.split('Returns:')[1] ?? '').split('Raises:')[0] // Get the part between Returns: and Raises:
152-
.split(':')[1]?.trim() || undefined; // Split the return value into its type and description, return description
153-
154-
155-
return { parameters, returns };
156-
}
157-
158137
// Objects with decorators named 'ignore_docs' or with empty docstrings will be ignored
159138
function isHidden(member) {
160139
return member.decorations?.some(d => d.name === 'ignore_docs')
@@ -211,6 +190,24 @@ function convertObject(obj, parent, module) {
211190
member.name = 'Actor';
212191
}
213192

193+
// Docstrings emitted by the custom pydoc-markdown processor are JSON objects shaped like
// { text, sections: [{ <SectionName>: [entries...] }, ...] }. Anything else is kept as plain text.
let docstring = { text: member.docstring?.content ?? '' };
try {
    const parsedDocstring = JSON.parse(docstring.text);
    // Only a JSON object is a structured docstring; a bare `null`, number, string or array is not
    if (parsedDocstring !== null && typeof parsedDocstring === 'object' && !Array.isArray(parsedDocstring)) {
        docstring = parsedDocstring;
    }
} catch {
    // The docstring is not JSON - keep the raw content as the summary text
}

// Collects the entries of all sections with the given name; a missing section yields an empty array,
// so a docstring without e.g. `Arguments` no longer prevents its `Returns` section from being read.
// (Several docstring headers map to the same section name, so there can be more than one section.)
const getSectionEntries = (sectionName) => (Array.isArray(docstring.sections) ? docstring.sections : [])
    .flatMap((section) => section?.[sectionName] ?? []);

// Map of parameter name -> description; plain-text lines carry no parameter name and are skipped
docstring.args = Object.fromEntries(
    getSectionEntries('Arguments')
        .filter((entry) => typeof entry === 'object' && entry !== null && entry.param !== undefined)
        .map((entry) => [entry.param, entry.desc]),
);

// Entries are either plain lines or `{ param, desc }` objects (lines shaped like `type: description`);
// for the latter only the description is rendered
docstring.returns = getSectionEntries('Returns')
    .map((entry) => (typeof entry === 'string' ? entry : (entry?.desc ?? '')))
    .join('\n');
210+
214211
// Create the Typedoc member object
215212
let typedocMember = {
216213
id: oid++,
@@ -222,7 +219,7 @@ function convertObject(obj, parent, module) {
222219
comment: member.docstring ? {
223220
summary: [{
224221
kind: 'text',
225-
text: member.docstring?.content,
222+
text: docstring.text,
226223
}],
227224
} : undefined,
228225
type: typedocType,
@@ -241,23 +238,20 @@ function convertObject(obj, parent, module) {
241238
}
242239

243240
if(typedocMember.kindString === 'Method') {
244-
const { parameters, returns } = extractArgsAndReturns(member.docstring?.content ?? '');
245-
246241
typedocMember.signatures = [{
247242
id: oid++,
248243
name: member.name,
249244
modifiers: member.modifiers ?? [],
250245
kind: 4096,
251246
kindString: 'Call signature',
252247
flags: {},
253-
comment: member.docstring ? {
248+
comment: docstring.text ? {
254249
summary: [{
255250
kind: 'text',
256-
text: member.docstring?.content
257-
.replace(/\**(Args|Arguments|Returns)[\s\S]+/, ''),
251+
text: docstring?.text,
258252
}],
259-
blockTags: returns ? [
260-
{ tag: '@returns', content: [{ kind: 'text', text: returns }] },
253+
blockTags: docstring?.returns ? [
254+
{ tag: '@returns', content: [{ kind: 'text', text: docstring.returns }] },
261255
] : undefined,
262256
} : undefined,
263257
type: inferTypedocType(member.return_type),
@@ -271,10 +265,10 @@ function convertObject(obj, parent, module) {
271265
'keyword-only': arg.type === 'KEYWORD_ONLY' ? 'true' : undefined,
272266
},
273267
type: inferTypedocType(arg.datatype),
274-
comment: parameters[arg.name] ? {
268+
comment: docstring.args?.[arg.name] ? {
275269
summary: [{
276270
kind: 'text',
277-
text: parameters[arg.name]
271+
text: docstring.args[arg.name]
278272
}]
279273
} : undefined,
280274
defaultValue: arg.default_value,
@@ -330,15 +324,14 @@ function main() {
330324

331325
// Load the docspec dump files of this module and of apify-shared
332326
const thisPackageDocspecDump = fs.readFileSync('docspec-dump.jsonl', 'utf8');
333-
const thisPackageModules = thisPackageDocspecDump.split('\n').filter((line) => line !== '');
327+
const thisPackageModules = JSON.parse(thisPackageDocspecDump)
334328

335329
const apifySharedDocspecDump = fs.readFileSync('apify-shared-docspec-dump.jsonl', 'utf8');
336330
const apifySharedModules = apifySharedDocspecDump.split('\n').filter((line) => line !== '');
337331

338332
// Convert all the modules, store them in the root object
339-
for (const module of [...thisPackageModules, ...apifySharedModules]) {
340-
const parsedModule = JSON.parse(module);
341-
convertObject(parsedModule, typedocApiReference, parsedModule);
333+
for (const module of thisPackageModules) {
334+
convertObject(module, typedocApiReference, module);
342335
};
343336

344337
// Recursively fix references (collect names->ids of all the named entities and then inject those in the reference objects)

0 commit comments

Comments
 (0)