-
-
Notifications
You must be signed in to change notification settings - Fork 420
chore: CI worfklows for linting docs #4076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| name: Check links | ||
|
|
||
| on: | ||
| repository_dispatch: | ||
| workflow_dispatch: | ||
| pull_request: | ||
| schedule: | ||
| - cron: "00 18 * * *" | ||
|
|
||
| jobs: | ||
| linkChecker: | ||
| runs-on: ubuntu-latest | ||
| permissions: | ||
| issues: write # required for peter-evans/create-issue-from-file | ||
| steps: | ||
| - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 | ||
| with: | ||
| persist-credentials: false | ||
|
|
||
| - name: Link Checker | ||
| id: lychee | ||
| uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2.8.0 | ||
| # with: | ||
| # fail: false | ||
|
|
||
| # - name: Create Issue From File | ||
| # if: steps.lychee.outputs.exit_code != 0 | ||
| # uses: peter-evans/create-issue-from-file@fca9117c27cdc29c6c4db3b86c48e4115a786710 # v6.0.0 | ||
| # with: | ||
| # title: Link Checker Report | ||
| # content-filepath: ./lychee/out.md | ||
| # labels: report, automated issue |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| // markdownlint-cli2 configuration for zarr-python docs. | ||
| // | ||
| // We keep the rules that catch real rendering/structure problems and disable those that | ||
| // are pure style, conflict with house conventions, or fire false positives against our | ||
| // MkDocs/mkdocstrings + pymdownx toolchain. Complementary, not overlapping, with | ||
| // ci/lint_docs.py (RST residue + list-breaking fences) and `mkdocs build --strict`. | ||
| { | ||
| "config": { | ||
| "default": true, | ||
|
|
||
| // House style: Markdown paragraphs are single unwrapped lines, so line length is not | ||
| // a meaningful constraint. | ||
| "MD013": false, | ||
|
|
||
| // Purely stylistic marker/emphasis choices -- not worth the churn across existing docs. | ||
| "MD004": false, // ul bullet style (-, *, +) | ||
| "MD007": false, // ul indentation width | ||
| "MD050": false, // strong (bold) style | ||
| "MD035": false, // hr style | ||
|
|
||
| // False positives from our toolchain: | ||
| // mkdocstrings cross-refs `[`X`][zarr.X]` read as undefined reference links (MD052); | ||
| // pymdownx.magiclink auto-links bare URLs (MD034); | ||
| // md_in_html lets us embed intentional raw HTML (MD033); | ||
| // generated/included files (api stubs, snippets) need not open with an H1 (MD041). | ||
| "MD052": false, | ||
| "MD034": false, | ||
| "MD033": false, | ||
| "MD041": false, | ||
|
|
||
| // Duplicate headings are legitimate under different sections (e.g. repeated | ||
| // "Documentation"); only flag true sibling duplicates. | ||
| "MD024": { "siblings_only": true }, | ||
|
|
||
| // Opinionated table/link/command rules with low value for these docs. | ||
| "MD055": false, // table pipe style | ||
| "MD060": false, // table column style | ||
| "MD059": false, // "descriptive" link text (no "click here") | ||
| "MD014": false, // $ before commands without shown output | ||
|
|
||
| // markdownlint does not understand MkDocs `!!!` admonitions, so it reads their | ||
| // 4-space-indented bodies as indented code blocks and flags them (and, via inferred | ||
| // file style, flags real fenced blocks too). Cannot coexist with our admonitions. | ||
| "MD046": false // code block style (fenced vs indented) | ||
| // Kept on (structural / real rendering bugs): MD012 (multiple blanks), MD022/MD031/MD032 | ||
| // (blanks around headings/fences/lists), MD025 (single H1), MD029 (ordered-list prefix), | ||
| // MD040 (fenced code language), MD042 (empty links), | ||
| // MD047 (trailing newline), MD056 (table column count), among others. | ||
| }, | ||
| "globs": ["docs/**/*.md"], | ||
| "ignores": [ | ||
| "docs/api/**" // mkdocstrings stubs (`::: zarr.X`) | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,131 @@ | ||||||||||||||||||||||||||||
| """Check that every public top-level export is in the API reference. | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| The API reference is authored as explicit mkdocstrings directives (``::: target``) | ||||||||||||||||||||||||||||
| under ``docs/api/`` -- one per documented symbol -- rather than autodoc, so a newly | ||||||||||||||||||||||||||||
| added ``zarr.__all__`` entry will not appear in the docs until someone writes a page | ||||||||||||||||||||||||||||
| for it (or it becomes a rendered member of an already-documented module). This script | ||||||||||||||||||||||||||||
| catches that gap: it resolves every ``:::`` target, expands module directives into the | ||||||||||||||||||||||||||||
| members they render (honoring ``members: false``), and asserts each name in | ||||||||||||||||||||||||||||
| ``zarr.__all__`` resolves to a documented object. | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| Usage: | ||||||||||||||||||||||||||||
| python ci/check_documented_exports.py | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| Raises ValueError if any public export is undocumented. | ||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| from __future__ import annotations | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| import importlib | ||||||||||||||||||||||||||||
| import re | ||||||||||||||||||||||||||||
| from pathlib import Path | ||||||||||||||||||||||||||||
| from types import ModuleType | ||||||||||||||||||||||||||||
| from typing import Any | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| import zarr | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| REPO_ROOT = Path(__file__).parent.parent.resolve() | ||||||||||||||||||||||||||||
| API_DOCS_ROOT = REPO_ROOT / "docs" / "api" | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| # Names in zarr.__all__ that are intentionally absent from the API reference. | ||||||||||||||||||||||||||||
| # Keep this list short and justified -- it is the only escape hatch from the guard. | ||||||||||||||||||||||||||||
| EXEMPT_EXPORTS = { | ||||||||||||||||||||||||||||
| "__version__", # version string, not an API symbol | ||||||||||||||||||||||||||||
| "print_debug_info", # debugging helper, deliberately not in the reference | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| # A mkdocstrings autodoc directive: `::: some.dotted.target` at the start of a line. | ||||||||||||||||||||||||||||
| DIRECTIVE_RE = re.compile(r"^:::[ \t]+(?P<target>\S+)", re.MULTILINE) | ||||||||||||||||||||||||||||
| # `members: false` (or `members: []`) within a directive's option block disables | ||||||||||||||||||||||||||||
| # rendering of a module's members. | ||||||||||||||||||||||||||||
| MEMBERS_DISABLED_RE = re.compile(r"^\s+members:\s*(false|\[\s*\])\s*$") | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def resolve(target: str) -> Any: | ||||||||||||||||||||||||||||
| """Resolve a `:::` target (a dotted path) to the Python object it documents.""" | ||||||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||||||
| return importlib.import_module(target) | ||||||||||||||||||||||||||||
| except ImportError: | ||||||||||||||||||||||||||||
| pass | ||||||||||||||||||||||||||||
| module_path, _, attr = target.rpartition(".") | ||||||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||||||
| return getattr(importlib.import_module(module_path), attr) | ||||||||||||||||||||||||||||
| except (ImportError, AttributeError): | ||||||||||||||||||||||||||||
| return None | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def members_disabled(text: str, directive_start: int) -> bool: | ||||||||||||||||||||||||||||
| """Return True if the directive starting at `directive_start` sets members: false. | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| Scans the indented option block immediately following the `:::` line, stopping at | ||||||||||||||||||||||||||||
| the first non-indented line (the end of this directive's block).""" | ||||||||||||||||||||||||||||
| for line in text[directive_start:].splitlines()[1:]: | ||||||||||||||||||||||||||||
| if line.strip() == "": | ||||||||||||||||||||||||||||
| continue | ||||||||||||||||||||||||||||
| if not line.startswith((" ", "\t")): | ||||||||||||||||||||||||||||
| break | ||||||||||||||||||||||||||||
| if MEMBERS_DISABLED_RE.match(line): | ||||||||||||||||||||||||||||
| return True | ||||||||||||||||||||||||||||
| return False | ||||||||||||||||||||||||||||
|
Comment on lines
+62
to
+69
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is perhaps more readable:
Suggested change
However, I cannot tell (yet, I'll keep reviewing) if Although, I suspect that the suggestion above doesn't address the potentially larger issue of how this script is architected. In the end, however, this might be sufficient since this is simply a CI script, but I'll make more significant suggestions elsewhere. |
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def documented_object_ids() -> set[int]: | ||||||||||||||||||||||||||||
| """Collect the id()s of every object rendered by a `:::` directive under docs/api. | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| A directive pointing at an object documents that object. A directive pointing at a | ||||||||||||||||||||||||||||
| module documents the module's public members (its ``__all__`` if defined, else its | ||||||||||||||||||||||||||||
| public attributes) unless the directive sets ``members: false``.""" | ||||||||||||||||||||||||||||
| documented: set[int] = set() | ||||||||||||||||||||||||||||
| for md_file in sorted(API_DOCS_ROOT.rglob("*.md")): | ||||||||||||||||||||||||||||
| text = md_file.read_text(encoding="utf-8") | ||||||||||||||||||||||||||||
| for match in DIRECTIVE_RE.finditer(text): | ||||||||||||||||||||||||||||
| obj = resolve(match.group("target")) | ||||||||||||||||||||||||||||
| if obj is None: | ||||||||||||||||||||||||||||
| continue | ||||||||||||||||||||||||||||
| documented.add(id(obj)) | ||||||||||||||||||||||||||||
| if isinstance(obj, ModuleType) and not members_disabled(text, match.start()): | ||||||||||||||||||||||||||||
| member_names = getattr(obj, "__all__", None) or [ | ||||||||||||||||||||||||||||
| name for name in dir(obj) if not name.startswith("_") | ||||||||||||||||||||||||||||
| ] | ||||||||||||||||||||||||||||
| for name in member_names: | ||||||||||||||||||||||||||||
| member = getattr(obj, name, None) | ||||||||||||||||||||||||||||
| if member is not None: | ||||||||||||||||||||||||||||
| documented.add(id(member)) | ||||||||||||||||||||||||||||
| return documented | ||||||||||||||||||||||||||||
|
Comment on lines
+79
to
+94
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is nearly indecipherable, so it may be annoying to maintain. I suggest refactoring this to take a pipeline approach, which I believe will not only make it more readable, but also allow for greater efficiency/speed (related to my comment on the A pipeline approach should allow you to avoid repeatedly splitting each file's text into individual lines every time a directive is encountered. |
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def find_undocumented_exports() -> list[str]: | ||||||||||||||||||||||||||||
| documented = documented_object_ids() | ||||||||||||||||||||||||||||
| missing = [] | ||||||||||||||||||||||||||||
| for name in zarr.__all__: | ||||||||||||||||||||||||||||
| if name in EXEMPT_EXPORTS: | ||||||||||||||||||||||||||||
| continue | ||||||||||||||||||||||||||||
| if id(getattr(zarr, name)) not in documented: | ||||||||||||||||||||||||||||
| missing.append(name) | ||||||||||||||||||||||||||||
| return sorted(missing) | ||||||||||||||||||||||||||||
|
Comment on lines
+99
to
+105
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use the
Suggested change
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def main() -> None: | ||||||||||||||||||||||||||||
| if not API_DOCS_ROOT.exists(): | ||||||||||||||||||||||||||||
| raise FileNotFoundError(f"{API_DOCS_ROOT} does not exist.") | ||||||||||||||||||||||||||||
|
Comment on lines
+109
to
+110
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I mentioned in another comment, take the docs root as an argument, rather than hard-coding it. |
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| missing = find_undocumented_exports() | ||||||||||||||||||||||||||||
| if not missing: | ||||||||||||||||||||||||||||
| print(f"All {len(zarr.__all__)} public exports are documented.") | ||||||||||||||||||||||||||||
| return | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| lines = [ | ||||||||||||||||||||||||||||
| f"Found {len(missing)} public export(s) in zarr.__all__ missing from the API " | ||||||||||||||||||||||||||||
| "reference (docs/api/):\n", | ||||||||||||||||||||||||||||
| ] | ||||||||||||||||||||||||||||
| lines.extend(f" - zarr.{name}" for name in missing) | ||||||||||||||||||||||||||||
| lines.append( | ||||||||||||||||||||||||||||
| "\nAdd a `::: zarr.<name>` page under docs/api/zarr/ (and register it in " | ||||||||||||||||||||||||||||
| "mkdocs.yml and docs/api/zarr/index.md), or -- if the export is intentionally " | ||||||||||||||||||||||||||||
| "undocumented -- add it to EXEMPT_EXPORTS in this script with a reason." | ||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||
| raise ValueError("\n".join(lines)) | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| if __name__ == "__main__": | ||||||||||||||||||||||||||||
| main() | ||||||||||||||||||||||||||||
|
Comment on lines
+117
to
+131
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest following the pattern you used in |
||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suggest you make this a CLI argument instead of hard-coding it here. Then modify the github workflow to call this script with "docs/api" as the argument since it will be invoked from the root of the repo.