Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ Misc/externals.spdx.json @sethmlarson
Misc/sbom.spdx.json @sethmlarson
Tools/build/generate_sbom.py @sethmlarson

# C API Documentation Checks
Tools/check-c-api-docs/ @ZeroIntensity


# ----------------------------------------------------------------------------
# Platform Support
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ jobs:
- name: Check for unsupported C global variables
if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME
run: make check-c-globals
- name: Check for undocumented C APIs
if: github.event_name == 'pull_request'
run: make check-c-api-docs


build-windows:
name: >-
Expand Down
5 changes: 5 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -3313,6 +3313,11 @@ check-c-globals:
--format summary \
--traceback

# Check for undocumented C APIs.
.PHONY: check-c-globals
check-c-api-docs:
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/check-c-api-docs/main.py

# Find files with funny names
.PHONY: funny
funny:
Expand Down
100 changes: 100 additions & 0 deletions Tools/check-c-api-docs/ignored_c_api.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# descrobject.h
PyClassMethodDescr_Type
PyDictProxy_Type
PyGetSetDescr_Type
PyMemberDescr_Type
PyMethodDescr_Type
PyWrapperDescr_Type
# pydtrace_probes.h
PyDTrace_AUDIT
PyDTrace_FUNCTION_ENTRY
PyDTrace_FUNCTION_RETURN
PyDTrace_GC_DONE
PyDTrace_GC_START
PyDTrace_IMPORT_FIND_LOAD_DONE
PyDTrace_IMPORT_FIND_LOAD_START
PyDTrace_INSTANCE_DELETE_DONE
PyDTrace_INSTANCE_DELETE_START
PyDTrace_INSTANCE_NEW_DONE
PyDTrace_INSTANCE_NEW_START
PyDTrace_LINE
# fileobject.h
Py_FileSystemDefaultEncodeErrors
Py_FileSystemDefaultEncoding
Py_HasFileSystemDefaultEncoding
Py_UTF8Mode
# pyhash.h
Py_HASH_EXTERNAL
# exports.h
PyAPI_DATA
Py_EXPORTED_SYMBOL
Py_IMPORTED_SYMBOL
Py_LOCAL_SYMBOL
# modsupport.h
PyABIInfo_FREETHREADING_AGNOSTIC
# moduleobject.h
PyModuleDef_Type
# object.h
Py_INVALID_SIZE
Py_TPFLAGS_HAVE_VERSION_TAG
Py_TPFLAGS_INLINE_VALUES
Py_TPFLAGS_IS_ABSTRACT
# pyexpat.h
PyExpat_CAPI_MAGIC
PyExpat_CAPSULE_NAME
# pyport.h
Py_ALIGNED
Py_ARITHMETIC_RIGHT_SHIFT
Py_CAN_START_THREADS
Py_FORCE_EXPANSION
Py_GCC_ATTRIBUTE
Py_LL
Py_SAFE_DOWNCAST
Py_ULL
Py_VA_COPY
# unicodeobject.h
Py_UNICODE_SIZE
# cpython/methodobject.h
PyCFunction_GET_CLASS
# cpython/compile.h
PyCF_ALLOW_INCOMPLETE_INPUT
PyCF_COMPILE_MASK
PyCF_DONT_IMPLY_DEDENT
PyCF_IGNORE_COOKIE
PyCF_MASK
PyCF_MASK_OBSOLETE
PyCF_SOURCE_IS_UTF8
# cpython/descrobject.h
PyDescr_COMMON
PyDescr_NAME
PyDescr_TYPE
PyWrapperFlag_KEYWORDS
# cpython/fileobject.h
PyFile_NewStdPrinter
PyStdPrinter_Type
Py_UniversalNewlineFgets
# cpython/setobject.h
PySet_MINSIZE
# cpython/ceval.h
PyUnstable_CopyPerfMapFile
PyUnstable_PerfTrampoline_CompileCode
PyUnstable_PerfTrampoline_SetPersistAfterFork
# cpython/genobject.h
PyAsyncGenASend_CheckExact
# cpython/longintrepr.h
PyLong_BASE
PyLong_MASK
PyLong_SHIFT
# cpython/pyerrors.h
PyException_HEAD
# cpython/pyframe.h
PyUnstable_EXECUTABLE_KINDS
PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION
PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR
PyUnstable_EXECUTABLE_KIND_PY_FUNCTION
PyUnstable_EXECUTABLE_KIND_SKIP
# cpython/pylifecycle.h
Py_FrozenMain
# cpython/unicodeobject.h
PyUnicode_IS_COMPACT
PyUnicode_IS_COMPACT_ASCII
166 changes: 166 additions & 0 deletions Tools/check-c-api-docs/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import re
from pathlib import Path
import sys
import _colorize

SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(")
SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")

MISTAKE = """\
If this is a mistake and this script should not be failing, please create an
issue and tag Peter (@ZeroIntensity) on it.\
"""

FOUND_UNDOCUMENTED = f"""\
Found some undocumented C API(s)!
Python requires documentation on all public C API symbols, macros, and types.
If these API(s) were not meant to be public, please prefix them with a
leading underscore (_PySomething_API) or move them to the internal C API
(pycore_*.h files).
In exceptional cases, certain functions can be ignored by adding them to
Tools/c-api-docs-check/ignored_c_api.txt
{MISTAKE}\
"""

FOUND_IGNORED_DOCUMENTED = f"""\
Some C API(s) were listed in Tools/c-api-docs-check/ignored_c_api.txt, but
they were found in the documentation. To fix this, simply update ignored_c_api.txt
accordingly.
{MISTAKE}\
"""

_CPYTHON = Path(__file__).parent.parent.parent
INCLUDE = _CPYTHON / "Include"
C_API_DOCS = _CPYTHON / "Doc" / "c-api"
IGNORED = (
(_CPYTHON / "Tools" / "check-c-api-docs" / "ignored_c_api.txt")
.read_text()
.split("\n")
)

for index, line in enumerate(IGNORED):
if line.startswith("#"):
IGNORED.pop(index)


def is_documented(name: str) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think is_documented is a little different from is present. Why not improve the check to include verifying it's a doc declaration, i.e. if it's in the text check it's on the same line as the c:func:: etc.?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that, but I decided that it was a case of "practicality beats purity". Running Sphinx or using a regular expression will make this significantly slower, and probably won't save us much in practice.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a compromise, we could go ever simpler, just checking if :: and .. are in the same line as the API name.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, that may work, but the list in #141004 was created using a full-text search, not a search for :: or .., so there are probably some APIs that still need work. Let's finish the list in #141004 first, and then we can look into finding more edge cases.

"""
Is a name present in the C API documentation?
"""
for path in C_API_DOCS.iterdir():
if path.is_dir():
continue
if path.suffix != ".rst":
continue

text = path.read_text(encoding="utf-8")
if name in text:
return True

return False


def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
"""
Scan a header file for C API functions.
"""
undocumented: list[str] = []
documented_ignored: list[str] = []
colors = _colorize.get_colors()

def check_for_name(name: str) -> None:
documented = is_documented(name)
if documented and (name in IGNORED):
documented_ignored.append(name)
elif not documented and (name not in IGNORED):
undocumented.append(name)

for function in SIMPLE_FUNCTION_REGEX.finditer(text):
name = function.group(2)
if not name.startswith("Py"):
continue

check_for_name(name)

for macro in SIMPLE_MACRO_REGEX.finditer(text):
name = macro.group(1)
if not name.startswith("Py"):
continue

if "(" in name:
name = name[: name.index("(")]

check_for_name(name)

for inline in SIMPLE_INLINE_REGEX.finditer(text):
name = inline.group(2)
if not name.startswith("Py"):
continue

check_for_name(name)

for data in SIMPLE_DATA_REGEX.finditer(text):
name = data.group(1)
if not name.startswith("Py"):
continue

check_for_name(name)

# Remove duplicates and sort alphabetically to keep the output non-deterministic
undocumented = list(set(undocumented))
undocumented.sort()

if undocumented or documented_ignored:
print(f"{filename} {colors.RED}BAD{colors.RESET}")
for name in undocumented:
print(f"{colors.BOLD_RED}UNDOCUMENTED:{colors.RESET} {name}")
for name in documented_ignored:
print(f"{colors.BOLD_YELLOW}DOCUMENTED BUT IGNORED:{colors.RESET} {name}")
else:
print(f"{filename} {colors.GREEN}OK{colors.RESET}")

return undocumented, documented_ignored


def main() -> None:
print("Scanning for undocumented C API functions...")
files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()]
all_missing: list[str] = []
all_found_ignored: list[str] = []

for file in files:
if file.is_dir():
continue
assert file.exists()
text = file.read_text(encoding="utf-8")
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text)
all_found_ignored += ignored
all_missing += missing

fail = False
to_check = [
(all_missing, "missing", FOUND_UNDOCUMENTED),
(all_found_ignored, "documented but ignored", FOUND_IGNORED_DOCUMENTED),
]
for name_list, what, message in to_check:
if not name_list:
continue

s = "s" if len(name_list) != 1 else ""
print(f"-- {len(name_list)} {what} C API{s} --")
for name in all_missing:
print(f" - {name}")
print(message)
fail = True

sys.exit(1 if fail else 0)


if __name__ == "__main__":
main()
Loading