Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 68 additions & 7 deletions sast-engine/tools/generate_stdlib_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
JSON registries containing functions, classes, constants, and attributes
with type information for static analysis.

Minimum Python version: 3.9

Usage:
# Generate ALL stdlib modules for Python 3.14
python3.14 generate_stdlib_registry.py --all --output-dir ./registries/python3.14/stdlib/v1/
Expand All @@ -23,7 +25,9 @@
import inspect
import json
import os
import pkgutil
import sys
import sysconfig
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
Expand Down Expand Up @@ -357,12 +361,58 @@ def generate_manifest(output_dir: Path, python_version: tuple, modules: List[str


def get_all_stdlib_modules() -> List[str]:
"""Get list of all public stdlib modules."""
if not hasattr(sys, "stdlib_module_names"):
raise RuntimeError("sys.stdlib_module_names not available (Python 3.10+ required)")
"""Get list of all public stdlib modules.

For Python 3.10+, uses sys.stdlib_module_names (PEP 585).
For Python 3.9, uses pkgutil and sysconfig to discover modules from:
- Built-in modules (C-level)
- Standard library path (pure Python modules)
- lib-dynload / DLLs (compiled extension modules)
- Platform-specific stdlib path
"""
# Python 3.10+ has sys.stdlib_module_names
if hasattr(sys, "stdlib_module_names"):
# Filter out private modules
return sorted([m for m in sys.stdlib_module_names if not m.startswith("_")])

# Fallback for Python 3.9: Use pkgutil to discover stdlib modules
stdlib_modules = set()

# Add built-in modules
stdlib_modules.update(sys.builtin_module_names)

# Get stdlib path
stdlib_path = sysconfig.get_path('stdlib')
if stdlib_path:
# Discover modules in stdlib path
try:
for importer, modname, ispkg in pkgutil.iter_modules([stdlib_path]):
stdlib_modules.add(modname)
except Exception as e:
print(f"Warning: Error discovering modules from {stdlib_path}: {e}", file=sys.stderr)

# Also check lib-dynload subdirectory for compiled extension modules (Unix/Linux)
# and DLLs directory (Windows)
for dynload_dir_name in ['lib-dynload', 'DLLs']:
dynload_path = os.path.join(stdlib_path, dynload_dir_name)
if os.path.exists(dynload_path):
try:
for importer, modname, ispkg in pkgutil.iter_modules([dynload_path]):
stdlib_modules.add(modname)
except Exception as e:
print(f"Warning: Error discovering modules from {dynload_path}: {e}", file=sys.stderr)

# Get platstdlib path (platform-specific stdlib modules)
platstdlib_path = sysconfig.get_path('platstdlib')
if platstdlib_path and platstdlib_path != stdlib_path:
try:
for importer, modname, ispkg in pkgutil.iter_modules([platstdlib_path]):
stdlib_modules.add(modname)
except Exception as e:
print(f"Warning: Error discovering modules from {platstdlib_path}: {e}", file=sys.stderr)

# Filter out private modules
return sorted([m for m in sys.stdlib_module_names if not m.startswith("_")])
# Filter out private modules and return sorted list
return sorted([m for m in stdlib_modules if not m.startswith("_")])


def main():
Expand Down Expand Up @@ -400,9 +450,17 @@ def main():

args = parser.parse_args()

# Check Python version
if sys.version_info < (3, 9):
print(f"Error: Python 3.9 or higher is required. Current version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", file=sys.stderr)
return 1

# Determine which modules to generate
if args.all:
modules = get_all_stdlib_modules()
detection_method = "sys.stdlib_module_names" if hasattr(sys, "stdlib_module_names") else "pkgutil fallback"
if args.verbose:
print(f"Using {detection_method} to discover stdlib modules")
print(f"Generating registries for ALL {len(modules)} stdlib modules...")
elif args.modules:
modules = [m.strip() for m in args.modules.split(",")]
Expand Down Expand Up @@ -474,8 +532,11 @@ def main():

# Platform-specific modules that are expected to fail on certain platforms
WINDOWS_ONLY_MODULES = {'msvcrt', 'nt', 'winreg', 'winsound', '_winapi', 'msilib'}
UNIX_SPECIFIC_MODULES = {'nis'} # Network Information Service (may not be available on all systems)
PLATFORM_SPECIFIC_MODULES = WINDOWS_ONLY_MODULES | UNIX_SPECIFIC_MODULES
UNIX_SPECIFIC_MODULES = {'nis', 'grp', 'pwd', 'resource', 'syslog', 'termios', 'tty'}
MACOS_SPECIFIC_MODULES = {'_scproxy'}
# Modules that may not be available in all builds
OPTIONAL_MODULES = {'readline', 'dbm', 'gdbm', 'ossaudiodev', 'spwd'}
PLATFORM_SPECIFIC_MODULES = WINDOWS_ONLY_MODULES | UNIX_SPECIFIC_MODULES | MACOS_SPECIFIC_MODULES | OPTIONAL_MODULES

# Check if any non-platform-specific modules failed
unexpected_failures = [m for m in failed if m not in PLATFORM_SPECIFIC_MODULES]
Expand Down
Loading