diff --git a/sast-engine/tools/generate_stdlib_registry.py b/sast-engine/tools/generate_stdlib_registry.py index f50d25b0..08fe2758 100755 --- a/sast-engine/tools/generate_stdlib_registry.py +++ b/sast-engine/tools/generate_stdlib_registry.py @@ -6,6 +6,8 @@ JSON registries containing functions, classes, constants, and attributes with type information for static analysis. +Minimum Python version: 3.9 + Usage: # Generate ALL stdlib modules for Python 3.14 python3.14 generate_stdlib_registry.py --all --output-dir ./registries/python3.14/stdlib/v1/ @@ -23,7 +25,9 @@ import inspect import json import os +import pkgutil import sys +import sysconfig from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Optional @@ -357,12 +361,58 @@ def generate_manifest(output_dir: Path, python_version: tuple, modules: List[str def get_all_stdlib_modules() -> List[str]: - """Get list of all public stdlib modules.""" - if not hasattr(sys, "stdlib_module_names"): - raise RuntimeError("sys.stdlib_module_names not available (Python 3.10+ required)") + """Get list of all public stdlib modules. + + For Python 3.10+, uses sys.stdlib_module_names (PEP 585). + For Python 3.9, uses pkgutil and sysconfig to discover modules from: + - Built-in modules (C-level) + - Standard library path (pure Python modules) + - lib-dynload / DLLs (compiled extension modules) + - Platform-specific stdlib path + """ + # Python 3.10+ has sys.stdlib_module_names + if hasattr(sys, "stdlib_module_names"): + # Filter out private modules + return sorted([m for m in sys.stdlib_module_names if not m.startswith("_")]) + + # Fallback for Python 3.9: Use pkgutil to discover stdlib modules + stdlib_modules = set() + + # Add built-in modules + stdlib_modules.update(sys.builtin_module_names) + + # Get stdlib path + stdlib_path = sysconfig.get_path('stdlib') + if stdlib_path: + # Discover modules in stdlib path + try: + for importer, modname, ispkg in pkgutil.iter_modules([stdlib_path]): + stdlib_modules.add(modname) + except Exception as e: + print(f"Warning: Error discovering modules from {stdlib_path}: {e}", file=sys.stderr) + + # Also check lib-dynload subdirectory for compiled extension modules (Unix/Linux) + # and DLLs directory (Windows) + for dynload_dir_name in ['lib-dynload', 'DLLs']: + dynload_path = os.path.join(stdlib_path, dynload_dir_name) + if os.path.exists(dynload_path): + try: + for importer, modname, ispkg in pkgutil.iter_modules([dynload_path]): + stdlib_modules.add(modname) + except Exception as e: + print(f"Warning: Error discovering modules from {dynload_path}: {e}", file=sys.stderr) + + # Get platstdlib path (platform-specific stdlib modules) + platstdlib_path = sysconfig.get_path('platstdlib') + if platstdlib_path and platstdlib_path != stdlib_path: + try: + for importer, modname, ispkg in pkgutil.iter_modules([platstdlib_path]): + stdlib_modules.add(modname) + except Exception as e: + print(f"Warning: Error discovering modules from {platstdlib_path}: {e}", file=sys.stderr) - # Filter out private modules - return sorted([m for m in sys.stdlib_module_names if not m.startswith("_")]) + # Filter out private modules and return sorted list + return sorted([m for m in stdlib_modules if not m.startswith("_")]) def main(): @@ -400,9 +450,17 @@ def main(): args = parser.parse_args() + # Check Python version + if sys.version_info < (3, 9): + print(f"Error: Python 3.9 or higher is required. Current version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", file=sys.stderr) + return 1 + # Determine which modules to generate if args.all: modules = get_all_stdlib_modules() + detection_method = "sys.stdlib_module_names" if hasattr(sys, "stdlib_module_names") else "pkgutil fallback" + if args.verbose: + print(f"Using {detection_method} to discover stdlib modules") print(f"Generating registries for ALL {len(modules)} stdlib modules...") elif args.modules: modules = [m.strip() for m in args.modules.split(",")] @@ -474,8 +532,11 @@ def main(): # Platform-specific modules that are expected to fail on certain platforms WINDOWS_ONLY_MODULES = {'msvcrt', 'nt', 'winreg', 'winsound', '_winapi', 'msilib'} - UNIX_SPECIFIC_MODULES = {'nis'} # Network Information Service (may not be available on all systems) - PLATFORM_SPECIFIC_MODULES = WINDOWS_ONLY_MODULES | UNIX_SPECIFIC_MODULES + UNIX_SPECIFIC_MODULES = {'nis', 'grp', 'pwd', 'resource', 'syslog', 'termios', 'tty'} + MACOS_SPECIFIC_MODULES = {'_scproxy'} + # Modules that may not be available in all builds + OPTIONAL_MODULES = {'readline', 'dbm', 'gdbm', 'ossaudiodev', 'spwd'} + PLATFORM_SPECIFIC_MODULES = WINDOWS_ONLY_MODULES | UNIX_SPECIFIC_MODULES | MACOS_SPECIFIC_MODULES | OPTIONAL_MODULES # Check if any non-platform-specific modules failed unexpected_failures = [m for m in failed if m not in PLATFORM_SPECIFIC_MODULES]