From 1ec17a550dde91f7ac1e84683023b9e6e4ec1ad0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 31 Jul 2025 12:11:00 +0100 Subject: [PATCH 1/3] Don't mark uop as escaping if the escaping call is on an exit branch --- Include/internal/pycore_opcode_metadata.h | 6 +- Include/internal/pycore_uop_metadata.h | 6 +- Tools/cases_generator/analyzer.py | 143 +++++++++++++++------- 3 files changed, 106 insertions(+), 49 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..9f626cecd13630 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1218,14 +1218,14 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL_BUILTIN] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [LOAD_GLOBAL_MODULE] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG }, [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SMALL_INT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [LOAD_SPECIAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1250,7 +1250,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [POP_TOP] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, [PUSH_NULL] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, - [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [RERAISE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESERVED] = { true, INSTR_FMT_IX, 0 }, [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index ff7e800aa9bb1a..f8e27e2f33250f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -24,7 +24,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, - [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG, [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, @@ -148,7 +148,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_LOCALS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_LOCALS] = HAS_ERROR_FLAG, [_LOAD_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_PUSH_NULL_CONDITIONAL] = HAS_ARG_FLAG, @@ -308,7 +308,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG, - [_GUARD_IS_NONE_POP] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_IS_NONE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_NOT_NONE_POP] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, [_JUMP_TO_TOP] = 0, [_SET_IP] = 0, diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6466d2615cd14e..d8b8726a95fad8 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -5,7 +5,7 @@ import re from typing import Optional, Callable -from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt +from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt, ForStmt, MacroIfStmt @dataclass class EscapingCall: @@ -723,53 +723,57 @@ def visit(stmt: Stmt) -> None: if error is not None: raise analysis_error(f"Escaping call '{error.text} in condition", error) +def escaping_call_in_simple_stmt(stmt: SimpleStmt, result: dict[Stmt, EscapingCall]): + tokens = stmt.contents + for idx, tkn in enumerate(tokens): + try: + next_tkn = tokens[idx+1] + except IndexError: + break + if next_tkn.kind != lexer.LPAREN: + continue + if tkn.kind == lexer.IDENTIFIER: + if tkn.text.upper() == tkn.text: + # simple macro + continue + #if not tkn.text.startswith(("Py", "_Py", "monitor")): + # continue + if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): + # Optimize functions + continue + if tkn.text.endswith("Check"): + continue + if tkn.text.startswith("Py_Is"): + continue + if tkn.text.endswith("CheckExact"): + continue + if tkn.text in NON_ESCAPING_FUNCTIONS: + continue + elif tkn.kind == "RPAREN": + prev = tokens[idx-1] + if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int": + #cast + continue + elif tkn.kind != "RBRACKET": + continue + if tkn.text in ("PyStackRef_CLOSE", "PyStackRef_XCLOSE"): + if len(tokens) <= idx+2: + raise analysis_error("Unexpected end of file", next_tkn) + kills = tokens[idx+2] + if kills.kind != "IDENTIFIER": + raise analysis_error(f"Expected identifier, got '{kills.text}'", kills) + else: + kills = None + result[stmt] = EscapingCall(stmt, tkn, kills) + + def find_escaping_api_calls(instr: parser.CodeDef) -> dict[SimpleStmt, EscapingCall]: result: dict[SimpleStmt, EscapingCall] = {} def visit(stmt: Stmt) -> None: if not isinstance(stmt, SimpleStmt): return - tokens = stmt.contents - for idx, tkn in enumerate(tokens): - try: - next_tkn = tokens[idx+1] - except IndexError: - break - if next_tkn.kind != lexer.LPAREN: - continue - if tkn.kind == lexer.IDENTIFIER: - if tkn.text.upper() == tkn.text: - # simple macro - continue - #if not tkn.text.startswith(("Py", "_Py", "monitor")): - # continue - if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): - # Optimize functions - continue - if tkn.text.endswith("Check"): - continue - if tkn.text.startswith("Py_Is"): - continue - if tkn.text.endswith("CheckExact"): - continue - if tkn.text in NON_ESCAPING_FUNCTIONS: - continue - elif tkn.kind == "RPAREN": - prev = tokens[idx-1] - if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int": - #cast - continue - elif tkn.kind != "RBRACKET": - continue - if tkn.text in ("PyStackRef_CLOSE", "PyStackRef_XCLOSE"): - if len(tokens) <= idx+2: - raise analysis_error("Unexpected end of file", next_tkn) - kills = tokens[idx+2] - if kills.kind != "IDENTIFIER": - raise analysis_error(f"Expected identifier, got '{kills.text}'", kills) - else: - kills = None - result[stmt] = EscapingCall(stmt, tkn, kills) + escaping_call_in_simple_stmt(stmt, result) instr.block.accept(visit) check_escaping_calls(instr, result) @@ -822,6 +826,59 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: ) +def op_escapes(op: parser.CodeDef): + + def is_simple_exit(stmt: Stmt): + if not isinstance(stmt, SimpleStmt): + return False + tokens = stmt.contents + if len(tokens) < 4: + return False + return ( + tokens[0].text in ("ERROR_IF", "DEOPT_IF", "EXIT_IF") + and + tokens[1].text == "(" + and + tokens[2].text in ("true", "1") + and + tokens[3].text == ")" + ) + + def escapes_list(stmts: list[Stmt]): + if not stmts: + return False + if is_simple_exit(stmts[-1]): + return False + for stmt in stmts: + if escapes(stmt): + return True + return False + + def escapes(stmt: Stmt) -> None: + if isinstance(stmt, BlockStmt): + return escapes_list(stmt.body) + elif isinstance(stmt, SimpleStmt): + for tkn in stmt.contents: + if tkn.text == "DECREF_INPUTS": + return True + d: dict[Stmt, EscapingCall] = {} + escaping_call_in_simple_stmt(stmt, d) + return bool(d) + elif isinstance(stmt, IfStmt): + if stmt.else_body and escapes(stmt.else_body): + return True + return escapes(stmt.body) + elif isinstance(stmt, MacroIfStmt): + if stmt.else_body and escapes_list(stmt.else_body): + return True + return escapes_list(stmt.body) + elif isinstance(stmt, ForStmt): + return escapes(stmt.body) + elif isinstance(stmt, WhileStmt): + return escapes(stmt.body) + + return escapes(op.block) + def compute_properties(op: parser.CodeDef) -> Properties: escaping_calls = find_escaping_api_calls(op) has_free = ( @@ -843,7 +900,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) error_with_pop = has_error_with_pop(op) error_without_pop = has_error_without_pop(op) - escapes = bool(escaping_calls) or variable_used(op, "DECREF_INPUTS") + escapes = op_escapes(op) pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations return Properties( From 3dce74007ab775f26b62e47589f3e12eccee75a8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 31 Jul 2025 12:57:15 +0100 Subject: [PATCH 2/3] Fix up type annotations --- Tools/cases_generator/analyzer.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index d8b8726a95fad8..0bb4e4f6cd0bd0 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -723,7 +723,7 @@ def visit(stmt: Stmt) -> None: if error is not None: raise analysis_error(f"Escaping call '{error.text} in condition", error) -def escaping_call_in_simple_stmt(stmt: SimpleStmt, result: dict[Stmt, EscapingCall]): +def escaping_call_in_simple_stmt(stmt: SimpleStmt, result: dict[SimpleStmt, EscapingCall]) -> None: tokens = stmt.contents for idx, tkn in enumerate(tokens): try: @@ -826,9 +826,9 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: ) -def op_escapes(op: parser.CodeDef): +def op_escapes(op: parser.CodeDef) -> bool: - def is_simple_exit(stmt: Stmt): + def is_simple_exit(stmt: Stmt) -> bool: if not isinstance(stmt, SimpleStmt): return False tokens = stmt.contents @@ -844,7 +844,7 @@ def is_simple_exit(stmt: Stmt): tokens[3].text == ")" ) - def escapes_list(stmts: list[Stmt]): + def escapes_list(stmts: list[Stmt]) -> bool: if not stmts: return False if is_simple_exit(stmts[-1]): @@ -854,14 +854,14 @@ def escapes_list(stmts: list[Stmt]): return True return False - def escapes(stmt: Stmt) -> None: + def escapes(stmt: Stmt) -> bool: if isinstance(stmt, BlockStmt): return escapes_list(stmt.body) elif isinstance(stmt, SimpleStmt): for tkn in stmt.contents: if tkn.text == "DECREF_INPUTS": return True - d: dict[Stmt, EscapingCall] = {} + d: dict[SimpleStmt, EscapingCall] = {} escaping_call_in_simple_stmt(stmt, d) return bool(d) elif isinstance(stmt, IfStmt): @@ -876,6 +876,8 @@ def escapes(stmt: Stmt) -> None: return escapes(stmt.body) elif isinstance(stmt, WhileStmt): return escapes(stmt.body) + else: + assert False, "Unexpected statement type" return escapes(op.block) From 921797f08bc039d9857bf3479f3612ab713653df Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 2 Aug 2025 09:28:19 +0100 Subject: [PATCH 3/3] Flatten functino hierarchy --- Tools/cases_generator/analyzer.py | 97 +++++++++++++++---------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 0bb4e4f6cd0bd0..a3c38ecdea3e1f 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -826,60 +826,59 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: ) -def op_escapes(op: parser.CodeDef) -> bool: +def stmt_is_simple_exit(stmt: Stmt) -> bool: + if not isinstance(stmt, SimpleStmt): + return False + tokens = stmt.contents + if len(tokens) < 4: + return False + return ( + tokens[0].text in ("ERROR_IF", "DEOPT_IF", "EXIT_IF") + and + tokens[1].text == "(" + and + tokens[2].text in ("true", "1") + and + tokens[3].text == ")" + ) - def is_simple_exit(stmt: Stmt) -> bool: - if not isinstance(stmt, SimpleStmt): - return False - tokens = stmt.contents - if len(tokens) < 4: - return False - return ( - tokens[0].text in ("ERROR_IF", "DEOPT_IF", "EXIT_IF") - and - tokens[1].text == "(" - and - tokens[2].text in ("true", "1") - and - tokens[3].text == ")" - ) - def escapes_list(stmts: list[Stmt]) -> bool: - if not stmts: - return False - if is_simple_exit(stmts[-1]): - return False - for stmt in stmts: - if escapes(stmt): - return True +def stmt_list_escapes(stmts: list[Stmt]) -> bool: + if not stmts: + return False + if stmt_is_simple_exit(stmts[-1]): return False + for stmt in stmts: + if stmt_escapes(stmt): + return True + return False - def escapes(stmt: Stmt) -> bool: - if isinstance(stmt, BlockStmt): - return escapes_list(stmt.body) - elif isinstance(stmt, SimpleStmt): - for tkn in stmt.contents: - if tkn.text == "DECREF_INPUTS": - return True - d: dict[SimpleStmt, EscapingCall] = {} - escaping_call_in_simple_stmt(stmt, d) - return bool(d) - elif isinstance(stmt, IfStmt): - if stmt.else_body and escapes(stmt.else_body): - return True - return escapes(stmt.body) - elif isinstance(stmt, MacroIfStmt): - if stmt.else_body and escapes_list(stmt.else_body): + +def stmt_escapes(stmt: Stmt) -> bool: + if isinstance(stmt, BlockStmt): + return stmt_list_escapes(stmt.body) + elif isinstance(stmt, SimpleStmt): + for tkn in stmt.contents: + if tkn.text == "DECREF_INPUTS": return True - return escapes_list(stmt.body) - elif isinstance(stmt, ForStmt): - return escapes(stmt.body) - elif isinstance(stmt, WhileStmt): - return escapes(stmt.body) - else: - assert False, "Unexpected statement type" + d: dict[SimpleStmt, EscapingCall] = {} + escaping_call_in_simple_stmt(stmt, d) + return bool(d) + elif isinstance(stmt, IfStmt): + if stmt.else_body and stmt_escapes(stmt.else_body): + return True + return stmt_escapes(stmt.body) + elif isinstance(stmt, MacroIfStmt): + if stmt.else_body and stmt_list_escapes(stmt.else_body): + return True + return stmt_list_escapes(stmt.body) + elif isinstance(stmt, ForStmt): + return stmt_escapes(stmt.body) + elif isinstance(stmt, WhileStmt): + return stmt_escapes(stmt.body) + else: + assert False, "Unexpected statement type" - return escapes(op.block) def compute_properties(op: parser.CodeDef) -> Properties: escaping_calls = find_escaping_api_calls(op) @@ -902,7 +901,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) error_with_pop = has_error_with_pop(op) error_without_pop = has_error_without_pop(op) - escapes = op_escapes(op) + escapes = stmt_escapes(op.block) pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations return Properties(