From 64f0f4630c31aeee99b9a01bc637a03bc5ea890c Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 6 Jul 2025 11:06:59 +0100 Subject: [PATCH 1/2] [mypyc] Add optional logging of sampled operation trace to a file Logging executed ops is useful for performance analysis. For example, we can look for functions which perform many slow operations and try to optimize them. I've already used this successfully to implement several optimizations. A typical optimization that this helps with is replacing a generic Python function call operation with a native call. Compile using `MYPYC_LOG_TRACE=1 mypyc ...` to enable trace logging. The log will be written to `mypyc_trace.txt`. Roughly 1/1000 of ops of certain kinds (e.g. primitive calls) are logged. This can also be enabled by passing `log_trace=True` to `mypycify`. Compared to profiling, this logging data is frequency-oriented instead of CPU time oriented, and it's mostly helpful for micro-optimizations. It also needs some understanding of mypyc internals to be useful. It's not generally possible to reconstruct call stacks from the event data (but we could improve this). However, there is very little noise in the data and even small improvements can be visible. Logging isn't impacted by C compiler optimizations, so for a faster iteration loop, optimizations can be disabled. In the future this could possibly be used for profile-guided optimizations, but we'd probably need to adjust the data collection a bit for this use case. This is currently not documented and mostly intended for mypy or mypyc maintainers for now. Also no tests yet, since this is not a user-evel feature and it's disabled by default. Random example of log entries from mypy self check: ``` mypy.typeops.TypeVarExtractor._merge:1146:call_c:CPyList_Extend mypy.semanal.SemanticAnalyzer.lookup::primitive_op:list_get_item_unsafe mypy.expandtype.ExpandTypeVisitor.visit_type_var__TypeVisitor_glue:239:call:mypy.expandtype.ExpandTypeVisitor.visit_type_var mypy.applytype.apply_generic_arguments:111:call_c:CPy_NoErrOccurred mypy.indirection.TypeIndirectionVisitor.visit_callable_type__TypeVisitor_glue:118:call:mypy.indirection.TypeIndirectionVisitor.visit_callable_type mypy.fastparse.ASTConverter.visit_Call::primitive_op:buf_init_item mypy.semanal.SemanticAnalyzer.is_func_scope::primitive_op:int_eq mypy.meet.is_overlapping_types:397:call:mypy.meet._is_subtype_is_overlapping_types_obj mypy.types.CallableType.serialize:2287:call_c:CPyList_SetItemUnsafe mypy.checkexpr.ExpressionChecker.check_argument_types:2576:call_c:CPyList_SetItemUnsafe ``` For example, let's look at this line: ``` mypy.typeops.TypeVarExtractor._merge:1146:call_c:CPyList_Extend ``` In method `TypeVarExtractor._merge`, on line 1146 of `mypy/typeops.py`, the C primitive CPyList_Extend was called (corresponds to `list.extend`). I'll later add some documentation to the wiki or other developer docs and give examples of using and analyzing the data. --- mypyc/__main__.py | 20 +++++++-- mypyc/build.py | 10 +++++ mypyc/codegen/emitmodule.py | 4 ++ mypyc/lib-rt/CPy.h | 1 + mypyc/lib-rt/misc_ops.c | 28 ++++++++++++ mypyc/options.py | 5 +++ mypyc/primitives/misc_ops.py | 11 +++++ mypyc/transform/log_trace.py | 84 ++++++++++++++++++++++++++++++++++++ 8 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 mypyc/transform/log_trace.py diff --git a/mypyc/__main__.py b/mypyc/__main__.py index 653199e0fb55..9b3973710efa 100644 --- a/mypyc/__main__.py +++ b/mypyc/__main__.py @@ -23,8 +23,15 @@ from setuptools import setup from mypyc.build import mypycify -setup(name='mypyc_output', - ext_modules=mypycify({}, opt_level="{}", debug_level="{}", strict_dunder_typing={}), +setup( + name='mypyc_output', + ext_modules=mypycify( + {}, + opt_level="{}", + debug_level="{}", + strict_dunder_typing={}, + log_trace={}, + ), ) """ @@ -39,10 +46,17 @@ def main() -> None: opt_level = os.getenv("MYPYC_OPT_LEVEL", "3") debug_level = os.getenv("MYPYC_DEBUG_LEVEL", "1") strict_dunder_typing = bool(int(os.getenv("MYPYC_STRICT_DUNDER_TYPING", "0"))) + # If enabled, compiled code writes a sampled log of executed ops (or events) to + # mypyc_trace.txt. + log_trace = bool(int(os.getenv("MYPYC_LOG_TRACE", "0"))) setup_file = os.path.join(build_dir, "setup.py") with open(setup_file, "w") as f: - f.write(setup_format.format(sys.argv[1:], opt_level, debug_level, strict_dunder_typing)) + f.write( + setup_format.format( + sys.argv[1:], opt_level, debug_level, strict_dunder_typing, log_trace + ) + ) # We don't use run_setup (like we do in the test suite) because it throws # away the error code from distutils, and we don't care about the slight diff --git a/mypyc/build.py b/mypyc/build.py index ab7ba5393614..8ddbf4d22a27 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -485,6 +485,7 @@ def mypycify( include_runtime_files: bool | None = None, strict_dunder_typing: bool = False, group_name: str | None = None, + log_trace: bool = False, ) -> list[Extension]: """Main entry point to building using mypyc. @@ -531,6 +532,10 @@ def mypycify( the hash of module names. This is used for the names of the output C files and the shared library. This is only supported if there is a single group. [Experimental] + log_trace: If True, compiled code writes a trace log of events in + mypyc_trace.txt (derived from executed operations). This is + useful for performance analysis, such as analyzing which + primitive ops are used the most and on which lines. """ # Figure out our configuration @@ -543,6 +548,7 @@ def mypycify( include_runtime_files=include_runtime_files, strict_dunder_typing=strict_dunder_typing, group_name=group_name, + log_trace=log_trace, ) # Generate all the actual important C code @@ -583,6 +589,8 @@ def mypycify( # See https://github.com/mypyc/mypyc/issues/956 "-Wno-cpp", ] + if log_trace: + cflags.append("-DMYPYC_LOG_TRACE") elif compiler.compiler_type == "msvc": # msvc doesn't have levels, '/O2' is full and '/Od' is disable if opt_level == "0": @@ -607,6 +615,8 @@ def mypycify( # that we actually get the compilation speed and memory # use wins that multi-file mode is intended for. cflags += ["/GL-", "/wd9025"] # warning about overriding /GL + if log_trace: + cflags.append("/DMYPYC_LOG_TRACE") # If configured to (defaults to yes in multi-file mode), copy the # runtime library in. Otherwise it just gets #included to save on diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index 2a6f17cea5e2..7037409ff40b 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -65,6 +65,7 @@ from mypyc.transform.copy_propagation import do_copy_propagation from mypyc.transform.exceptions import insert_exception_handling from mypyc.transform.flag_elimination import do_flag_elimination +from mypyc.transform.log_trace import insert_event_trace_logging from mypyc.transform.lower import lower_ir from mypyc.transform.refcount import insert_ref_count_opcodes from mypyc.transform.spill import insert_spills @@ -253,6 +254,9 @@ def compile_scc_to_ir( if fn in env_user_functions: insert_spills(fn, env_user_functions[fn]) + if compiler_options.log_trace: + insert_event_trace_logging(fn, compiler_options) + # Switch to lower abstraction level IR. lower_ir(fn, compiler_options) # Perform optimizations. diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index dba84d44f363..5aa64d4763dd 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -928,6 +928,7 @@ PyObject *CPySingledispatch_RegisterFunction(PyObject *singledispatch_func, PyOb PyObject *CPy_GetAIter(PyObject *obj); PyObject *CPy_GetANext(PyObject *aiter); void CPy_SetTypeAliasTypeComputeFunction(PyObject *alias, PyObject *compute_value); +void CPyTrace_LogEvent(const char *location, const char *line, const char *op, const char *details); #ifdef __cplusplus } diff --git a/mypyc/lib-rt/misc_ops.c b/mypyc/lib-rt/misc_ops.c index d234138b2ff7..8aa25cc11e02 100644 --- a/mypyc/lib-rt/misc_ops.c +++ b/mypyc/lib-rt/misc_ops.c @@ -1030,6 +1030,34 @@ PyObject *CPy_GetANext(PyObject *aiter) return NULL; } +#ifdef MYPYC_LOG_TRACE + +// This is only compiled in if trace logging is enabled by user + +static int TraceCounter = 0; +static const int TRACE_EVERY_NTH = 1009; // Should be a prime number +#define TRACE_LOG_FILE_NAME "mypyc_trace.txt" +static FILE *TraceLogFile = NULL; + +// Log a tracing event on every Nth call +void CPyTrace_LogEvent(const char *location, const char *line, const char *op, const char *details) { + if (TraceLogFile == NULL) { + if ((TraceLogFile = fopen(TRACE_LOG_FILE_NAME, "w")) == NULL) { + fprintf(stderr, "error: Could not open trace file %s\n", TRACE_LOG_FILE_NAME); + abort(); + } + } + if (TraceCounter == 0) { + fprintf(TraceLogFile, "%s:%s:%s:%s\n", location, line, op, details); + } + TraceCounter++; + if (TraceCounter == TRACE_EVERY_NTH) { + TraceCounter = 0; + } +} + +#endif + #ifdef CPY_3_12_FEATURES // Copied from Python 3.12.3, since this struct is internal to CPython. It defines diff --git a/mypyc/options.py b/mypyc/options.py index 51114926f6b2..50c76d3c0656 100644 --- a/mypyc/options.py +++ b/mypyc/options.py @@ -16,6 +16,7 @@ def __init__( python_version: tuple[int, int] | None = None, strict_dunder_typing: bool = False, group_name: str | None = None, + log_trace: bool = False, ) -> None: self.strip_asserts = strip_asserts self.multi_file = multi_file @@ -45,3 +46,7 @@ def __init__( # library is generated (with shims). This can be used to make the output # file names more predictable. self.group_name = group_name + # If enabled, write a trace log of events based on executed operations to + # mypyc_trace.txt when compiled module is executed. This is useful for + # performance analysis. + self.log_trace = log_trace diff --git a/mypyc/primitives/misc_ops.py b/mypyc/primitives/misc_ops.py index 7494b46790ce..485f799354d8 100644 --- a/mypyc/primitives/misc_ops.py +++ b/mypyc/primitives/misc_ops.py @@ -9,6 +9,7 @@ c_int_rprimitive, c_pointer_rprimitive, c_pyssize_t_rprimitive, + cstring_rprimitive, dict_rprimitive, int_rprimitive, object_pointer_rprimitive, @@ -291,3 +292,13 @@ return_type=void_rtype, error_kind=ERR_NEVER, ) + +# Log an event to a trace log, which is written to a file during execution. +log_trace_event = custom_primitive_op( + name="log_trace_event", + c_function_name="CPyTrace_LogEvent", + # (fullname of function/location, line number, operation name, operation details) + arg_types=[cstring_rprimitive, cstring_rprimitive, cstring_rprimitive, cstring_rprimitive], + return_type=void_rtype, + error_kind=ERR_NEVER, +) diff --git a/mypyc/transform/log_trace.py b/mypyc/transform/log_trace.py new file mode 100644 index 000000000000..e2311aad7a1f --- /dev/null +++ b/mypyc/transform/log_trace.py @@ -0,0 +1,84 @@ +"""This optional pass adds logging of various executed operations. + +Some subset of the executed operations are logged to the mypyc_trace.txt file. + +This is useful for performance analysis. For example, it's possible +to identify how frequently various primitive functions are called, +and in which code locations they are called. +""" + +from __future__ import annotations + +from mypyc.ir.func_ir import FuncIR +from mypyc.ir.ops import Call, CallC, CString, LoadLiteral, LoadStatic, Op, PrimitiveOp, Value +from mypyc.irbuild.ll_builder import LowLevelIRBuilder +from mypyc.options import CompilerOptions +from mypyc.primitives.misc_ops import log_trace_event +from mypyc.transform.ir_transform import IRTransform + + +def insert_event_trace_logging(fn: FuncIR, options: CompilerOptions) -> None: + builder = LowLevelIRBuilder(None, options) + transform = LogTraceEventTransform(builder, fn.decl.fullname) + transform.transform_blocks(fn.blocks) + fn.blocks = builder.blocks + + +def get_load_global_name(op: CallC) -> str | None: + name = op.function_name + if name == "CPyDict_GetItem": + arg = op.args[0] + if ( + isinstance(arg, LoadStatic) + and arg.namespace == "static" + and arg.identifier == "globals" + and isinstance(op.args[1], LoadLiteral) + ): + return str(op.args[1].value) + return None + + +class LogTraceEventTransform(IRTransform): + def __init__(self, builder: LowLevelIRBuilder, fullname: str) -> None: + super().__init__(builder) + self.fullname = fullname.encode("utf-8") + + def visit_call(self, op: Call) -> Value: + # TODO: Use different op name when constructing an instance + return self.log(op, "call", op.fn.fullname) + + def visit_primitive_op(self, op: PrimitiveOp) -> Value: + self.log(op, "primitive_op", op.desc.name) + return self.add(op) + + def visit_call_c(self, op: CallC) -> Value: + if global_name := get_load_global_name(op): + return self.log(op, "globals_dict_get_item", global_name) + + func_name = op.function_name + if func_name == "PyObject_Vectorcall" and isinstance(op.args[0], CallC): + if global_name := get_load_global_name(op.args[0]): + return self.log(op, "python_call_global", global_name) + elif func_name == "CPyObject_GetAttr" and isinstance(op.args[1], LoadLiteral): + return self.log(op, "python_get_attr", str(op.args[1].value)) + elif func_name == "PyObject_VectorcallMethod" and isinstance(op.args[0], LoadLiteral): + return self.log(op, "python_call_method", str(op.args[0].value)) + + return self.log(op, "call_c", func_name) + + def log(self, op: Op, name: str, details: str) -> Value: + if op.line >= 0: + line_str = str(op.line) + else: + line_str = "" + self.builder.primitive_op( + log_trace_event, + [ + CString(self.fullname), + CString(line_str.encode("ascii")), + CString(name.encode("utf-8")), + CString(details.encode("utf-8")), + ], + op.line, + ) + return self.add(op) From 63a5e71cbd502ceb31b6c97b87566d1a6732fffe Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 16 Jul 2025 13:13:43 +0100 Subject: [PATCH 2/2] Fix duplicate ops --- mypyc/transform/log_trace.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mypyc/transform/log_trace.py b/mypyc/transform/log_trace.py index e2311aad7a1f..5b20940c66bb 100644 --- a/mypyc/transform/log_trace.py +++ b/mypyc/transform/log_trace.py @@ -48,8 +48,7 @@ def visit_call(self, op: Call) -> Value: return self.log(op, "call", op.fn.fullname) def visit_primitive_op(self, op: PrimitiveOp) -> Value: - self.log(op, "primitive_op", op.desc.name) - return self.add(op) + return self.log(op, "primitive_op", op.desc.name) def visit_call_c(self, op: CallC) -> Value: if global_name := get_load_global_name(op):