From c2bf11e6011b1e78024839e3810f79bf1dd5c1d5 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 01/12] [mypyc] feat: improve LoadLiteral annotation determinism --- mypyc/codegen/emit.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 4ef53296ef0d..df9d8c8143c1 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -208,6 +208,15 @@ def object_annotation(self, obj: object, line: str) -> str: if any(x in formatted for x in ("/*", "*/", "\0")): return "" + # make frozenset annotations deterministic + if formatted.startswith("frozenset({"): + frozenset_items = formatted[11:-2] + # if our frozenset contains another frozenset or a tuple, we will need better logic + # here, but this redimentary logic will still vastly improve codegen determinism. + if "(" not in frozenset_items: + sorted_items = ", ".join(sorted(frozenset_items.split(", "))) + formatted = "frozenset({" + sorted_items + "})" + if "\n" in formatted: first_line, rest = formatted.split("\n", maxsplit=1) comment_continued = textwrap.indent(rest, (line_width + 3) * " ") From d750ec12df4d4696288c3905eb9cbcec8c749afa Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 02/12] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index df9d8c8143c1..c8706f19e548 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -212,7 +212,7 @@ def object_annotation(self, obj: object, line: str) -> str: if formatted.startswith("frozenset({"): frozenset_items = formatted[11:-2] # if our frozenset contains another frozenset or a tuple, we will need better logic - # here, but this redimentary logic will still vastly improve codegen determinism. + # here, but this rudimentary logic will still vastly improve codegen determinism. if "(" not in frozenset_items: sorted_items = ", ".join(sorted(frozenset_items.split(", "))) formatted = "frozenset({" + sorted_items + "})" From 486f9f01b253c31bbd0b2b3faa227adc820c691a Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 03/12] new sort key --- mypyc/codegen/emit.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index c8706f19e548..e33b39c48095 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1235,3 +1235,11 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str # Multi-line result res.append(indent + ", ".join(current)) return "{\n " + ",\n ".join(res) + "\n" + indent + "}" + + +class _mypyc_safe_key(pprint._safe_key): + """A custom sort key implementation for pprint that makes the output deterministic + for all literal types supported by mypyc + """ + def __lt__(self, other: "_mypyc_safe_key") -> bool: + return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From c30a72735180daed5e5b141dc95fa6e35f58b444 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index e33b39c48095..10b831be5623 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1241,5 +1241,6 @@ class _mypyc_safe_key(pprint._safe_key): """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc """ - def __lt__(self, other: "_mypyc_safe_key") -> bool: + + def __lt__(self, other: _mypyc_safe_key) -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From d174f4eeb3538ef046c05a79c44a975bc8c321ee Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 05/12] use new safe key --- mypyc/codegen/emit.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 10b831be5623..ac4ccb665f48 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -204,19 +204,20 @@ def object_annotation(self, obj: object, line: str) -> str: If it contains illegal characters, an empty string is returned.""" line_width = self._indent + len(line) + + # temporarily override pprint._safe_key + default_safe_key = pprint._safe_key + pprint._safe_key = _mypyc_safe_key + + # pretty print the object formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) + + # replace the _safe_key + pprint._safe_key = default_safe_key + if any(x in formatted for x in ("/*", "*/", "\0")): return "" - # make frozenset annotations deterministic - if formatted.startswith("frozenset({"): - frozenset_items = formatted[11:-2] - # if our frozenset contains another frozenset or a tuple, we will need better logic - # here, but this rudimentary logic will still vastly improve codegen determinism. - if "(" not in frozenset_items: - sorted_items = ", ".join(sorted(frozenset_items.split(", "))) - formatted = "frozenset({" + sorted_items + "})" - if "\n" in formatted: first_line, rest = formatted.split("\n", maxsplit=1) comment_continued = textwrap.indent(rest, (line_width + 3) * " ") @@ -1239,8 +1240,10 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str class _mypyc_safe_key(pprint._safe_key): """A custom sort key implementation for pprint that makes the output deterministic - for all literal types supported by mypyc - """ + for all literal types supported by mypyc. - def __lt__(self, other: _mypyc_safe_key) -> bool: + This is NOT safe for use as a sort key for other types, so we MUST replace the + original pprint._safe_key once we've pprinted our object. + """ + def __lt__(self, other: "_mypyc_safe_key") -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From 28c0399bd04939af69a1445de0d34ea845b89556 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 06/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index ac4ccb665f48..1d8b8ef5022f 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -214,7 +214,7 @@ def object_annotation(self, obj: object, line: str) -> str: # replace the _safe_key pprint._safe_key = default_safe_key - + if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1245,5 +1245,6 @@ class _mypyc_safe_key(pprint._safe_key): This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - def __lt__(self, other: "_mypyc_safe_key") -> bool: + + def __lt__(self, other: _mypyc_safe_key) -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From be0de786c953ed44bc8d964de28be1ac016321aa Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 07/12] Update emit.py --- mypyc/codegen/emit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 1d8b8ef5022f..849322ef26e9 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -206,14 +206,14 @@ def object_annotation(self, obj: object, line: str) -> str: line_width = self._indent + len(line) # temporarily override pprint._safe_key - default_safe_key = pprint._safe_key - pprint._safe_key = _mypyc_safe_key + default_safe_key = pprint._safe_key # type: ignore [attr-defined] + pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] # pretty print the object formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) # replace the _safe_key - pprint._safe_key = default_safe_key + pprint._safe_key = default_safe_key # type: ignore [attr-defined] if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): +def _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From d33d2d979cfab684058ac968f5afdd4cc8de15fb Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 08/12] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 849322ef26e9..394ae0d11644 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -def _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From 650d99d6b74e5433de399f7231d07cd7054dd3bb Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 09/12] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 394ae0d11644..f2bc07d6587f 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, name-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From da2278578d3431b5dcf8ba9f1360aa3e4bb31ccd Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 10/12] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index f2bc07d6587f..24df8e6ecf00 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, name-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [name-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From 96aa63e6818ce44da0d0da724523d37d2da185fc Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 11/12] refactor --- mypyc/codegen/emit.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 24df8e6ecf00..9db22cc75c79 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,13 +1238,11 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [name-defined, misc] +def _mypyc_safe_key(obj: object) -> str: """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - - def __lt__(self, other: _mypyc_safe_key) -> bool: - return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) + return str(type(obj)) + repr(obj) From 58e4b6752ff39e15dcb4634db3f94c46207831ab Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:32:40 -0400 Subject: [PATCH 12/12] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 9db22cc75c79..ebb152923ff6 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1245,4 +1245,4 @@ def _mypyc_safe_key(obj: object) -> str: This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - return str(type(obj)) + repr(obj) + return str(type(obj)) + pprint.pformat(obj)