-
-
Notifications
You must be signed in to change notification settings - Fork 3.1k
[mypyc] feat: further optimize equality check with string literals [1/1] #19883
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 28 commits
f095d5f
c3c04a5
6528245
d781dde
2580b10
fea651d
9ed369c
fb21187
a0d36ec
577cd74
9bec581
bdee878
45f4885
613f644
e99864d
0014212
4f8786f
996c4d6
4054151
f22d8ac
b95facd
e27c716
8a66a0f
e3045d5
b5a4bf3
28c6510
b957e14
4d491c1
ef0a46e
60c093f
e494f0c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,8 @@ | |
|
|
||
| import sys | ||
| from collections.abc import Sequence | ||
| from typing import Callable, Final, Optional | ||
| from typing import Callable, Final, Optional, cast | ||
| from typing_extensions import TypeGuard | ||
|
|
||
| from mypy.argmap import map_actuals_to_formals | ||
| from mypy.nodes import ARG_POS, ARG_STAR, ARG_STAR2, ArgKind | ||
|
|
@@ -185,6 +186,7 @@ | |
| from mypyc.primitives.str_ops import ( | ||
| str_check_if_true, | ||
| str_eq, | ||
| str_eq_literal, | ||
| str_ssize_t_size_op, | ||
| unicode_compare, | ||
| ) | ||
|
|
@@ -1551,9 +1553,33 @@ def check_tagged_short_int(self, val: Value, line: int, negated: bool = False) - | |
| def compare_strings(self, lhs: Value, rhs: Value, op: str, line: int) -> Value: | ||
| """Compare two strings""" | ||
| if op == "==": | ||
| # We can specialize this case if one or both values are string literals | ||
| literal_fastpath = False | ||
|
|
||
| def is_string_literal(value: Value) -> TypeGuard[LoadLiteral]: | ||
| return isinstance(value, LoadLiteral) and is_str_rprimitive(value.type) | ||
|
|
||
| if is_string_literal(lhs): | ||
| if is_string_literal(rhs): | ||
| # we can optimize out the check entirely in some constant-folded cases | ||
| return self.true() if lhs.value == rhs.value else self.false() | ||
|
|
||
| # if lhs argument is string literal, switch sides to match specializer C api | ||
| lhs, rhs = rhs, lhs | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add an irbuild test case for a string literal as the lhs.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added both |
||
| literal_fastpath = True | ||
| elif is_string_literal(rhs): | ||
| literal_fastpath = True | ||
|
|
||
| if literal_fastpath: | ||
| literal_string = cast(str, cast(LoadLiteral, rhs).value) | ||
| literal_length = Integer(len(literal_string), c_pyssize_t_rprimitive, line) | ||
| return self.primitive_op(str_eq_literal, [lhs, rhs, literal_length], line) | ||
|
|
||
| return self.primitive_op(str_eq, [lhs, rhs], line) | ||
|
|
||
| elif op == "!=": | ||
| eq = self.primitive_op(str_eq, [lhs, rhs], line) | ||
| # perform a standard equality check, then negate | ||
| eq = self.compare_strings(lhs, rhs, "==", line) | ||
| return self.add(ComparisonOp(eq, self.false(), ComparisonOp.EQ, line)) | ||
|
|
||
| # TODO: modify 'str' to use same interface as 'compare_bytes' as it would avoid | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,20 +64,33 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len) | |
| #undef BLOOM_UPDATE | ||
| } | ||
|
|
||
| // Adapted from CPython 3.13.1 (_PyUnicode_Equal) | ||
| char CPyStr_Equal(PyObject *str1, PyObject *str2) { | ||
| if (str1 == str2) { | ||
| return 1; | ||
| } | ||
| Py_ssize_t len = PyUnicode_GET_LENGTH(str1); | ||
| if (PyUnicode_GET_LENGTH(str2) != len) | ||
| static inline char _CPyStr_Equal_NoIdentCheck(PyObject *str1, PyObject *str2, Py_ssize_t str2_length) { | ||
| // This helper function only exists to deduplicate code in CPyStr_Equal and CPyStr_EqualLiteral | ||
| Py_ssize_t str1_length = PyUnicode_GET_LENGTH(str1); | ||
| if (str1_length != str2_length) | ||
| return 0; | ||
| int kind = PyUnicode_KIND(str1); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we deduce a literal's
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like there isn't a good way to reliably do this |
||
| if (PyUnicode_KIND(str2) != kind) | ||
| return 0; | ||
| const void *data1 = PyUnicode_DATA(str1); | ||
| const void *data2 = PyUnicode_DATA(str2); | ||
| return memcmp(data1, data2, len * kind) == 0; | ||
| return memcmp(data1, data2, str1_length * kind) == 0; | ||
| } | ||
|
|
||
| // Adapted from CPython 3.13.1 (_PyUnicode_Equal) | ||
| char CPyStr_Equal(PyObject *str1, PyObject *str2) { | ||
| if (str1 == str2) { | ||
| return 1; | ||
| } | ||
| Py_ssize_t str2_length = PyUnicode_GET_LENGTH(str2); | ||
| return _CPyStr_Equal_NoIdentCheck(str1, str2, str2_length); | ||
| } | ||
|
|
||
| char CPyStr_EqualLiteral(PyObject *str, PyObject *literal_str, Py_ssize_t literal_length) { | ||
| if (str == literal_str) { | ||
| return 1; | ||
| } | ||
| return _CPyStr_Equal_NoIdentCheck(str, literal_str, literal_length); | ||
| } | ||
|
|
||
| PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add irbuild test cases for constant folding.