Skip to content

Commit 914757b

Browse files
committed
[mypyc] Add faster primitive for string equality
This speeds up self check by ~1.4%. String equality is one of the five most common primitive function calls in self check. We previously used a string comparison primitive that computed the relative order of the two strings. Usually we only care about equality, which can be checked more quickly because we can take fast paths, such as a length check. I checked the CPython implementation of string equality in 3.9 (the lowest supported Python version) and in 3.13: both have a fast path based on the string object kind, and the equality checks have the same overall semantics.
1 parent 503f5bd commit 914757b

File tree

4 files changed

+38
-1
lines changed

4 files changed

+38
-1
lines changed

mypyc/irbuild/ll_builder.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,12 @@
175175
unary_ops,
176176
)
177177
from mypyc.primitives.set_ops import new_set_op
178-
from mypyc.primitives.str_ops import str_check_if_true, str_ssize_t_size_op, unicode_compare
178+
from mypyc.primitives.str_ops import (
179+
str_check_if_true,
180+
str_eq,
181+
str_ssize_t_size_op,
182+
unicode_compare,
183+
)
179184
from mypyc.primitives.tuple_ops import list_tuple_op, new_tuple_op, new_tuple_with_length_op
180185
from mypyc.rt_subtype import is_runtime_subtype
181186
from mypyc.sametype import is_same_type
@@ -1471,6 +1476,11 @@ def check_tagged_short_int(self, val: Value, line: int, negated: bool = False) -
14711476

14721477
def compare_strings(self, lhs: Value, rhs: Value, op: str, line: int) -> Value:
14731478
"""Compare two strings"""
1479+
if op == "==":
1480+
return self.primitive_op(str_eq, [lhs, rhs], line)
1481+
elif op == "!=":
1482+
eq = self.primitive_op(str_eq, [lhs, rhs], line)
1483+
return self.add(ComparisonOp(eq, self.false(), ComparisonOp.EQ, line))
14741484
compare_result = self.call_c(unicode_compare, [lhs, rhs], line)
14751485
error_constant = Integer(-1, c_int_rprimitive, line)
14761486
compare_error_check = self.add(

mypyc/lib-rt/CPy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ static inline char CPyDict_CheckSize(PyObject *dict, CPyTagged size) {
726726
#define RIGHTSTRIP 1
727727
#define BOTHSTRIP 2
728728

729+
char CPyStr_Equal(PyObject *str1, PyObject *str2);  // Returns 1 if the two strings are equal, 0 otherwise
729730
PyObject *CPyStr_Build(Py_ssize_t len, ...);
730731
PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index);
731732
CPyTagged CPyStr_Find(PyObject *str, PyObject *substr, CPyTagged start, int direction);

mypyc/lib-rt/str_ops.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,22 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
6464
#undef BLOOM_UPDATE
6565
}
6666

67+
// Adapted from CPython 3.13.1 (_PyUnicode_Equal)
//
// Equality-only string comparison: returns 1 if str1 and str2 are equal and
// 0 otherwise. No ordering is computed and there is no error return value.
// No type check is performed here -- assumes both arguments are unicode
// objects (TODO confirm that all callers guarantee this).
68+
char CPyStr_Equal(PyObject *str1, PyObject *str2) {
69+
if (str1 == str2) {  // Fast path: the same object is always equal to itself
70+
return 1;
71+
}
72+
Py_ssize_t len = PyUnicode_GET_LENGTH(str1);
73+
if (PyUnicode_GET_LENGTH(str2) != len)  // Fast path: different lengths can't be equal
74+
return 0;
75+
int kind = PyUnicode_KIND(str1);
76+
if (PyUnicode_KIND(str2) != kind)  // Differing kinds are treated as unequal (relies on CPython's canonical string representation)
77+
return 0;
78+
const void *data1 = PyUnicode_DATA(str1);
79+
const void *data2 = PyUnicode_DATA(str2);
80+
return memcmp(data1, data2, len * kind) == 0;  // len * kind: buffer size in bytes, given kind is the per-character width per the CPython C API
81+
}
82+
6783
PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) {
6884
if (PyUnicode_READY(str) != -1) {
6985
if (CPyTagged_CheckShort(index)) {

mypyc/primitives/str_ops.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
ERR_NEG_INT,
2222
binary_op,
2323
custom_op,
24+
custom_primitive_op,
2425
function_op,
2526
load_address_op,
2627
method_op,
@@ -69,6 +70,15 @@
6970
steals=[True, False],
7071
)
7172

73+
# str1 == str2 (very common operation, so we provide our own)
#
# Primitive backed by the C helper CPyStr_Equal. It takes two str values and
# produces a bool; it only answers equality and does not compute ordering.
74+
str_eq = custom_primitive_op(
75+
name="str_eq",
76+
c_function_name="CPyStr_Equal",  # C helper that returns 1 (equal) or 0 (not equal)
77+
arg_types=[str_rprimitive, str_rprimitive],
78+
return_type=bool_rprimitive,
79+
error_kind=ERR_NEVER,  # presumably safe because CPyStr_Equal has no error return path
80+
)
81+
7282
unicode_compare = custom_op(
7383
arg_types=[str_rprimitive, str_rprimitive],
7484
return_type=c_int_rprimitive,

0 commit comments

Comments
 (0)