Skip to content

Commit 5466462

Browse files
committed
Optimize str len
1 parent 33c0f3f commit 5466462

File tree

2 files changed

+41
-13
lines changed

2 files changed

+41
-13
lines changed

mypy/types.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1674,7 +1674,7 @@ def __eq__(self, other: object) -> bool:
16741674
def serialize(self) -> JsonDict | str:
16751675
assert self.type is not None
16761676
type_ref = self.type.fullname
1677-
if not self.args and not self.last_known_value:
1677+
if not self.args and not self.last_known_value and not self.extra_attrs:
16781678
return type_ref
16791679
data: JsonDict = {
16801680
".class": "Instance",
@@ -1745,7 +1745,6 @@ def copy_modified(
17451745
),
17461746
extra_attrs=self.extra_attrs,
17471747
)
1748-
# We intentionally don't copy the extra_attrs here, so they will be erased.
17491748
new.can_be_true = self.can_be_true
17501749
new.can_be_false = self.can_be_false
17511750
return new

mypyc/lib-rt/native_internal.c

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
#define START_SIZE 512
99
#define MAX_SHORT_INT_TAGGED (255 << 1)
1010

11+
#define MAX_SHORT_LEN 127
12+
#define LONG_STR_TAG 1
13+
14+
#define MIN_SHORT_INT (-10)
15+
#define MAX_SHORT_INT 117
16+
#define MEDIUM_INT_TAG 1
17+
#define LONG_INT_TAG 3
18+
1119
typedef struct {
1220
PyObject_HEAD
1321
Py_ssize_t pos;
@@ -230,15 +238,22 @@ read_str_internal(PyObject *data) {
230238
if (_check_buffer(data) == 2)
231239
return NULL;
232240

233-
if (_check_read((BufferObject *)data, sizeof(Py_ssize_t)) == 2)
234-
return NULL;
241+
Py_ssize_t size;
235242
char *buf = ((BufferObject *)data)->buf;
236243
// Read string length.
237-
Py_ssize_t size = *(Py_ssize_t *)(buf + ((BufferObject *)data)->pos);
238-
((BufferObject *)data)->pos += sizeof(Py_ssize_t);
239-
if (_check_read((BufferObject *)data, size) == 2)
244+
if (_check_read((BufferObject *)data, 1) == 2)
240245
return NULL;
246+
uint8_t first = *(uint8_t *)(buf + ((BufferObject *)data)->pos);
247+
((BufferObject *)data)->pos += 1;
248+
if (first != LONG_STR_TAG) {
249+
size = (Py_ssize_t)(first >> 1);
250+
} else {
251+
size = *(Py_ssize_t *)(buf + ((BufferObject *)data)->pos);
252+
((BufferObject *)data)->pos += sizeof(Py_ssize_t);
253+
}
241254
// Read string content.
255+
if (_check_read((BufferObject *)data, size) == 2)
256+
return NULL;
242257
PyObject *res = PyUnicode_FromStringAndSize(
243258
buf + ((BufferObject *)data)->pos, (Py_ssize_t)size
244259
);
@@ -266,14 +281,28 @@ write_str_internal(PyObject *data, PyObject *value) {
266281
const char *chunk = PyUnicode_AsUTF8AndSize(value, &size);
267282
if (!chunk)
268283
return 2;
269-
Py_ssize_t need = size + sizeof(Py_ssize_t);
270-
if (_check_size((BufferObject *)data, need) == 2)
271-
return 2;
272284

273-
char *buf = ((BufferObject *)data)->buf;
285+
Py_ssize_t need;
286+
char *buf;
274287
// Write string length.
275-
*(Py_ssize_t *)(buf + ((BufferObject *)data)->pos) = size;
276-
((BufferObject *)data)->pos += sizeof(Py_ssize_t);
288+
if (size <= MAX_SHORT_LEN) {
289+
// Common case: short string (len <= 127); store the length as a single byte.
290+
need = size + 1;
291+
if (_check_size((BufferObject *)data, need) == 2)
292+
return 2;
293+
buf = ((BufferObject *)data)->buf;
294+
*(uint8_t *)(buf + ((BufferObject *)data)->pos) = (uint8_t)size << 1;
295+
((BufferObject *)data)->pos += 1;
296+
} else {
297+
need = size + sizeof(Py_ssize_t) + 1;
298+
if (_check_size((BufferObject *)data, need) == 2)
299+
return 2;
300+
buf = ((BufferObject *)data)->buf;
301+
*(uint8_t *)(buf + ((BufferObject *)data)->pos) = LONG_STR_TAG;
302+
((BufferObject *)data)->pos += 1;
303+
*(Py_ssize_t *)(buf + ((BufferObject *)data)->pos) = size;
304+
((BufferObject *)data)->pos += sizeof(Py_ssize_t);
305+
}
277306
// Write string content.
278307
memcpy(buf + ((BufferObject *)data)->pos, chunk, size);
279308
((BufferObject *)data)->pos += size;

0 commit comments

Comments
 (0)