Skip to content

Commit f64bf11

Browse files
authored
Optimize serialization format for 2 and 4 bytes ints (#20120)
This is important for serialized ASTs (think line numbers for every node). Also in this PR: * Re-use the same integer logic for str/bytes lengths (it is slightly less optimal, but code re-use is good). * Remove an unused field from the `Buffer` type. * Make the format the same on 32-bit and 64-bit platforms (we still assume a little-endian platform).
1 parent 3f03755 commit f64bf11

File tree

2 files changed

+220
-106
lines changed

2 files changed

+220
-106
lines changed

mypyc/lib-rt/librt_internal.c

Lines changed: 146 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,23 @@
88
#include "librt_internal.h"
99

1010
#define START_SIZE 512
11-
#define MAX_SHORT_INT_TAGGED (255 << 1)
1211

13-
#define MAX_SHORT_LEN 127
14-
#define LONG_STR_TAG 1
12+
// See comment in read_int_internal() on motivation for these values.
// Each MIN/MAX pair spans exactly the number of payload bits of its format
// (7, 14 and 29 bits). Negative values are parenthesized so that they expand
// safely inside any surrounding expression.
#define MIN_ONE_BYTE_INT (-10)
#define MAX_ONE_BYTE_INT 117  // 2 ** 7 - 1 - 10
#define MIN_TWO_BYTES_INT (-100)
#define MAX_TWO_BYTES_INT 16283  // 2 ** (8 + 6) - 1 - 100
#define MIN_FOUR_BYTES_INT (-10000)
#define MAX_FOUR_BYTES_INT 536860911  // 2 ** (3 * 8 + 5) - 1 - 10000
1519

16-
#define MIN_SHORT_INT -10
17-
#define MAX_SHORT_INT 117
18-
#define MEDIUM_INT_TAG 1
19-
#define LONG_INT_TAG 3
20+
#define TWO_BYTES_INT_BIT 1
21+
#define FOUR_BYTES_INT_BIT 2
22+
#define LONG_INT_BIT 4
23+
24+
#define FOUR_BYTES_INT_TRAILER 3
25+
// We add one reserved bit here so that we can potentially support
26+
// 8 bytes format in the future.
27+
#define LONG_INT_TRAILER 15
2028

2129
#define CPY_BOOL_ERROR 2
2230
#define CPY_NONE_ERROR 2
@@ -35,13 +43,22 @@
3543
#define _WRITE(data, type, v) *(type *)(((BufferObject *)data)->buf + ((BufferObject *)data)->pos) = v; \
3644
((BufferObject *)data)->pos += sizeof(type);
3745

46+
#if PY_BIG_ENDIAN
47+
// Swap the two bytes of a 16-bit value.
uint16_t reverse_16(uint16_t number) {
    const uint16_t low_to_high = (uint16_t)(number << 8);
    const uint16_t high_to_low = (uint16_t)(number >> 8);
    return (uint16_t)(low_to_high | high_to_low);
}
50+
51+
// Reverse the byte order of a 32-bit value.
uint32_t reverse_32(uint32_t number) {
    uint32_t swapped = number << 24;          // byte 0 -> byte 3
    swapped |= (number << 8) & 0x00FF0000u;   // byte 1 -> byte 2
    swapped |= (number >> 8) & 0x0000FF00u;   // byte 2 -> byte 1
    swapped |= number >> 24;                  // byte 3 -> byte 0
    return swapped;
}
54+
#endif
55+
3856
typedef struct {
3957
PyObject_HEAD
4058
Py_ssize_t pos;
4159
Py_ssize_t end;
4260
Py_ssize_t size;
4361
char *buf;
44-
PyObject *source;
4562
} BufferObject;
4663

4764
static PyTypeObject BufferType;
@@ -259,26 +276,50 @@ write_bool(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwname
259276
}
260277

261278
/*
262-
str format: size followed by UTF-8 bytes
263-
short strings (len <= 127): single byte for size as `(uint8_t)size << 1`
264-
long strings: \x01 followed by size as Py_ssize_t
279+
str format: size as int (see below) followed by UTF-8 bytes
265280
*/
266281

282+
static inline CPyTagged
283+
_read_short_int(PyObject *data, uint8_t first) {
284+
uint8_t second;
285+
uint16_t two_more;
286+
if ((first & TWO_BYTES_INT_BIT) == 0) {
287+
// Note we use tagged ints since this function can return an error.
288+
return ((Py_ssize_t)(first >> 1) + MIN_ONE_BYTE_INT) << 1;
289+
}
290+
if ((first & FOUR_BYTES_INT_BIT) == 0) {
291+
_CHECK_READ(data, 1, CPY_INT_TAG)
292+
second = _READ(data, uint8_t)
293+
return ((((Py_ssize_t)second) << 6) + (Py_ssize_t)(first >> 2) + MIN_TWO_BYTES_INT) << 1;
294+
}
295+
// The caller is responsible to verify this is called only for short ints.
296+
_CHECK_READ(data, 3, CPY_INT_TAG)
297+
// TODO: check if compilers emit optimal code for these two reads, and tweak if needed.
298+
second = _READ(data, uint8_t)
299+
two_more = _READ(data, uint16_t)
300+
#if PY_BIG_ENDIAN
301+
two_more = reverse_16(two_more);
302+
#endif
303+
Py_ssize_t higher = (((Py_ssize_t)two_more) << 13) + (((Py_ssize_t)second) << 5);
304+
return (higher + (Py_ssize_t)(first >> 3) + MIN_FOUR_BYTES_INT) << 1;
305+
}
306+
267307
static PyObject*
268308
read_str_internal(PyObject *data) {
269309
_CHECK_BUFFER(data, NULL)
270310

271311
// Read string length.
272-
Py_ssize_t size;
273312
_CHECK_READ(data, 1, NULL)
274313
uint8_t first = _READ(data, uint8_t)
275-
if (likely(first != LONG_STR_TAG)) {
276-
// Common case: short string (len <= 127).
277-
size = (Py_ssize_t)(first >> 1);
278-
} else {
279-
_CHECK_READ(data, sizeof(CPyTagged), NULL)
280-
size = _READ(data, Py_ssize_t)
314+
if (unlikely(first == LONG_INT_TRAILER)) {
315+
// Fail fast for invalid/tampered data.
316+
PyErr_SetString(PyExc_ValueError, "invalid str size");
317+
return NULL;
281318
}
319+
CPyTagged tagged_size = _read_short_int(data, first);
320+
if (tagged_size == CPY_INT_TAG)
321+
return NULL;
322+
Py_ssize_t size = tagged_size >> 1;
282323
// Read string content.
283324
char *buf = ((BufferObject *)data)->buf;
284325
_CHECK_READ(data, size, NULL)
@@ -302,6 +343,35 @@ read_str(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames)
302343
return read_str_internal(data);
303344
}
304345

346+
// The caller *must* check that real_value is within allowed range (29 bits).
347+
static inline char
348+
_write_short_int(PyObject *data, Py_ssize_t real_value) {
349+
if (real_value >= MIN_ONE_BYTE_INT && real_value <= MAX_ONE_BYTE_INT) {
350+
_CHECK_SIZE(data, 1)
351+
_WRITE(data, uint8_t, (uint8_t)(real_value - MIN_ONE_BYTE_INT) << 1)
352+
((BufferObject *)data)->end += 1;
353+
} else if (real_value >= MIN_TWO_BYTES_INT && real_value <= MAX_TWO_BYTES_INT) {
354+
_CHECK_SIZE(data, 2)
355+
#if PY_BIG_ENDIAN
356+
uint16_t to_write = ((uint16_t)(real_value - MIN_TWO_BYTES_INT) << 2) | TWO_BYTES_INT_BIT;
357+
_WRITE(data, uint16_t, reverse_16(to_write))
358+
#else
359+
_WRITE(data, uint16_t, ((uint16_t)(real_value - MIN_TWO_BYTES_INT) << 2) | TWO_BYTES_INT_BIT)
360+
#endif
361+
((BufferObject *)data)->end += 2;
362+
} else {
363+
_CHECK_SIZE(data, 4)
364+
#if PY_BIG_ENDIAN
365+
uint32_t to_write = ((uint32_t)(real_value - MIN_FOUR_BYTES_INT) << 3) | FOUR_BYTES_INT_TRAILER;
366+
_WRITE(data, uint32_t, reverse_32(to_write))
367+
#else
368+
_WRITE(data, uint32_t, ((uint32_t)(real_value - MIN_FOUR_BYTES_INT) << 3) | FOUR_BYTES_INT_TRAILER)
369+
#endif
370+
((BufferObject *)data)->end += 4;
371+
}
372+
return CPY_NONE;
373+
}
374+
305375
static char
306376
write_str_internal(PyObject *data, PyObject *value) {
307377
_CHECK_BUFFER(data, CPY_NONE_ERROR)
@@ -311,24 +381,20 @@ write_str_internal(PyObject *data, PyObject *value) {
311381
if (unlikely(chunk == NULL))
312382
return CPY_NONE_ERROR;
313383

314-
Py_ssize_t need;
315384
// Write string length.
316-
if (likely(size <= MAX_SHORT_LEN)) {
317-
// Common case: short string (len <= 127) store as single byte.
318-
need = size + 1;
319-
_CHECK_SIZE(data, need)
320-
_WRITE(data, uint8_t, (uint8_t)size << 1)
385+
if (likely(size >= MIN_FOUR_BYTES_INT && size <= MAX_FOUR_BYTES_INT)) {
386+
if (_write_short_int(data, size) == CPY_NONE_ERROR)
387+
return CPY_NONE_ERROR;
321388
} else {
322-
need = size + sizeof(Py_ssize_t) + 1;
323-
_CHECK_SIZE(data, need)
324-
_WRITE(data, uint8_t, LONG_STR_TAG)
325-
_WRITE(data, Py_ssize_t, size)
389+
PyErr_SetString(PyExc_ValueError, "str too long to serialize");
390+
return CPY_NONE_ERROR;
326391
}
327392
// Write string content.
393+
_CHECK_SIZE(data, size)
328394
char *buf = ((BufferObject *)data)->buf;
329395
memcpy(buf + ((BufferObject *)data)->pos, chunk, size);
330396
((BufferObject *)data)->pos += size;
331-
((BufferObject *)data)->end += need;
397+
((BufferObject *)data)->end += size;
332398
return CPY_NONE;
333399
}
334400

@@ -353,26 +419,25 @@ write_str(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames
353419
}
354420

355421
/*
356-
bytes format: size followed by bytes
357-
short bytes (len <= 127): single byte for size as `(uint8_t)size << 1`
358-
long bytes: \x01 followed by size as Py_ssize_t
422+
bytes format: size as int (see below) followed by bytes
359423
*/
360424

361425
static PyObject*
362426
read_bytes_internal(PyObject *data) {
363427
_CHECK_BUFFER(data, NULL)
364428

365429
// Read length.
366-
Py_ssize_t size;
367430
_CHECK_READ(data, 1, NULL)
368431
uint8_t first = _READ(data, uint8_t)
369-
if (likely(first != LONG_STR_TAG)) {
370-
// Common case: short bytes (len <= 127).
371-
size = (Py_ssize_t)(first >> 1);
372-
} else {
373-
_CHECK_READ(data, sizeof(CPyTagged), NULL)
374-
size = _READ(data, Py_ssize_t)
432+
if (unlikely(first == LONG_INT_TRAILER)) {
433+
// Fail fast for invalid/tampered data.
434+
PyErr_SetString(PyExc_ValueError, "invalid bytes size");
435+
return NULL;
375436
}
437+
CPyTagged tagged_size = _read_short_int(data, first);
438+
if (tagged_size == CPY_INT_TAG)
439+
return NULL;
440+
Py_ssize_t size = tagged_size >> 1;
376441
// Read bytes content.
377442
char *buf = ((BufferObject *)data)->buf;
378443
_CHECK_READ(data, size, NULL)
@@ -405,24 +470,20 @@ write_bytes_internal(PyObject *data, PyObject *value) {
405470
return CPY_NONE_ERROR;
406471
Py_ssize_t size = PyBytes_GET_SIZE(value);
407472

408-
Py_ssize_t need;
409473
// Write length.
410-
if (likely(size <= MAX_SHORT_LEN)) {
411-
// Common case: short bytes (len <= 127) store as single byte.
412-
need = size + 1;
413-
_CHECK_SIZE(data, need)
414-
_WRITE(data, uint8_t, (uint8_t)size << 1)
474+
if (likely(size >= MIN_FOUR_BYTES_INT && size <= MAX_FOUR_BYTES_INT)) {
475+
if (_write_short_int(data, size) == CPY_NONE_ERROR)
476+
return CPY_NONE_ERROR;
415477
} else {
416-
need = size + sizeof(Py_ssize_t) + 1;
417-
_CHECK_SIZE(data, need)
418-
_WRITE(data, uint8_t, LONG_STR_TAG)
419-
_WRITE(data, Py_ssize_t, size)
478+
PyErr_SetString(PyExc_ValueError, "bytes too long to serialize");
479+
return CPY_NONE_ERROR;
420480
}
421481
// Write bytes content.
482+
_CHECK_SIZE(data, size)
422483
char *buf = ((BufferObject *)data)->buf;
423484
memcpy(buf + ((BufferObject *)data)->pos, chunk, size);
424485
((BufferObject *)data)->pos += size;
425-
((BufferObject *)data)->end += need;
486+
((BufferObject *)data)->end += size;
426487
return CPY_NONE;
427488
}
428489

@@ -455,7 +516,7 @@ static double
455516
read_float_internal(PyObject *data) {
456517
_CHECK_BUFFER(data, CPY_FLOAT_ERROR)
457518
_CHECK_READ(data, sizeof(double), CPY_FLOAT_ERROR)
458-
double res = _READ(data, double);
519+
double res = _READ(data, double)
459520
return res;
460521
}
461522

@@ -505,9 +566,13 @@ write_float(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnam
505566

506567
/*
507568
int format:
508-
most common values (-10 <= value <= 117): single byte as `(uint8_t)(value + 10) << 1`
509-
medium values (fit in CPyTagged): \x01 followed by CPyTagged value
510-
long values (very rare): \x03 followed by decimal string (see str format)
569+
one byte: last bit 0, 7 bits used
570+
two bytes: last two bits 01, 14 bits used
571+
four bytes: last three bits 011, 29 bits used
572+
everything else: 00001111 followed by serialized string representation
573+
574+
Note: for fixed size formats we skew ranges towards more positive values,
575+
since negative integers are much more rare.
511576
*/
512577

513578
static CPyTagged
@@ -516,22 +581,17 @@ read_int_internal(PyObject *data) {
516581
_CHECK_READ(data, 1, CPY_INT_TAG)
517582

518583
uint8_t first = _READ(data, uint8_t)
519-
if ((first & MEDIUM_INT_TAG) == 0) {
520-
// Most common case: int that is small in absolute value.
521-
return ((Py_ssize_t)(first >> 1) + MIN_SHORT_INT) << 1;
522-
}
523-
if (first == MEDIUM_INT_TAG) {
524-
_CHECK_READ(data, sizeof(CPyTagged), CPY_INT_TAG)
525-
CPyTagged ret = _READ(data, CPyTagged)
526-
return ret;
584+
if (likely(first != LONG_INT_TRAILER)) {
585+
return _read_short_int(data, first);
527586
}
528-
// People who have literal ints not fitting in size_t should be punished :-)
529587
PyObject *str_ret = read_str_internal(data);
530588
if (unlikely(str_ret == NULL))
531589
return CPY_INT_TAG;
532590
PyObject* ret_long = PyLong_FromUnicodeObject(str_ret, 10);
533591
Py_DECREF(str_ret);
534-
return ((CPyTagged)ret_long) | CPY_INT_TAG;
592+
if (ret_long == NULL)
593+
return CPY_INT_TAG;
594+
return CPyTagged_StealFromObject(ret_long);
535595
}
536596

537597
static PyObject*
@@ -549,36 +609,38 @@ read_int(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames)
549609
return CPyTagged_StealAsObject(retval);
550610
}
551611

612+
static inline char
613+
_write_long_int(PyObject *data, CPyTagged value) {
614+
// TODO(jukka): write a more compact/optimal format for arbitrary length ints.
615+
_CHECK_SIZE(data, 1)
616+
_WRITE(data, uint8_t, LONG_INT_TRAILER)
617+
((BufferObject *)data)->end += 1;
618+
PyObject* int_value = CPyTagged_AsObject(value);
619+
if (unlikely(int_value == NULL))
620+
return CPY_NONE_ERROR;
621+
PyObject *str_value = PyObject_Str(int_value);
622+
Py_DECREF(int_value);
623+
if (unlikely(str_value == NULL))
624+
return CPY_NONE_ERROR;
625+
char res = write_str_internal(data, str_value);
626+
Py_DECREF(str_value);
627+
return res;
628+
}
629+
552630
static char
553631
write_int_internal(PyObject *data, CPyTagged value) {
554632
_CHECK_BUFFER(data, CPY_NONE_ERROR)
555633

556634
if (likely((value & CPY_INT_TAG) == 0)) {
557635
Py_ssize_t real_value = CPyTagged_ShortAsSsize_t(value);
558-
if (real_value >= MIN_SHORT_INT && real_value <= MAX_SHORT_INT) {
559-
// Most common case: int that is small in absolute value.
560-
_CHECK_SIZE(data, 1)
561-
_WRITE(data, uint8_t, (uint8_t)(real_value - MIN_SHORT_INT) << 1)
562-
((BufferObject *)data)->end += 1;
636+
if (likely(real_value >= MIN_FOUR_BYTES_INT && real_value <= MAX_FOUR_BYTES_INT)) {
637+
return _write_short_int(data, real_value);
563638
} else {
564-
_CHECK_SIZE(data, sizeof(CPyTagged) + 1)
565-
_WRITE(data, uint8_t, MEDIUM_INT_TAG)
566-
_WRITE(data, CPyTagged, value)
567-
((BufferObject *)data)->end += sizeof(CPyTagged) + 1;
639+
return _write_long_int(data, value);
568640
}
569641
} else {
570-
_CHECK_SIZE(data, 1)
571-
_WRITE(data, uint8_t, LONG_INT_TAG)
572-
((BufferObject *)data)->end += 1;
573-
PyObject *str_value = PyObject_Str(CPyTagged_LongAsObject(value));
574-
if (unlikely(str_value == NULL))
575-
return CPY_NONE_ERROR;
576-
char res = write_str_internal(data, str_value);
577-
Py_DECREF(str_value);
578-
if (unlikely(res == CPY_NONE_ERROR))
579-
return CPY_NONE_ERROR;
642+
return _write_long_int(data, value);
580643
}
581-
return CPY_NONE;
582644
}
583645

584646
static PyObject*

0 commit comments

Comments
 (0)