From 4bd6b0c5601f131db8dfd9216bf505c0ae4d3a3b Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 14 Dec 2024 10:47:34 +0300 Subject: [PATCH 01/10] gh-127936: convert marshal module to use import/export API for ints (PEP 757) --- Python/marshal.c | 232 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 157 insertions(+), 75 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 72afa4ff89432c..db351929ee2033 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -240,10 +240,6 @@ w_short_pstring(const void *s, Py_ssize_t n, WFILE *p) #define PyLong_MARSHAL_SHIFT 15 #define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT) #define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1) -#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0 -#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT" -#endif -#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) #define W_TYPE(t, p) do { \ w_byte((t) | flag, (p)); \ @@ -252,47 +248,101 @@ w_short_pstring(const void *s, Py_ssize_t n, WFILE *p) static PyObject * _PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code); +#define _r_digits(bs) \ +static void \ +_r_digits##bs(const uint##bs##_t *digits, Py_ssize_t n, uint8_t negative, \ + Py_ssize_t marshal_ratio, WFILE *p) \ +{ \ + /* set l to number of base PyLong_MARSHAL_BASE digits */ \ + Py_ssize_t l = (n - 1)*marshal_ratio; \ + uint##bs##_t d = digits[n - 1]; \ + \ + assert(d != 0); /* a PyLong is always normalized */ \ + do { \ + d >>= PyLong_MARSHAL_SHIFT; \ + l++; \ + } while (d != 0); \ + if (l > SIZE32_MAX) { \ + p->depth--; \ + p->error = WFERR_UNMARSHALLABLE; \ + return; \ + } \ + w_long((long)(negative ? -l : l), p); \ + \ + for (Py_ssize_t i = 0; i < n - 1; i++) { \ + d = digits[i]; \ + for (Py_ssize_t j = 0; j < marshal_ratio; j++) { \ + w_short(d & PyLong_MARSHAL_MASK, p); \ + d >>= PyLong_MARSHAL_SHIFT; \ + } \ + assert (d == 0); \ + } \ + d = digits[n - 1]; \ + do { \ + w_short(d & PyLong_MARSHAL_MASK, p); \ + d >>= PyLong_MARSHAL_SHIFT; \ + } while (d != 0); \ +} +_r_digits(16) +_r_digits(32) + static void w_PyLong(const PyLongObject *ob, char flag, WFILE *p) { - Py_ssize_t i, j, n, l; - digit d; - W_TYPE(TYPE_LONG, p); if (_PyLong_IsZero(ob)) { w_long((long)0, p); return; } - /* set l to number of base PyLong_MARSHAL_BASE digits */ - n = _PyLong_DigitCount(ob); - l = (n-1) * PyLong_MARSHAL_RATIO; - d = ob->long_value.ob_digit[n-1]; - assert(d != 0); /* a PyLong is always normalized */ - do { - d >>= PyLong_MARSHAL_SHIFT; - l++; - } while (d != 0); - if (l > SIZE32_MAX) { + PyLongExport long_export; + + if (PyLong_Export((PyObject *)ob, &long_export) < 0) { p->depth--; p->error = WFERR_UNMARSHALLABLE; return; } - w_long((long)(_PyLong_IsNegative(ob) ? -l : l), p); + if (!long_export.digits) { + int8_t sign = long_export.value < 0 ? -1 : 1; + uint64_t abs_value = Py_ABS(long_export.value); + uint64_t d = abs_value; + long l = 0; - for (i=0; i < n-1; i++) { - d = ob->long_value.ob_digit[i]; - for (j=0; j < PyLong_MARSHAL_RATIO; j++) { + /* set l to number of base PyLong_MARSHAL_BASE digits */ + do { + d >>= PyLong_MARSHAL_SHIFT; + l += sign; + } while (d); + w_long(l, p); + d = abs_value; + do { w_short(d & PyLong_MARSHAL_MASK, p); d >>= PyLong_MARSHAL_SHIFT; - } - assert (d == 0); + } while (d); + return; + } + + const PyLongLayout *layout = PyLong_GetNativeLayout(); + Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT; + + /* must be a multiple of PyLong_MARSHAL_SHIFT */ + assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0); + + /* other assumptions on PyLongObject internals */ + assert(layout->bits_per_digit <= 32); + assert(layout->digits_order == -1); + assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1)); + assert(layout->digit_size == 2 || layout->digit_size == 4); + + if (layout->digit_size == 4) { + _r_digits32(long_export.digits, long_export.ndigits, + long_export.negative, marshal_ratio, p); + } + else { + _r_digits16(long_export.digits, long_export.ndigits, + long_export.negative, marshal_ratio, p); } - d = ob->long_value.ob_digit[n-1]; - do { - w_short(d & PyLong_MARSHAL_MASK, p); - d >>= PyLong_MARSHAL_SHIFT; - } while (d != 0); + PyLong_FreeExport(&long_export); } static void @@ -875,17 +925,60 @@ r_long64(RFILE *p) 1 /* signed */); } +#define _w_digits(bs) \ +static int \ +_w_digits##bs(uint##bs##_t *digits, Py_ssize_t size, \ + Py_ssize_t marshal_ratio, \ + int shorts_in_top_digit, RFILE *p) \ +{ \ + int md; \ + uint##bs##_t d; \ + \ + for (Py_ssize_t i = 0; i < size - 1; i++) { \ + d = 0; \ + for (Py_ssize_t j = 0; j < marshal_ratio; j++) { \ + md = r_short(p); \ + if (md < 0 || md > PyLong_MARSHAL_BASE) { \ + goto bad_digit; \ + } \ + d += (uint##bs##_t)md << j*PyLong_MARSHAL_SHIFT; \ + } \ + digits[i] = d; \ + } \ + \ + d = 0; \ + for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) { \ + md = r_short(p); \ + if (md < 0 || md > PyLong_MARSHAL_BASE) { \ + goto bad_digit; \ + } \ + /* topmost marshal digit should be nonzero */ \ + if (md == 0 && j == shorts_in_top_digit - 1) { \ + PyErr_SetString(PyExc_ValueError, \ + "bad marshal data (unnormalized long data)"); \ + return -1; \ + } \ + d += (uint##bs##_t)md << j*PyLong_MARSHAL_SHIFT; \ + } \ + assert(!PyErr_Occurred()); \ + /* top digit should be nonzero, else the resulting PyLong won't be \ + normalized */ \ + digits[size - 1] = d; \ + return 0; \ +bad_digit: \ + if (!PyErr_Occurred()) { \ + PyErr_SetString(PyExc_ValueError, \ + "bad marshal data (digit out of range in long)"); \ + } \ + return -1; \ +} +_w_digits(32) +_w_digits(16) + static PyObject * r_PyLong(RFILE *p) { - PyLongObject *ob; - long n, size, i; - int j, md, shorts_in_top_digit; - digit d; - - n = r_long(p); - if (n == 0) - return (PyObject *)_PyLong_New(0); + long n = r_long(p); if (n == -1 && PyErr_Occurred()) { return NULL; } @@ -895,51 +988,40 @@ r_PyLong(RFILE *p) return NULL; } - size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO; - shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO; - ob = _PyLong_New(size); - if (ob == NULL) - return NULL; + const PyLongLayout *layout = PyLong_GetNativeLayout(); + Py_ssize_t marshal_ratio = layout->bits_per_digit/PyLong_MARSHAL_SHIFT; - _PyLong_SetSignAndDigitCount(ob, n < 0 ? -1 : 1, size); + /* must be a multiple of PyLong_MARSHAL_SHIFT */ + assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0); - for (i = 0; i < size-1; i++) { - d = 0; - for (j=0; j < PyLong_MARSHAL_RATIO; j++) { - md = r_short(p); - if (md < 0 || md > PyLong_MARSHAL_BASE) - goto bad_digit; - d += (digit)md << j*PyLong_MARSHAL_SHIFT; - } - ob->long_value.ob_digit[i] = d; + /* other assumptions on PyLongObject internals */ + assert(layout->bits_per_digit <= 32); + assert(layout->digits_order == -1); + assert(layout->digit_endianness == (PY_LITTLE_ENDIAN ? -1 : 1)); + assert(layout->digit_size == 2 || layout->digit_size == 4); + + Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio; + int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio; + void *digits; + PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits); + + if (writer == NULL) { + return NULL; } - d = 0; - for (j=0; j < shorts_in_top_digit; j++) { - md = r_short(p); - if (md < 0 || md > PyLong_MARSHAL_BASE) - goto bad_digit; - /* topmost marshal digit should be nonzero */ - if (md == 0 && j == shorts_in_top_digit - 1) { - Py_DECREF(ob); - PyErr_SetString(PyExc_ValueError, - "bad marshal data (unnormalized long data)"); - return NULL; - } - d += (digit)md << j*PyLong_MARSHAL_SHIFT; + int ret; + + if (layout->digit_size == 4) { + ret = _w_digits32(digits, size, marshal_ratio, shorts_in_top_digit, p); } - assert(!PyErr_Occurred()); - /* top digit should be nonzero, else the resulting PyLong won't be - normalized */ - ob->long_value.ob_digit[size-1] = d; - return (PyObject *)ob; - bad_digit: - Py_DECREF(ob); - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "bad marshal data (digit out of range in long)"); + else { + ret = _w_digits16(digits, size, marshal_ratio, shorts_in_top_digit, p); + } + if (ret < 0) { + PyLongWriter_Discard(writer); + return NULL; } - return NULL; + return PyLongWriter_Finish(writer); } static double From 44c7c6af63caf8a104c3fb65aabf2f2819a38afc Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 7 Jan 2025 13:25:56 +0300 Subject: [PATCH 02/10] Apply suggestions from code review Co-authored-by: Victor Stinner --- Python/marshal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/marshal.c b/Python/marshal.c index db351929ee2033..d9364ec42ff01c 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -314,6 +314,7 @@ w_PyLong(const PyLongObject *ob, char flag, WFILE *p) l += sign; } while (d); w_long(l, p); + d = abs_value; do { w_short(d & PyLong_MARSHAL_MASK, p); @@ -965,6 +966,7 @@ _w_digits##bs(uint##bs##_t *digits, Py_ssize_t size, \ normalized */ \ digits[size - 1] = d; \ return 0; \ + \ bad_digit: \ if (!PyErr_Occurred()) { \ PyErr_SetString(PyExc_ValueError, \ From 95d89a71a2920853babfbfffb518585e808b0c11 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 7 Jan 2025 13:56:27 +0300 Subject: [PATCH 03/10] address review: local decl --- Python/marshal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index d9364ec42ff01c..e3677c0622fe1b 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -932,13 +932,12 @@ _w_digits##bs(uint##bs##_t *digits, Py_ssize_t size, \ Py_ssize_t marshal_ratio, \ int shorts_in_top_digit, RFILE *p) \ { \ - int md; \ uint##bs##_t d; \ \ for (Py_ssize_t i = 0; i < size - 1; i++) { \ d = 0; \ for (Py_ssize_t j = 0; j < marshal_ratio; j++) { \ - md = r_short(p); \ + int md = r_short(p); \ if (md < 0 || md > PyLong_MARSHAL_BASE) { \ goto bad_digit; \ } \ @@ -949,7 +948,7 @@ _w_digits##bs(uint##bs##_t *digits, Py_ssize_t size, \ \ d = 0; \ for (Py_ssize_t j = 0; j < shorts_in_top_digit; j++) { \ - md = r_short(p); \ + int md = r_short(p); \ if (md < 0 || md > PyLong_MARSHAL_BASE) { \ goto bad_digit; \ } \ From 1e0db95665d5efb7fd8411138da6abb6ee8e5809 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 14 Jan 2025 17:31:26 +0300 Subject: [PATCH 04/10] Apply suggestions from code review --- Python/marshal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/marshal.c b/Python/marshal.c index e3677c0622fe1b..cd1e954bbe8bbf 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -1002,6 +1002,9 @@ r_PyLong(RFILE *p) assert(layout->digit_size == 2 || layout->digit_size == 4); Py_ssize_t size = 1 + (Py_ABS(n) - 1) / marshal_ratio; + + assert(size >= 1); + int shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % marshal_ratio; void *digits; PyLongWriter *writer = PyLongWriter_Create(n < 0, size, &digits); From d66af47881faecc308e220b199c4ad38f3f84f0d Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 14 Jan 2025 17:33:56 +0300 Subject: [PATCH 05/10] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Python/marshal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/marshal.c b/Python/marshal.c index cd1e954bbe8bbf..723597a7e32353 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -275,7 +275,7 @@ _r_digits##bs(const uint##bs##_t *digits, Py_ssize_t n, uint8_t negative, \ w_short(d & PyLong_MARSHAL_MASK, p); \ d >>= PyLong_MARSHAL_SHIFT; \ } \ - assert (d == 0); \ + assert(d == 0); \ } \ d = digits[n - 1]; \ do { \ @@ -285,6 +285,7 @@ _r_digits##bs(const uint##bs##_t *digits, Py_ssize_t n, uint8_t negative, \ } _r_digits(16) _r_digits(32) +#undef _r_digits static void w_PyLong(const PyLongObject *ob, char flag, WFILE *p) @@ -976,6 +977,7 @@ bad_digit: \ _w_digits(32) _w_digits(16) +#undef _w_digits static PyObject * r_PyLong(RFILE *p) { From f152274e657e0aee6a81b28c4cce2eb8d5c76a81 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 14 Jan 2025 17:43:19 +0300 Subject: [PATCH 06/10] Update Python/marshal.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Python/marshal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/marshal.c b/Python/marshal.c index 723597a7e32353..be5b8b68ab4420 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -976,8 +976,8 @@ bad_digit: \ } _w_digits(32) _w_digits(16) - #undef _w_digits + static PyObject * r_PyLong(RFILE *p) { From 035c772dc88a4094092d03bcdc74b1cf265c3f06 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 14 Jan 2025 18:21:54 +0300 Subject: [PATCH 07/10] address review: bitsize (naming) --- Python/marshal.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index be5b8b68ab4420..92cee278d97f1b 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -248,14 +248,14 @@ w_short_pstring(const void *s, Py_ssize_t n, WFILE *p) static PyObject * _PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code); -#define _r_digits(bs) \ +#define _r_digits(bitsize) \ static void \ -_r_digits##bs(const uint##bs##_t *digits, Py_ssize_t n, uint8_t negative, \ - Py_ssize_t marshal_ratio, WFILE *p) \ +_r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n, \ + uint8_t negative, Py_ssize_t marshal_ratio, WFILE *p) \ { \ /* set l to number of base PyLong_MARSHAL_BASE digits */ \ Py_ssize_t l = (n - 1)*marshal_ratio; \ - uint##bs##_t d = digits[n - 1]; \ + uint ## bitsize ## _t d = digits[n - 1]; \ \ assert(d != 0); /* a PyLong is always normalized */ \ do { \ @@ -927,13 +927,13 @@ r_long64(RFILE *p) 1 /* signed */); } -#define _w_digits(bs) \ +#define _w_digits(bitsize) \ static int \ -_w_digits##bs(uint##bs##_t *digits, Py_ssize_t size, \ - Py_ssize_t marshal_ratio, \ - int shorts_in_top_digit, RFILE *p) \ +_w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size, \ + Py_ssize_t marshal_ratio, \ + int shorts_in_top_digit, RFILE *p) \ { \ - uint##bs##_t d; \ + uint ## bitsize ## _t d; \ \ for (Py_ssize_t i = 0; i < size - 1; i++) { \ d = 0; \ @@ -942,7 +942,7 @@ _w_digits##bs(uint##bs##_t *digits, Py_ssize_t size, \ if (md < 0 || md > PyLong_MARSHAL_BASE) { \ goto bad_digit; \ } \ - d += (uint##bs##_t)md << j*PyLong_MARSHAL_SHIFT; \ + d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT; \ } \ digits[i] = d; \ } \ @@ -959,7 +959,7 @@ _w_digits##bs(uint##bs##_t *digits, Py_ssize_t size, \ "bad marshal data (unnormalized long data)"); \ return -1; \ } \ - d += (uint##bs##_t)md << j*PyLong_MARSHAL_SHIFT; \ + d += (uint ## bitsize ## _t)md << j*PyLong_MARSHAL_SHIFT; \ } \ assert(!PyErr_Occurred()); \ /* top digit should be nonzero, else the resulting PyLong won't be \ From 9d95c35993e5bb7ce1658cfbeb17f450c535c11f Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Wed, 15 Jan 2025 06:51:32 +0300 Subject: [PATCH 08/10] Apply suggestions from code review --- Python/marshal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/marshal.c b/Python/marshal.c index 92cee278d97f1b..696995cd27acbe 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -329,6 +329,7 @@ w_PyLong(const PyLongObject *ob, char flag, WFILE *p) /* must be a multiple of PyLong_MARSHAL_SHIFT */ assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0); + assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT); /* other assumptions on PyLongObject internals */ assert(layout->bits_per_digit <= 32); @@ -996,6 +997,7 @@ r_PyLong(RFILE *p) /* must be a multiple of PyLong_MARSHAL_SHIFT */ assert(layout->bits_per_digit % PyLong_MARSHAL_SHIFT == 0); + assert(layout->bits_per_digit >= PyLong_MARSHAL_SHIFT); /* other assumptions on PyLongObject internals */ assert(layout->bits_per_digit <= 32); From 47eaacff061b21cc9d9105dabce3247db47a8a96 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 23 Jan 2025 02:10:39 +0100 Subject: [PATCH 09/10] Update Python/marshal.c Co-authored-by: Sergey B Kirpichev --- Python/marshal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/marshal.c b/Python/marshal.c index 696995cd27acbe..aef6580b4a61a3 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -257,6 +257,8 @@ _r_digits##bitsize(const uint ## bitsize ## _t *digits, Py_ssize_t n, \ Py_ssize_t l = (n - 1)*marshal_ratio; \ uint ## bitsize ## _t d = digits[n - 1]; \ \ + assert(marshal_ratio > 0); \ + assert(n >= 1); \ assert(d != 0); /* a PyLong is always normalized */ \ do { \ d >>= PyLong_MARSHAL_SHIFT; \ From 9353013e9d128c4332ee7d511ecd5f141171f5ef Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 23 Jan 2025 03:04:44 +0100 Subject: [PATCH 10/10] Update Python/marshal.c Co-authored-by: Sergey B Kirpichev --- Python/marshal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/marshal.c b/Python/marshal.c index aef6580b4a61a3..cf7011652513ae 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -938,6 +938,7 @@ _w_digits##bitsize(uint ## bitsize ## _t *digits, Py_ssize_t size, \ { \ uint ## bitsize ## _t d; \ \ + assert(size >= 1); \ for (Py_ssize_t i = 0; i < size - 1; i++) { \ d = 0; \ for (Py_ssize_t j = 0; j < marshal_ratio; j++) { \