From db3f68de3a413b99baec55ebba0f9d8a40e7b78f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 27 Aug 2025 11:59:38 +0300 Subject: [PATCH 1/4] gh-71679: Share the repr implementation between bytes and bytearray This allows using the smart quotes algorithm in the bytearray's repr. --- Include/internal/pycore_bytes_methods.h | 3 + Lib/test/test_bytes.py | 2 +- ...5-08-27-13-11-47.gh-issue-71679.V0yFeT.rst | 3 + Objects/bytearrayobject.c | 94 +++---------------- Objects/bytesobject.c | 19 ++-- 5 files changed, 30 insertions(+), 91 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-13-11-47.gh-issue-71679.V0yFeT.rst diff --git a/Include/internal/pycore_bytes_methods.h b/Include/internal/pycore_bytes_methods.h index 059dc2599bbd77..0d9c7b11efb441 100644 --- a/Include/internal/pycore_bytes_methods.h +++ b/Include/internal/pycore_bytes_methods.h @@ -47,6 +47,9 @@ extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len, /* The maketrans() static method. */ extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to); +/* Helper for repr(). */ +extern PyObject *_Py_bytes_repr(const char *, Py_ssize_t, int); + /* Shared __doc__ strings. 
*/ extern const char _Py_isspace__doc__[]; extern const char _Py_isalpha__doc__[]; diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 5e57b6d0eee5ba..f10e4041937f4f 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1979,7 +1979,7 @@ def test_bytearray_repr(self, f=repr): self.assertEqual(f(bytearray([7, 8, 9, 10, 11, 12, 13])), r"bytearray(b'\x07\x08\t\n\x0b\x0c\r')") self.assertEqual(f(bytearray(b'"')), """bytearray(b'"')""") # '"' - self.assertEqual(f(bytearray(b"'")), r'''bytearray(b"\'")''') # "\'" + self.assertEqual(f(bytearray(b"'")), '''bytearray(b"'")''') # "'" self.assertEqual(f(bytearray(b"'\"")), r"""bytearray(b'\'"')""") # '\'"' self.assertEqual(f(bytearray(b"\"'\"")), r"""bytearray(b'"\'"')""") # '"\'"' self.assertEqual(f(bytearray(b'\'"\'')), r"""bytearray(b'\'"\'')""") # '\'"\'' diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-13-11-47.gh-issue-71679.V0yFeT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-13-11-47.gh-issue-71679.V0yFeT.rst new file mode 100644 index 00000000000000..b0e1e968249933 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-13-11-47.gh-issue-71679.V0yFeT.rst @@ -0,0 +1,3 @@ +Use the same quoting algorithm for the repr of bytearrays as for bytes +objects and strings -- use double quotes for quoting if the bytearray +contains single quotes and does not contain double quotes. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 3cb2d411a30b8b..775e0808b97a51 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1067,95 +1067,23 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg, return -1; } -/* Mostly copied from string_repr, but without the - "smart quote" functionality. 
*/ static PyObject * bytearray_repr_lock_held(PyObject *op) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); - PyByteArrayObject *self = _PyByteArray_CAST(op); - const char *className = _PyType_Name(Py_TYPE(self)); - const char *quote_prefix = "(b"; - const char *quote_postfix = ")"; - Py_ssize_t length = Py_SIZE(self); - /* 6 == strlen(quote_prefix) + 2 + strlen(quote_postfix) + 1 */ - Py_ssize_t newsize; - PyObject *v; - Py_ssize_t i; - char *bytes; - char c; - char *p; - int quote; - char *test, *start; - char *buffer; - - newsize = strlen(className); - if (length > (PY_SSIZE_T_MAX - 6 - newsize) / 4) { - PyErr_SetString(PyExc_OverflowError, - "bytearray object is too large to make repr"); - return NULL; - } - - newsize += 6 + length * 4; - buffer = PyMem_Malloc(newsize); - if (buffer == NULL) { - PyErr_NoMemory(); - return NULL; - } - - /* Figure out which quote to use; single is preferred */ - quote = '\''; - start = PyByteArray_AS_STRING(self); - for (test = start; test < start+length; ++test) { - if (*test == '"') { - quote = '\''; /* back to single */ - break; - } - else if (*test == '\'') - quote = '"'; - } - - p = buffer; - while (*className) - *p++ = *className++; - while (*quote_prefix) - *p++ = *quote_prefix++; - *p++ = quote; - - bytes = PyByteArray_AS_STRING(self); - for (i = 0; i < length; i++) { - /* There's at least enough room for a hex escape - and a closing quote. 
*/ - assert(newsize - (p - buffer) >= 5); - c = bytes[i]; - if (c == '\'' || c == '\\') - *p++ = '\\', *p++ = c; - else if (c == '\t') - *p++ = '\\', *p++ = 't'; - else if (c == '\n') - *p++ = '\\', *p++ = 'n'; - else if (c == '\r') - *p++ = '\\', *p++ = 'r'; - else if (c == 0) - *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0'; - else if (c < ' ' || c >= 0x7f) { - *p++ = '\\'; - *p++ = 'x'; - *p++ = Py_hexdigits[(c & 0xf0) >> 4]; - *p++ = Py_hexdigits[c & 0xf]; + const char *className = _PyType_Name(Py_TYPE(op)); + PyObject *bytes_repr = _Py_bytes_repr(PyByteArray_AS_STRING(op), + PyByteArray_GET_SIZE(op), 1); + if (bytes_repr == NULL) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_SetString(PyExc_OverflowError, + "bytearray object is too large to make repr"); } - else - *p++ = c; - } - assert(newsize - (p - buffer) >= 1); - *p++ = quote; - while (*quote_postfix) { - *p++ = *quote_postfix++; + return NULL; } - - v = PyUnicode_FromStringAndSize(buffer, p - buffer); - PyMem_Free(buffer); - return v; + PyObject *res = PyUnicode_FromFormat("%s(%U)", className, bytes_repr); + Py_DECREF(bytes_repr); + return res; } static PyObject * diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index db82f7eb684f30..a3a999ee71a30e 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1340,27 +1340,32 @@ _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack, PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) { - PyBytesObject* op = (PyBytesObject*) obj; - Py_ssize_t i, length = Py_SIZE(op); + return _Py_bytes_repr(PyBytes_AS_STRING(obj), + PyBytes_GET_SIZE(obj), smartquotes); +} + +PyObject * +_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes) +{ + Py_ssize_t i; Py_ssize_t newsize, squotes, dquotes; PyObject *v; unsigned char quote; - const unsigned char *s; Py_UCS1 *p; /* Compute size of output string */ squotes = dquotes = 0; newsize = 3; /* b'' */ - s = (const unsigned char*)op->ob_sval; for (i = 0; i < 
length; i++) { + unsigned char c = data[i]; Py_ssize_t incr = 1; - switch(s[i]) { + switch(c) { case '\'': squotes++; break; case '"': dquotes++; break; case '\\': case '\t': case '\n': case '\r': incr = 2; break; /* \C */ default: - if (s[i] < ' ' || s[i] >= 0x7f) + if (c < ' ' || c >= 0x7f) incr = 4; /* \xHH */ } if (newsize > PY_SSIZE_T_MAX - incr) @@ -1384,7 +1389,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes) *p++ = 'b', *p++ = quote; for (i = 0; i < length; i++) { - unsigned char c = op->ob_sval[i]; + unsigned char c = data[i]; if (c == quote || c == '\\') *p++ = '\\', *p++ = c; else if (c == '\t') From 0f0e35fcd84e78dc76b4a764dd45ccb3bcb4c19c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 15 Sep 2025 17:14:52 +0300 Subject: [PATCH 2/4] Update Include/internal/pycore_bytes_methods.h Co-authored-by: Victor Stinner --- Include/internal/pycore_bytes_methods.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_bytes_methods.h b/Include/internal/pycore_bytes_methods.h index 0d9c7b11efb441..f95f00e962f69d 100644 --- a/Include/internal/pycore_bytes_methods.h +++ b/Include/internal/pycore_bytes_methods.h @@ -47,7 +47,7 @@ extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len, /* The maketrans() static method. */ extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to); -/* Helper for repr(). */ +/* Helper for repr(bytes) and repr(bytearray). */ extern PyObject *_Py_bytes_repr(const char *, Py_ssize_t, int); /* Shared __doc__ strings. */ From 524103ae24294ac3f35b29c55aad9bca47872995 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 15 Sep 2025 17:37:41 +0300 Subject: [PATCH 3/4] Address review comments. 
--- Include/internal/pycore_bytes_methods.h | 2 +- Objects/bytearrayobject.c | 7 ++----- Objects/bytesobject.c | 9 +++++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_bytes_methods.h b/Include/internal/pycore_bytes_methods.h index f95f00e962f69d..3e1474c1c010f9 100644 --- a/Include/internal/pycore_bytes_methods.h +++ b/Include/internal/pycore_bytes_methods.h @@ -48,7 +48,7 @@ extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len, extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to); /* Helper for repr(bytes) and repr(bytearray). */ -extern PyObject *_Py_bytes_repr(const char *, Py_ssize_t, int); +extern PyObject *_Py_bytes_repr(const char *, Py_ssize_t, int, const char *); /* Shared __doc__ strings. */ extern const char _Py_isspace__doc__[]; diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 775e0808b97a51..c519485c1cc74c 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1073,12 +1073,9 @@ bytearray_repr_lock_held(PyObject *op) _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); const char *className = _PyType_Name(Py_TYPE(op)); PyObject *bytes_repr = _Py_bytes_repr(PyByteArray_AS_STRING(op), - PyByteArray_GET_SIZE(op), 1); + PyByteArray_GET_SIZE(op), 1, + "bytearray"); if (bytes_repr == NULL) { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_SetString(PyExc_OverflowError, - "bytearray object is too large to make repr"); - } return NULL; } PyObject *res = PyUnicode_FromFormat("%s(%U)", className, bytes_repr); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 9e0bf9f1053d90..be314b7e607549 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1340,11 +1340,12 @@ PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) { return _Py_bytes_repr(PyBytes_AS_STRING(obj), - PyBytes_GET_SIZE(obj), smartquotes); + PyBytes_GET_SIZE(obj), smartquotes, "bytes"); } PyObject * -_Py_bytes_repr(const char *data, Py_ssize_t 
length, int smartquotes) +_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes, + const char *classname) { Py_ssize_t i; Py_ssize_t newsize, squotes, dquotes; @@ -1411,8 +1412,8 @@ _Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes) return v; overflow: - PyErr_SetString(PyExc_OverflowError, - "bytes object is too large to make repr"); + PyErr_Format(PyExc_OverflowError, + "%s object is too large to make repr", classname); return NULL; } From 2e1b98f375d63cde2296abbf9f4dca5769313381 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 15 Sep 2025 18:33:07 +0300 Subject: [PATCH 4/4] Update Objects/bytesobject.c Co-authored-by: Victor Stinner --- Objects/bytesobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index be314b7e607549..fbed10b5f7c24e 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1339,8 +1339,8 @@ _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack, PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) { - return _Py_bytes_repr(PyBytes_AS_STRING(obj), - PyBytes_GET_SIZE(obj), smartquotes, "bytes"); + return _Py_bytes_repr(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), + smartquotes, "bytes"); } PyObject *