diff --git a/Include/internal/pycore_bytes_methods.h b/Include/internal/pycore_bytes_methods.h index 059dc2599bbd77..3e1474c1c010f9 100644 --- a/Include/internal/pycore_bytes_methods.h +++ b/Include/internal/pycore_bytes_methods.h @@ -47,6 +47,11 @@ extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len, /* The maketrans() static method. */ extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to); +/* Helper for repr(bytes) and repr(bytearray). 'classname' names the type + in the OverflowError message raised when the repr would be too large. */ +extern PyObject *_Py_bytes_repr(const char *data, Py_ssize_t length, + int smartquotes, const char *classname); + /* Shared __doc__ strings. */ extern const char _Py_isspace__doc__[]; extern const char _Py_isalpha__doc__[]; diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 5e57b6d0eee5ba..f10e4041937f4f 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1979,7 +1979,7 @@ def test_bytearray_repr(self, f=repr): self.assertEqual(f(bytearray([7, 8, 9, 10, 11, 12, 13])), r"bytearray(b'\x07\x08\t\n\x0b\x0c\r')") self.assertEqual(f(bytearray(b'"')), """bytearray(b'"')""") # '"' - self.assertEqual(f(bytearray(b"'")), r'''bytearray(b"\'")''') # "\'" + self.assertEqual(f(bytearray(b"'")), '''bytearray(b"'")''') # "'" self.assertEqual(f(bytearray(b"'\"")), r"""bytearray(b'\'"')""") # '\'"' self.assertEqual(f(bytearray(b"\"'\"")), r"""bytearray(b'"\'"')""") # '"\'"' self.assertEqual(f(bytearray(b'\'"\'')), r"""bytearray(b'\'"\'')""") # '\'"\'' diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-13-11-47.gh-issue-71679.V0yFeT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-13-11-47.gh-issue-71679.V0yFeT.rst new file mode 100644 index 00000000000000..b0e1e968249933 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-13-11-47.gh-issue-71679.V0yFeT.rst @@ -0,0 +1,3 @@ +Use the same quoting algorithm for the repr of bytearrays as for bytes +objects and strings -- use double quotes for quoting if the bytearray +contains single quotes and does not contain double quotes.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 3cb2d411a30b8b..c519485c1cc74c 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1067,95 +1067,20 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg, return -1; } -/* Mostly copied from string_repr, but without the - "smart quote" functionality. */ static PyObject * bytearray_repr_lock_held(PyObject *op) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); - PyByteArrayObject *self = _PyByteArray_CAST(op); - const char *className = _PyType_Name(Py_TYPE(self)); - const char *quote_prefix = "(b"; - const char *quote_postfix = ")"; - Py_ssize_t length = Py_SIZE(self); - /* 6 == strlen(quote_prefix) + 2 + strlen(quote_postfix) + 1 */ - Py_ssize_t newsize; - PyObject *v; - Py_ssize_t i; - char *bytes; - char c; - char *p; - int quote; - char *test, *start; - char *buffer; - - newsize = strlen(className); - if (length > (PY_SSIZE_T_MAX - 6 - newsize) / 4) { - PyErr_SetString(PyExc_OverflowError, - "bytearray object is too large to make repr"); - return NULL; - } - - newsize += 6 + length * 4; - buffer = PyMem_Malloc(newsize); - if (buffer == NULL) { - PyErr_NoMemory(); + const char *className = _PyType_Name(Py_TYPE(op)); + PyObject *bytes_repr = _Py_bytes_repr(PyByteArray_AS_STRING(op), + PyByteArray_GET_SIZE(op), 1, + "bytearray"); + if (bytes_repr == NULL) { return NULL; } - - /* Figure out which quote to use; single is preferred */ - quote = '\''; - start = PyByteArray_AS_STRING(self); - for (test = start; test < start+length; ++test) { - if (*test == '"') { - quote = '\''; /* back to single */ - break; - } - else if (*test == '\'') - quote = '"'; - } - - p = buffer; - while (*className) - *p++ = *className++; - while (*quote_prefix) - *p++ = *quote_prefix++; - *p++ = quote; - - bytes = PyByteArray_AS_STRING(self); - for (i = 0; i < length; i++) { - /* There's at least enough room for a hex escape - and a closing quote. 
*/ - assert(newsize - (p - buffer) >= 5); - c = bytes[i]; - if (c == '\'' || c == '\\') - *p++ = '\\', *p++ = c; - else if (c == '\t') - *p++ = '\\', *p++ = 't'; - else if (c == '\n') - *p++ = '\\', *p++ = 'n'; - else if (c == '\r') - *p++ = '\\', *p++ = 'r'; - else if (c == 0) - *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0'; - else if (c < ' ' || c >= 0x7f) { - *p++ = '\\'; - *p++ = 'x'; - *p++ = Py_hexdigits[(c & 0xf0) >> 4]; - *p++ = Py_hexdigits[c & 0xf]; - } - else - *p++ = c; - } - assert(newsize - (p - buffer) >= 1); - *p++ = quote; - while (*quote_postfix) { - *p++ = *quote_postfix++; - } - - v = PyUnicode_FromStringAndSize(buffer, p - buffer); - PyMem_Free(buffer); - return v; + PyObject *res = PyUnicode_FromFormat("%s(%U)", className, bytes_repr); + Py_DECREF(bytes_repr); + return res; } static PyObject * diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 3de57fe4e99e86..fbed10b5f7c24e 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1339,27 +1339,33 @@ _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack, PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) { - PyBytesObject* op = (PyBytesObject*) obj; - Py_ssize_t i, length = Py_SIZE(op); + return _Py_bytes_repr(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), + smartquotes, "bytes"); +} + +PyObject * +_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes, + const char *classname) +{ + Py_ssize_t i; Py_ssize_t newsize, squotes, dquotes; PyObject *v; unsigned char quote; - const unsigned char *s; Py_UCS1 *p; /* Compute size of output string */ squotes = dquotes = 0; newsize = 3; /* b'' */ - s = (const unsigned char*)op->ob_sval; for (i = 0; i < length; i++) { + unsigned char c = data[i]; Py_ssize_t incr = 1; - switch(s[i]) { + switch(c) { case '\'': squotes++; break; case '"': dquotes++; break; case '\\': case '\t': case '\n': case '\r': incr = 2; break; /* \C */ default: - if (s[i] < ' ' || s[i] >= 0x7f) + if (c < ' ' || c >= 0x7f) incr = 4; 
/* \xHH */ } if (newsize > PY_SSIZE_T_MAX - incr) @@ -1383,7 +1389,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes) *p++ = 'b', *p++ = quote; for (i = 0; i < length; i++) { - unsigned char c = op->ob_sval[i]; + unsigned char c = data[i]; if (c == quote || c == '\\') *p++ = '\\', *p++ = c; else if (c == '\t') @@ -1406,8 +1412,8 @@ PyBytes_Repr(PyObject *obj, int smartquotes) return v; overflow: - PyErr_SetString(PyExc_OverflowError, - "bytes object is too large to make repr"); + PyErr_Format(PyExc_OverflowError, + "%s object is too large to make repr", classname); return NULL; }