Skip to content

Commit a1cf6e9

Browse files
gh-71679: Share the repr implementation between bytes and bytearray (GH-138181)
This allows the smart quotes algorithm to be used in the bytearray's repr.
1 parent cf9ef73 commit a1cf6e9

File tree

5 files changed

+30
-93
lines changed

5 files changed

+30
-93
lines changed

Include/internal/pycore_bytes_methods.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len,
4747
/* The maketrans() static method. */
4848
extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to);
4949

50+
/* Helper for repr(bytes) and repr(bytearray). */
51+
extern PyObject *_Py_bytes_repr(const char *, Py_ssize_t, int, const char *);
52+
5053
/* Shared __doc__ strings. */
5154
extern const char _Py_isspace__doc__[];
5255
extern const char _Py_isalpha__doc__[];

Lib/test/test_bytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1979,7 +1979,7 @@ def test_bytearray_repr(self, f=repr):
19791979
self.assertEqual(f(bytearray([7, 8, 9, 10, 11, 12, 13])),
19801980
r"bytearray(b'\x07\x08\t\n\x0b\x0c\r')")
19811981
self.assertEqual(f(bytearray(b'"')), """bytearray(b'"')""") # '"'
1982-
self.assertEqual(f(bytearray(b"'")), r'''bytearray(b"\'")''') # "\'"
1982+
self.assertEqual(f(bytearray(b"'")), '''bytearray(b"'")''') # "'"
19831983
self.assertEqual(f(bytearray(b"'\"")), r"""bytearray(b'\'"')""") # '\'"'
19841984
self.assertEqual(f(bytearray(b"\"'\"")), r"""bytearray(b'"\'"')""") # '"\'"'
19851985
self.assertEqual(f(bytearray(b'\'"\'')), r"""bytearray(b'\'"\'')""") # '\'"\''
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Use the same quoting algorithm for the repr of bytearrays as for bytes
2+
objects and strings -- use double quotes for quoting if the bytearray
3+
contains single quotes and does not contain double quotes.

Objects/bytearrayobject.c

Lines changed: 8 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,95 +1067,20 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
10671067
return -1;
10681068
}
10691069

1070-
/* Mostly copied from string_repr, but without the
1071-
"smart quote" functionality. */
10721070
static PyObject *
10731071
bytearray_repr_lock_held(PyObject *op)
10741072
{
10751073
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
1076-
PyByteArrayObject *self = _PyByteArray_CAST(op);
1077-
const char *className = _PyType_Name(Py_TYPE(self));
1078-
const char *quote_prefix = "(b";
1079-
const char *quote_postfix = ")";
1080-
Py_ssize_t length = Py_SIZE(self);
1081-
/* 6 == strlen(quote_prefix) + 2 + strlen(quote_postfix) + 1 */
1082-
Py_ssize_t newsize;
1083-
PyObject *v;
1084-
Py_ssize_t i;
1085-
char *bytes;
1086-
char c;
1087-
char *p;
1088-
int quote;
1089-
char *test, *start;
1090-
char *buffer;
1091-
1092-
newsize = strlen(className);
1093-
if (length > (PY_SSIZE_T_MAX - 6 - newsize) / 4) {
1094-
PyErr_SetString(PyExc_OverflowError,
1095-
"bytearray object is too large to make repr");
1096-
return NULL;
1097-
}
1098-
1099-
newsize += 6 + length * 4;
1100-
buffer = PyMem_Malloc(newsize);
1101-
if (buffer == NULL) {
1102-
PyErr_NoMemory();
1074+
const char *className = _PyType_Name(Py_TYPE(op));
1075+
PyObject *bytes_repr = _Py_bytes_repr(PyByteArray_AS_STRING(op),
1076+
PyByteArray_GET_SIZE(op), 1,
1077+
"bytearray");
1078+
if (bytes_repr == NULL) {
11031079
return NULL;
11041080
}
1105-
1106-
/* Figure out which quote to use; single is preferred */
1107-
quote = '\'';
1108-
start = PyByteArray_AS_STRING(self);
1109-
for (test = start; test < start+length; ++test) {
1110-
if (*test == '"') {
1111-
quote = '\''; /* back to single */
1112-
break;
1113-
}
1114-
else if (*test == '\'')
1115-
quote = '"';
1116-
}
1117-
1118-
p = buffer;
1119-
while (*className)
1120-
*p++ = *className++;
1121-
while (*quote_prefix)
1122-
*p++ = *quote_prefix++;
1123-
*p++ = quote;
1124-
1125-
bytes = PyByteArray_AS_STRING(self);
1126-
for (i = 0; i < length; i++) {
1127-
/* There's at least enough room for a hex escape
1128-
and a closing quote. */
1129-
assert(newsize - (p - buffer) >= 5);
1130-
c = bytes[i];
1131-
if (c == '\'' || c == '\\')
1132-
*p++ = '\\', *p++ = c;
1133-
else if (c == '\t')
1134-
*p++ = '\\', *p++ = 't';
1135-
else if (c == '\n')
1136-
*p++ = '\\', *p++ = 'n';
1137-
else if (c == '\r')
1138-
*p++ = '\\', *p++ = 'r';
1139-
else if (c == 0)
1140-
*p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
1141-
else if (c < ' ' || c >= 0x7f) {
1142-
*p++ = '\\';
1143-
*p++ = 'x';
1144-
*p++ = Py_hexdigits[(c & 0xf0) >> 4];
1145-
*p++ = Py_hexdigits[c & 0xf];
1146-
}
1147-
else
1148-
*p++ = c;
1149-
}
1150-
assert(newsize - (p - buffer) >= 1);
1151-
*p++ = quote;
1152-
while (*quote_postfix) {
1153-
*p++ = *quote_postfix++;
1154-
}
1155-
1156-
v = PyUnicode_FromStringAndSize(buffer, p - buffer);
1157-
PyMem_Free(buffer);
1158-
return v;
1081+
PyObject *res = PyUnicode_FromFormat("%s(%U)", className, bytes_repr);
1082+
Py_DECREF(bytes_repr);
1083+
return res;
11591084
}
11601085

11611086
static PyObject *

Objects/bytesobject.c

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,27 +1348,33 @@ _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
13481348
PyObject *
13491349
PyBytes_Repr(PyObject *obj, int smartquotes)
13501350
{
1351-
PyBytesObject* op = (PyBytesObject*) obj;
1352-
Py_ssize_t i, length = Py_SIZE(op);
1351+
return _Py_bytes_repr(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
1352+
smartquotes, "bytes");
1353+
}
1354+
1355+
PyObject *
1356+
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes,
1357+
const char *classname)
1358+
{
1359+
Py_ssize_t i;
13531360
Py_ssize_t newsize, squotes, dquotes;
13541361
PyObject *v;
13551362
unsigned char quote;
1356-
const unsigned char *s;
13571363
Py_UCS1 *p;
13581364

13591365
/* Compute size of output string */
13601366
squotes = dquotes = 0;
13611367
newsize = 3; /* b'' */
1362-
s = (const unsigned char*)op->ob_sval;
13631368
for (i = 0; i < length; i++) {
1369+
unsigned char c = data[i];
13641370
Py_ssize_t incr = 1;
1365-
switch(s[i]) {
1371+
switch(c) {
13661372
case '\'': squotes++; break;
13671373
case '"': dquotes++; break;
13681374
case '\\': case '\t': case '\n': case '\r':
13691375
incr = 2; break; /* \C */
13701376
default:
1371-
if (s[i] < ' ' || s[i] >= 0x7f)
1377+
if (c < ' ' || c >= 0x7f)
13721378
incr = 4; /* \xHH */
13731379
}
13741380
if (newsize > PY_SSIZE_T_MAX - incr)
@@ -1392,7 +1398,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
13921398

13931399
*p++ = 'b', *p++ = quote;
13941400
for (i = 0; i < length; i++) {
1395-
unsigned char c = op->ob_sval[i];
1401+
unsigned char c = data[i];
13961402
if (c == quote || c == '\\')
13971403
*p++ = '\\', *p++ = c;
13981404
else if (c == '\t')
@@ -1415,8 +1421,8 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
14151421
return v;
14161422

14171423
overflow:
1418-
PyErr_SetString(PyExc_OverflowError,
1419-
"bytes object is too large to make repr");
1424+
PyErr_Format(PyExc_OverflowError,
1425+
"%s object is too large to make repr", classname);
14201426
return NULL;
14211427
}
14221428

0 commit comments

Comments
 (0)