diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 71c133f173f157..bba35daab23cf3 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -40,3 +40,31 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable) { return PyBytes_Join(sep, iterable); } + + +// --- PyBytesWriter API ----------------------------------------------------- + +typedef struct PyBytesWriter PyBytesWriter; + +PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create( + Py_ssize_t prealloc); +PyAPI_FUNC(void) PyBytesWriter_Discard( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( + PyBytesWriter *writer); + +PyAPI_FUNC(void*) PyBytesWriter_Alloc( + PyBytesWriter *writer, + Py_ssize_t alloc); +PyAPI_FUNC(void*) PyBytesWriter_Extend( + PyBytesWriter *writer, + void *buf, + Py_ssize_t extend); +PyAPI_FUNC(int) PyBytesWriter_Truncate( + PyBytesWriter *writer, + void *buf); + +PyAPI_FUNC(int) PyBytesWriter_WriteBytes( + PyBytesWriter *writer, + const void *bytes, + Py_ssize_t size); diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 7c252f5b570c13..50c8e04c761e03 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -24,6 +24,7 @@ extern "C" { # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 # define Py_unicode_writers_MAXFREELIST 1 +# define Py_bytes_writers_MAXFREELIST 1 # define Py_pymethodobjects_MAXFREELIST 20 // A generic freelist of either PyObjects or other data structures. 
@@ -53,6 +54,7 @@ struct _Py_freelists { struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; struct _Py_freelist unicode_writers; + struct _Py_freelist bytes_writers; struct _Py_freelist pymethodobjects; }; diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 5b61c73381542d..df51f7295121d1 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -291,5 +291,70 @@ def test_join(self): bytes_join(b'', NULL) +class PyBytesWriterTest(unittest.TestCase): + def create_writer(self, prealloc): + return _testcapi.PyBytesWriter(prealloc) + + def test_empty(self): + # Test PyBytesWriter_Create() + writer = self.create_writer(0) + self.assertEqual(writer.finish(), b'') + + def test_write_bytes(self): + # Test PyBytesWriter_WriteBytes() + + writer = self.create_writer(0) + writer.write_bytes(b'Hello World!', -1) + self.assertEqual(writer.finish(), b'Hello World!') + + writer = self.create_writer(0) + writer.write_bytes(b'Hello ', -1) + writer.write_bytes(b'World! 
', 6) + self.assertEqual(writer.finish(), b'Hello World!') + + def test_extend(self): + # Test writer.extend(): PyBytesWriter_Alloc() and PyBytesWriter_Truncate() + + writer = self.create_writer(0) + writer.extend(13, b'number=123456') + writer.extend(0, b'') + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(0, b'') + writer.extend(13, b'number=123456') + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(10, b'number=') + writer.extend(10, b'123456') + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(7, b'number=') + writer.extend(0, b'') + writer.extend(6, b'123456') + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer(0) + writer.extend(6, b'number') + writer.extend(1, b'=') + writer.extend(3, b'123') + writer.extend(3, b'456') + self.assertEqual(writer.finish(), b'number=123456') + + def test_hello_world_example(self): + self.assertEqual(_testcapi.byteswriter_hello_world(), + b'Hello World') + + def test_alloc_example(self): + self.assertEqual(_testcapi.byteswriter_alloc(), + b'abc') + + def test_extend_example(self): + self.assertEqual(_testcapi.byteswriter_extend(), + b'Hello World') + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d260f1a68f8c70..686133d8d86bfb 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2615,29 +2615,25 @@ save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj) static PyObject * raw_unicode_escape(PyObject *obj) { - char *p; - Py_ssize_t i, size; - const void *data; - int kind; - _PyBytesWriter writer; + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + const void *data = PyUnicode_DATA(obj); + int kind = PyUnicode_KIND(obj); - _PyBytesWriter_Init(&writer); - - size = PyUnicode_GET_LENGTH(obj); - data = PyUnicode_DATA(obj); - kind = PyUnicode_KIND(obj); - - p = 
_PyBytesWriter_Alloc(&writer, size); - if (p == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { + return NULL; + } + char *p = PyBytesWriter_Alloc(writer, size); + if (p == NULL) { goto error; - writer.overallocate = 1; + } - for (i=0; i < size; i++) { + for (Py_ssize_t i=0; i < size; i++) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); /* Map 32-bit characters to '\Uxxxxxxxx' */ if (ch >= 0x10000) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 10-1); + p = PyBytesWriter_Extend(writer, p, 10-1); if (p == NULL) goto error; @@ -2658,7 +2654,7 @@ raw_unicode_escape(PyObject *obj) ch == 0x1a) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 6-1); + p = PyBytesWriter_Extend(writer, p, 6-1); if (p == NULL) goto error; @@ -2674,10 +2670,13 @@ raw_unicode_escape(PyObject *obj) *p++ = (char) ch; } - return _PyBytesWriter_Finish(&writer, p); + if (PyBytesWriter_Truncate(writer, p) < 0) { + goto error; + } + return PyBytesWriter_Finish(writer); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/_struct.c b/Modules/_struct.c index f04805d9d6d1d7..1e22cf55d5e57c 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2272,7 +2272,6 @@ strings."); static PyObject * s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { - char *buf; PyStructObject *soself; _structmodulestate *state = get_struct_state_structinst(self); @@ -2288,21 +2287,23 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } /* Allocate a new string */ - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - buf = _PyBytesWriter_Alloc(&writer, soself->s_size); + PyBytesWriter *writer = PyBytesWriter_Create(soself->s_size); + if (writer == NULL) { + return NULL; + } + char *buf = PyBytesWriter_Alloc(writer, soself->s_size); if (buf == NULL) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } 
/* Call the guts */ if ( s_pack_internal(soself, args, 0, buf, state) != 0 ) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } - return _PyBytesWriter_Finish(&writer, buf + soself->s_size); + return PyBytesWriter_Finish(writer); } PyDoc_STRVAR(s_pack_into__doc__, diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 33903de14ba68d..fde395fce7e4cb 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -51,9 +51,259 @@ bytes_join(PyObject *Py_UNUSED(module), PyObject *args) } +// --- PyBytesWriter type --------------------------------------------------- + +typedef struct { + PyObject_HEAD + PyBytesWriter *writer; +} WriterObject; + + +static PyObject * +writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)type->tp_alloc(type, 0); + if (!self) { + return NULL; + } + self->writer = NULL; + return (PyObject*)self; +} + + +static int +writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)self_raw; + /* Discard(NULL) is a no-op; reset so a failed re-init can't dangle. */ + PyBytesWriter_Discard(self->writer); + self->writer = NULL; + + if (kwargs && PyDict_GET_SIZE(kwargs)) { + PyErr_Format(PyExc_TypeError, + "PyBytesWriter() takes exactly no keyword arguments"); + return -1; + } + + Py_ssize_t prealloc; + if (!PyArg_ParseTuple(args, "n", &prealloc)) { + return -1; + } + + self->writer = PyBytesWriter_Create(prealloc); + if (self->writer == NULL) { + return -1; + } + return 0; +} + + +static void +writer_dealloc(PyObject *self_raw) +{ + WriterObject *self = (WriterObject *)self_raw; + PyTypeObject *tp = Py_TYPE(self); + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + tp->tp_free(self); + Py_DECREF(tp); +} + + +static inline int +writer_check(WriterObject *self) +{ + if (self->writer == NULL) { + PyErr_SetString(PyExc_ValueError, "operation on finished writer"); + return -1; + } + return 0; +} + + +static PyObject* +writer_write_bytes(PyObject 
*self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *str; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &str, &size)) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(self->writer, str, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_extend(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t extend; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, + "ny#", + &extend, &str, &str_size)) { + return NULL; + } + assert(extend >= str_size); + + char *buf = PyBytesWriter_Alloc(self->writer, extend); + if (buf == NULL) { + return NULL; + } + memcpy(buf, str, str_size); + buf += str_size; + + if (PyBytesWriter_Truncate(self->writer, buf) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + +static PyObject* +writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + PyObject *str = PyBytesWriter_Finish(self->writer); + self->writer = NULL; + return str; +} + + +static PyMethodDef writer_methods[] = { + {"write_bytes", writer_write_bytes, METH_VARARGS}, + {"extend", writer_extend, METH_VARARGS}, + {"finish", writer_finish, METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyType_Slot Writer_Type_slots[] = { + {Py_tp_new, writer_new}, + {Py_tp_init, writer_init}, + {Py_tp_dealloc, writer_dealloc}, + {Py_tp_methods, writer_methods}, + {0, 0}, /* sentinel */ +}; + +static PyType_Spec Writer_spec = { + .name = "_testcapi.PyBytesWriter", + .basicsize = sizeof(WriterObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = Writer_Type_slots, +}; + + +static PyObject * +byteswriter_hello_world(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(0); + if (writer == NULL) { 
+ return NULL; + } + + char *buf = PyBytesWriter_Alloc(writer, 20); + if (buf == NULL) { + goto error; + } + memcpy(buf, "Hello World", strlen("Hello World")); + buf += strlen("Hello World"); + if (PyBytesWriter_Truncate(writer, buf) < 0) { + goto error; + } + + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +static PyObject * +byteswriter_alloc(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(3); + if (writer == NULL) { + return NULL; + } + + // Allocate 3 bytes + char *buf = PyBytesWriter_Alloc(writer, 3); + if (buf == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + memcpy(buf, "abc", 3); + return PyBytesWriter_Finish(writer); +} + + +static PyObject * +byteswriter_extend(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(0); + if (writer == NULL) { + return NULL; + } + + // Allocate 10 bytes + char *buf = PyBytesWriter_Alloc(writer, 10); + if (buf == NULL) { + goto error; + } + + // Write some bytes + memcpy(buf, "Hello ", strlen("Hello ")); + buf += strlen("Hello "); + + // Allocate 10 more bytes + buf = PyBytesWriter_Extend(writer, buf, 10); + if (buf == NULL) { + goto error; + } + + // Write more bytes + memcpy(buf, "World", strlen("World")); + buf += strlen("World"); + + // Truncate to len("Hello World") bytes + if (PyBytesWriter_Truncate(writer, buf) < 0) { + goto error; + } + + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, {"bytes_join", bytes_join, METH_VARARGS}, + {"byteswriter_hello_world", byteswriter_hello_world, METH_NOARGS}, + {"byteswriter_alloc", byteswriter_alloc, METH_NOARGS}, + {"byteswriter_extend", byteswriter_extend, METH_NOARGS}, {NULL}, }; @@ -64,5 +314,15 @@ _PyTestCapi_Init_Bytes(PyObject *m) return -1; } + PyTypeObject 
*writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec); + if (writer_type == NULL) { + return -1; + } + if (PyModule_AddType(m, writer_type) < 0) { + Py_DECREF(writer_type); + return -1; + } + Py_DECREF(writer_type); + return 0; } diff --git a/Modules/binascii.c b/Modules/binascii.c index 6bb01d148b6faa..3c5903ef7623f1 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -302,16 +302,13 @@ static PyObject * binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) /*[clinic end generated code: output=b1b99de62d9bbeb8 input=beb27822241095cd]*/ { - unsigned char *ascii_data; const unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; binascii_state *state; Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); bin_data = data->buf; bin_len = data->len; if ( bin_len > 45 ) { @@ -326,9 +323,16 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) /* We're lazy and allocate to much (fixed up later) */ out_len = 2 + (bin_len + 2) / 3 * 4; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { return NULL; + } + unsigned char *ascii_start = PyBytesWriter_Alloc(writer, out_len); + if (ascii_start == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + unsigned char *ascii_data = ascii_start; /* Store the length */ if (backtick && !bin_len) @@ -356,7 +360,8 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } *ascii_data++ = '\n'; /* Append a courtesy newline */ - return _PyBytesWriter_Finish(&writer, ascii_data); + assert((ascii_data - ascii_start) == out_len); + return PyBytesWriter_Finish(writer); } /*[clinic input] @@ -387,12 +392,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) /* Allocate the buffer */ Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ - 
_PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len); - if (bin_data == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); + if (writer == NULL) { return NULL; - unsigned char *bin_data_start = bin_data; + } + unsigned char *bin_data_start = PyBytesWriter_Alloc(writer, bin_len); + if (bin_data_start == NULL) { + goto error_end; + } + unsigned char *bin_data = bin_data_start; if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') { state = get_binascii_state(module); @@ -502,13 +510,18 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) } else { PyErr_SetString(state->Error, "Incorrect padding"); } - error_end: - _PyBytesWriter_Dealloc(&writer); - return NULL; + goto error_end; } done: - return _PyBytesWriter_Finish(&writer, bin_data); + if (PyBytesWriter_Truncate(writer, bin_data) < 0) { + goto error_end; + } + return PyBytesWriter_Finish(writer); + +error_end: + PyBytesWriter_Discard(writer); + return NULL; } @@ -527,18 +540,15 @@ static PyObject * binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) /*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/ { - unsigned char *ascii_data; const unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; binascii_state *state; bin_data = data->buf; bin_len = data->len; - _PyBytesWriter_Init(&writer); assert(bin_len >= 0); @@ -557,9 +567,14 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) out_len = bin_len*2 + 2; if (newline) out_len++; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { return NULL; + } + unsigned char *ascii_data = PyBytesWriter_Alloc(writer, out_len); + if (ascii_data == NULL) { + goto error; + } for( ; bin_len > 0 ; 
bin_len--, bin_data++ ) { /* Shift the data into our buffer */ @@ -584,7 +599,14 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) if (newline) *ascii_data++ = '\n'; /* Append a courtesy newline */ - return _PyBytesWriter_Finish(&writer, ascii_data); + if (PyBytesWriter_Truncate(writer, ascii_data) < 0) { + goto error; + } + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bdb389eb25823e..2e6eb11b60c421 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -7,6 +7,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE() #include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_DigitValue @@ -198,7 +199,6 @@ PyBytes_FromString(const char *str) PyObject * PyBytes_FromFormatV(const char *format, va_list vargs) { - char *s; const char *f; const char *p; Py_ssize_t prec; @@ -212,21 +212,29 @@ PyBytes_FromFormatV(const char *format, va_list vargs) Longest 64-bit pointer representation: "0xffffffffffffffff\0" (19 bytes). 
*/ char buffer[21]; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - s = _PyBytesWriter_Alloc(&writer, strlen(format)); - if (s == NULL) + size_t format_len = strlen(format); + PyBytesWriter *writer = PyBytesWriter_Create(format_len); + if (writer == NULL) { return NULL; - writer.overallocate = 1; + } -#define WRITE_BYTES(str) \ + char *s = PyBytesWriter_Alloc(writer, format_len); + if (s == NULL) { + goto error; + } + +#define WRITE_BYTES_LEN(str, len_expr) \ do { \ - s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ - if (s == NULL) \ + size_t len = (len_expr); \ + s = PyBytesWriter_Extend(writer, s, len); \ + if (s == NULL) { \ goto error; \ + } \ + memcpy(s, (str), len); \ + s += len; \ } while (0) +#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str)) for (f = format; *f; f++) { if (*f != '%') { @@ -267,10 +275,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) ++f; } - /* subtract bytes preallocated for the format string - (ex: 2 for "%s") */ - writer.min_size -= (f - p + 1); - switch (*f) { case 'c': { @@ -281,7 +285,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) "expects an integer in range [0; 255]"); goto error; } - writer.min_size++; *s++ = (unsigned char)c; break; } @@ -340,9 +343,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs) i++; } } - s = _PyBytesWriter_WriteBytes(&writer, s, p, i); - if (s == NULL) - goto error; + WRITE_BYTES_LEN(p, i); break; } @@ -361,28 +362,26 @@ PyBytes_FromFormatV(const char *format, va_list vargs) break; case '%': - writer.min_size++; *s++ = '%'; break; default: - if (*f == 0) { - /* fix min_size if we reached the end of the format string */ - writer.min_size++; - } - /* invalid format string: copy unformatted string and exit */ WRITE_BYTES(p); - return _PyBytesWriter_Finish(&writer, s); + goto done; } } - #undef WRITE_BYTES +#undef WRITE_BYTES_LEN - return _PyBytesWriter_Finish(&writer, s); +done: + if (PyBytesWriter_Truncate(writer, s) < 0) { + goto error; + 
} + return PyBytesWriter_Finish(writer); - error: - _PyBytesWriter_Dealloc(&writer); +error: + PyBytesWriter_Discard(writer); return NULL; } @@ -1083,16 +1082,17 @@ PyObject *_PyBytes_DecodeEscape(const char *s, const char **first_invalid_escape) { int c; - char *p; const char *end; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - p = _PyBytesWriter_Alloc(&writer, len); - if (p == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(len); + if (writer == NULL) { return NULL; - writer.overallocate = 1; + } + + char *p = PyBytesWriter_Alloc(writer, len); + if (p == NULL) { + goto failed; + } *first_invalid_escape = NULL; @@ -1184,10 +1184,13 @@ PyObject *_PyBytes_DecodeEscape(const char *s, } } - return _PyBytesWriter_Finish(&writer, p); + if (PyBytesWriter_Truncate(writer, p) < 0) { + goto failed; + } + return PyBytesWriter_Finish(writer); failed: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -3728,3 +3731,219 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, } } + +// --- PyBytesWriter API ----------------------------------------------------- + +struct PyBytesWriter { + char small_buffer[256]; + PyObject *obj; + Py_ssize_t size; +}; + + +static inline char* +byteswriter_start(PyBytesWriter *writer) +{ + if (writer->obj == NULL) { + return writer->small_buffer; + } + else { + return PyBytes_AS_STRING(writer->obj); + } +} + + +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 +#endif + + +static inline char* +byteswriter_alloc(PyBytesWriter *writer, Py_ssize_t size, int overallocate) +{ + if (writer->obj == NULL) { + if ((size_t)size <= sizeof(writer->small_buffer)) { + return writer->small_buffer; + } + } + else { + if (size <= PyBytes_GET_SIZE(writer->obj)) { + return PyBytes_AS_STRING(writer->obj); + } + } + + if (overallocate) { + if (size <= (PY_SSIZE_T_MAX 
- size / OVERALLOCATE_FACTOR)) { + size += size / OVERALLOCATE_FACTOR; + } + } + + if (writer->obj != NULL) { + if (_PyBytes_Resize(&writer->obj, size)) { + return NULL; + } + assert(writer->obj != NULL); + } + else { + writer->obj = PyBytes_FromStringAndSize(NULL, size); + if (writer->obj == NULL) { + return NULL; + } + if (writer->size) { + memcpy(PyBytes_AS_STRING(writer->obj), + writer->small_buffer, + writer->size); + } + } + return byteswriter_start(writer); +} + + +PyBytesWriter* +PyBytesWriter_Create(Py_ssize_t prealloc) +{ + if (prealloc < 0) { + PyErr_SetString(PyExc_ValueError, "prealloc must be >= 0"); + return NULL; + } + + PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers); + if (writer == NULL) { + writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter)); + if (writer == NULL) { + PyErr_NoMemory(); + return NULL; + } + } + writer->obj = NULL; + writer->size = 0; + + if (prealloc >= 1) { + char *start = byteswriter_alloc(writer, prealloc, 0); + if (start == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + } + return writer; +} + + +void +PyBytesWriter_Discard(PyBytesWriter *writer) +{ + if (writer == NULL) { + return; + } + + Py_XDECREF(writer->obj); + _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free); +} + + +PyObject* +PyBytesWriter_Finish(PyBytesWriter *writer) +{ + Py_ssize_t final_size = writer->size; + writer->size = -1; + + PyObject *result; + if (final_size == 0) { + result = bytes_get_empty(); + } + else if (writer->obj != NULL) { + if (final_size != PyBytes_GET_SIZE(writer->obj)) { + if (_PyBytes_Resize(&writer->obj, final_size)) { + goto error; + } + } + result = writer->obj; + writer->obj = NULL; + } + else { + result = PyBytes_FromStringAndSize(writer->small_buffer, final_size); + } + PyBytesWriter_Discard(writer); + return result; + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +int +PyBytesWriter_Truncate(PyBytesWriter *writer, void *buf) +{ + char *start = byteswriter_start(writer); + if 
((char*)buf < start || (start + writer->size < (char*)buf)) { + PyErr_SetString(PyExc_ValueError, "invalid buffer pointer"); + return -1; + } + writer->size = (char*)buf - start; + return 0; +} + + +void* +PyBytesWriter_Alloc(PyBytesWriter *writer, Py_ssize_t alloc) +{ + Py_ssize_t size = writer->size; + if (alloc > PY_SSIZE_T_MAX - size) { + PyErr_NoMemory(); + return NULL; + } + size += alloc; + + Py_ssize_t pos = writer->size; + char *start = byteswriter_alloc(writer, size, 1); + if (start == NULL) { + return NULL; + } + writer->size = size; + + return start + pos; +} + + +void* +PyBytesWriter_Extend(PyBytesWriter *writer, void *buf, Py_ssize_t extend) +{ + Py_ssize_t alloc_size = writer->size; + if (extend > PY_SSIZE_T_MAX - alloc_size) { + PyErr_NoMemory(); + return NULL; + } + alloc_size += extend; + + Py_ssize_t pos = (char*)buf - byteswriter_start(writer); + char *start = byteswriter_alloc(writer, alloc_size, 1); + if (start == NULL) { + return NULL; + } + writer->size = alloc_size; + + return start + pos; +} + + +int +PyBytesWriter_WriteBytes(PyBytesWriter *writer, + const void *bytes, Py_ssize_t size) +{ + if (size < 0) { + size = strlen(bytes); + } + + void *buf = PyBytesWriter_Alloc(writer, size); + if (buf == NULL) { + return -1; + } + + memcpy(buf, bytes, size); + return 0; +} diff --git a/Objects/object.c b/Objects/object.c index 4eff24bc3a9507..3edb670073c22d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -942,6 +942,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); + clear_freelist(&freelists->bytes_writers, is_finalization, PyMem_Free); clear_freelist(&freelists->ints, is_finalization, free_object); clear_freelist(&freelists->pymethodobjects, is_finalization, free_object); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 
3e0bd90c17995f..8e113a6b9b012b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -46,7 +46,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_codecs.h" // _PyCodec_Lookup() #include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST #include "pycore_format.h" // F_LJUST -#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() +#include "pycore_freelist.h" // _Py_FREELIST_FREE() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_long.h" // _PyLong_FormatWriter()