Skip to content

Commit 051d070

Browse files
committed
gh-129813, PEP 782: Use PyBytesWriter in utf8_encoder()
Replace the private _PyBytesWriter API with the new public PyBytesWriter API in utf8_encoder() and unicode_encode_ucs1().
1 parent 7168e98 commit 051d070

File tree

2 files changed

+88
-103
lines changed

2 files changed

+88
-103
lines changed

Objects/stringlib/codecs.h

Lines changed: 31 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -257,16 +257,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
257257
/* UTF-8 encoder specialized for a Unicode kind to avoid the slow
258258
PyUnicode_READ() macro. Delete some parts of the code depending on the kind:
259259
UCS-1 strings don't need to handle surrogates for example. */
260-
Py_LOCAL_INLINE(char *)
261-
STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
262-
PyObject *unicode,
260+
Py_LOCAL_INLINE(PyBytesWriter*)
261+
STRINGLIB(utf8_encoder)(PyObject *unicode,
263262
const STRINGLIB_CHAR *data,
264263
Py_ssize_t size,
265264
_Py_error_handler error_handler,
266-
const char *errors)
265+
const char *errors,
266+
char **end)
267267
{
268-
Py_ssize_t i; /* index into data of next input character */
269-
char *p; /* next free byte in output buffer */
270268
#if STRINGLIB_SIZEOF_CHAR > 1
271269
PyObject *error_handler_obj = NULL;
272270
PyObject *exc = NULL;
@@ -284,14 +282,19 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
284282
if (size > PY_SSIZE_T_MAX / max_char_size) {
285283
/* integer overflow */
286284
PyErr_NoMemory();
285+
*end = NULL;
287286
return NULL;
288287
}
289288

290-
_PyBytesWriter_Init(writer);
291-
p = _PyBytesWriter_Alloc(writer, size * max_char_size);
292-
if (p == NULL)
289+
PyBytesWriter *writer = PyBytesWriter_Create(size * max_char_size);
290+
if (writer == NULL) {
291+
*end = NULL;
293292
return NULL;
293+
}
294+
/* next free byte in output buffer */
295+
char *p = PyBytesWriter_GetData(writer);
294296

297+
Py_ssize_t i; /* index into data of next input character */
295298
for (i = 0; i < size;) {
296299
Py_UCS4 ch = data[i++];
297300

@@ -323,9 +326,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
323326
while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))
324327
endpos++;
325328

326-
/* Only overallocate the buffer if it's not the last write */
327-
writer->overallocate = (endpos < size);
328-
329329
switch (error_handler)
330330
{
331331
case _Py_ERROR_REPLACE:
@@ -347,8 +347,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
347347
break;
348348

349349
case _Py_ERROR_BACKSLASHREPLACE:
350-
/* subtract preallocated bytes */
351-
writer->min_size -= max_char_size * (endpos - startpos);
352350
p = backslashreplace(writer, p,
353351
unicode, startpos, endpos);
354352
if (p == NULL)
@@ -357,8 +355,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
357355
break;
358356

359357
case _Py_ERROR_XMLCHARREFREPLACE:
360-
/* subtract preallocated bytes */
361-
writer->min_size -= max_char_size * (endpos - startpos);
362358
p = xmlcharrefreplace(writer, p,
363359
unicode, startpos, endpos);
364360
if (p == NULL)
@@ -388,23 +384,19 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
388384
goto error;
389385

390386
if (newpos < startpos) {
391-
writer->overallocate = 1;
392-
p = _PyBytesWriter_Prepare(writer, p,
393-
max_char_size * (startpos - newpos));
394-
if (p == NULL)
387+
p = PyBytesWriter_GrowAndUpdatePointer(writer,
388+
max_char_size * (startpos - newpos),
389+
p);
390+
if (p == NULL) {
395391
goto error;
396-
}
397-
else {
398-
/* subtract preallocated bytes */
399-
writer->min_size -= max_char_size * (newpos - startpos);
400-
/* Only overallocate the buffer if it's not the last write */
401-
writer->overallocate = (newpos < size);
392+
}
402393
}
403394

395+
char *rep_str;
396+
Py_ssize_t rep_len;
404397
if (PyBytes_Check(rep)) {
405-
p = _PyBytesWriter_WriteBytes(writer, p,
406-
PyBytes_AS_STRING(rep),
407-
PyBytes_GET_SIZE(rep));
398+
rep_str = PyBytes_AS_STRING(rep);
399+
rep_len = PyBytes_GET_SIZE(rep);
408400
}
409401
else {
410402
/* rep is unicode */
@@ -415,21 +407,20 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
415407
goto error;
416408
}
417409

418-
p = _PyBytesWriter_WriteBytes(writer, p,
419-
PyUnicode_DATA(rep),
420-
PyUnicode_GET_LENGTH(rep));
410+
rep_str = PyUnicode_DATA(rep);
411+
rep_len = PyUnicode_GET_LENGTH(rep);
421412
}
422413

423-
if (p == NULL)
414+
p = PyBytesWriter_GrowAndUpdatePointer(writer, rep_len, p);
415+
if (p == NULL) {
424416
goto error;
417+
}
418+
memcpy(p, rep_str, rep_len);
419+
p += rep_len;
425420
Py_CLEAR(rep);
426421

427422
i = newpos;
428423
}
429-
430-
/* If overallocation was disabled, ensure that it was the last
431-
write. Otherwise, we missed an optimization */
432-
assert(writer->overallocate || i == size);
433424
}
434425
else
435426
#if STRINGLIB_SIZEOF_CHAR > 2
@@ -458,13 +449,15 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
458449
Py_XDECREF(error_handler_obj);
459450
Py_XDECREF(exc);
460451
#endif
461-
return p;
452+
*end = p;
453+
return writer;
462454

463455
#if STRINGLIB_SIZEOF_CHAR > 1
464456
error:
465457
Py_XDECREF(rep);
466458
Py_XDECREF(error_handler_obj);
467459
Py_XDECREF(exc);
460+
*end = NULL;
468461
return NULL;
469462
#endif
470463
}

0 commit comments

Comments
 (0)