Skip to content

Commit d807b18

Browse files
committed
gh-139156: Use PyBytesWriter in _PyUnicode_EncodeCharmap()
Replace PyBytes_FromStringAndSize() and _PyBytes_Resize() with the PyBytesWriter API.
1 parent: c863349 · commit: d807b18

File tree

1 file changed

+48
-52
lines changed

1 file changed

+48
-52
lines changed

Objects/unicodeobject.c

Lines changed: 48 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -8858,15 +8858,13 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping, unsigned char *replace)
88588858
}
88598859

88608860
static int
8861-
charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
8861+
charmapencode_resize(PyBytesWriter *writer, Py_ssize_t *outpos, Py_ssize_t requiredsize)
88628862
{
8863-
Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
8863+
Py_ssize_t outsize = PyBytesWriter_GetSize(writer);
88648864
/* exponentially overallocate to minimize reallocations */
8865-
if (requiredsize < 2*outsize)
8866-
requiredsize = 2*outsize;
8867-
if (_PyBytes_Resize(outobj, requiredsize))
8868-
return -1;
8869-
return 0;
8865+
if (requiredsize < 2 * outsize)
8866+
requiredsize = 2 * outsize;
8867+
return PyBytesWriter_Resize(writer, requiredsize);
88708868
}
88718869

88728870
typedef enum charmapencode_result {
@@ -8880,22 +8878,22 @@ typedef enum charmapencode_result {
88808878
reallocation error occurred. The caller must decref the result */
88818879
static charmapencode_result
88828880
charmapencode_output(Py_UCS4 c, PyObject *mapping,
8883-
PyObject **outobj, Py_ssize_t *outpos)
8881+
PyBytesWriter *writer, Py_ssize_t *outpos)
88848882
{
88858883
PyObject *rep;
88868884
unsigned char replace;
88878885
char *outstart;
8888-
Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
8886+
Py_ssize_t outsize = PyBytesWriter_GetSize(writer);
88898887

88908888
if (Py_IS_TYPE(mapping, &EncodingMapType)) {
88918889
int res = encoding_map_lookup(c, mapping);
88928890
Py_ssize_t requiredsize = *outpos+1;
88938891
if (res == -1)
88948892
return enc_FAILED;
88958893
if (outsize<requiredsize)
8896-
if (charmapencode_resize(outobj, outpos, requiredsize))
8894+
if (charmapencode_resize(writer, outpos, requiredsize))
88978895
return enc_EXCEPTION;
8898-
outstart = PyBytes_AS_STRING(*outobj);
8896+
outstart = PyBytesWriter_GetData(writer);
88998897
outstart[(*outpos)++] = (char)res;
89008898
return enc_SUCCESS;
89018899
}
@@ -8910,23 +8908,23 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
89108908
if (PyLong_Check(rep)) {
89118909
Py_ssize_t requiredsize = *outpos+1;
89128910
if (outsize<requiredsize)
8913-
if (charmapencode_resize(outobj, outpos, requiredsize)) {
8911+
if (charmapencode_resize(writer, outpos, requiredsize)) {
89148912
Py_DECREF(rep);
89158913
return enc_EXCEPTION;
89168914
}
8917-
outstart = PyBytes_AS_STRING(*outobj);
8915+
outstart = PyBytesWriter_GetData(writer);
89188916
outstart[(*outpos)++] = (char)replace;
89198917
}
89208918
else {
89218919
const char *repchars = PyBytes_AS_STRING(rep);
89228920
Py_ssize_t repsize = PyBytes_GET_SIZE(rep);
89238921
Py_ssize_t requiredsize = *outpos+repsize;
89248922
if (outsize<requiredsize)
8925-
if (charmapencode_resize(outobj, outpos, requiredsize)) {
8923+
if (charmapencode_resize(writer, outpos, requiredsize)) {
89268924
Py_DECREF(rep);
89278925
return enc_EXCEPTION;
89288926
}
8929-
outstart = PyBytes_AS_STRING(*outobj);
8927+
outstart = PyBytesWriter_GetData(writer);
89308928
memcpy(outstart + *outpos, repchars, repsize);
89318929
*outpos += repsize;
89328930
}
@@ -8942,7 +8940,7 @@ charmap_encoding_error(
89428940
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
89438941
PyObject **exceptionObject,
89448942
_Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors,
8945-
PyObject **res, Py_ssize_t *respos)
8943+
PyBytesWriter *writer, Py_ssize_t *respos)
89468944
{
89478945
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
89488946
Py_ssize_t size, repsize;
@@ -8997,7 +8995,7 @@ charmap_encoding_error(
89978995

89988996
case _Py_ERROR_REPLACE:
89998997
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
9000-
x = charmapencode_output('?', mapping, res, respos);
8998+
x = charmapencode_output('?', mapping, writer, respos);
90018999
if (x==enc_EXCEPTION) {
90029000
return -1;
90039001
}
@@ -9018,7 +9016,7 @@ charmap_encoding_error(
90189016
char *cp;
90199017
sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos));
90209018
for (cp = buffer; *cp; ++cp) {
9021-
x = charmapencode_output(*cp, mapping, res, respos);
9019+
x = charmapencode_output(*cp, mapping, writer, respos);
90229020
if (x==enc_EXCEPTION)
90239021
return -1;
90249022
else if (x==enc_FAILED) {
@@ -9038,17 +9036,17 @@ charmap_encoding_error(
90389036
return -1;
90399037
if (PyBytes_Check(repunicode)) {
90409038
/* Directly copy bytes result to output. */
9041-
Py_ssize_t outsize = PyBytes_Size(*res);
9039+
Py_ssize_t outsize = PyBytesWriter_GetSize(writer);
90429040
Py_ssize_t requiredsize;
90439041
repsize = PyBytes_Size(repunicode);
90449042
requiredsize = *respos + repsize;
90459043
if (requiredsize > outsize)
90469044
/* Make room for all additional bytes. */
9047-
if (charmapencode_resize(res, respos, requiredsize)) {
9045+
if (charmapencode_resize(writer, respos, requiredsize)) {
90489046
Py_DECREF(repunicode);
90499047
return -1;
90509048
}
9051-
memcpy(PyBytes_AsString(*res) + *respos,
9049+
memcpy(PyBytesWriter_GetData(writer) + *respos,
90529050
PyBytes_AsString(repunicode), repsize);
90539051
*respos += repsize;
90549052
*inpos = newpos;
@@ -9061,7 +9059,7 @@ charmap_encoding_error(
90619059
kind = PyUnicode_KIND(repunicode);
90629060
for (index = 0; index < repsize; index++) {
90639061
Py_UCS4 repch = PyUnicode_READ(kind, data, index);
9064-
x = charmapencode_output(repch, mapping, res, respos);
9062+
x = charmapencode_output(repch, mapping, writer, respos);
90659063
if (x==enc_EXCEPTION) {
90669064
Py_DECREF(repunicode);
90679065
return -1;
@@ -9083,65 +9081,63 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
90839081
PyObject *mapping,
90849082
const char *errors)
90859083
{
9084+
/* Default to Latin-1 */
9085+
if (mapping == NULL) {
9086+
return unicode_encode_ucs1(unicode, errors, 256);
9087+
}
9088+
9089+
Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
9090+
if (size == 0) {
9091+
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
9092+
}
9093+
const void *data = PyUnicode_DATA(unicode);
9094+
int kind = PyUnicode_KIND(unicode);
9095+
90869096
/* output object */
9087-
PyObject *res = NULL;
9097+
PyBytesWriter *writer;
9098+
/* allocate enough for a simple encoding without
9099+
replacements, if we need more, we'll resize */
9100+
writer = PyBytesWriter_Create(size);
9101+
if (writer == NULL) {
9102+
goto onError;
9103+
}
9104+
90889105
/* current input position */
90899106
Py_ssize_t inpos = 0;
9090-
Py_ssize_t size;
90919107
/* current output position */
90929108
Py_ssize_t respos = 0;
90939109
PyObject *error_handler_obj = NULL;
90949110
PyObject *exc = NULL;
90959111
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
9096-
const void *data;
9097-
int kind;
9098-
9099-
size = PyUnicode_GET_LENGTH(unicode);
9100-
data = PyUnicode_DATA(unicode);
9101-
kind = PyUnicode_KIND(unicode);
9102-
9103-
/* Default to Latin-1 */
9104-
if (mapping == NULL)
9105-
return unicode_encode_ucs1(unicode, errors, 256);
9106-
9107-
/* allocate enough for a simple encoding without
9108-
replacements, if we need more, we'll resize */
9109-
res = PyBytes_FromStringAndSize(NULL, size);
9110-
if (res == NULL)
9111-
goto onError;
9112-
if (size == 0)
9113-
return res;
91149112

91159113
while (inpos<size) {
91169114
Py_UCS4 ch = PyUnicode_READ(kind, data, inpos);
91179115
/* try to encode it */
9118-
charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
9116+
charmapencode_result x = charmapencode_output(ch, mapping, writer, &respos);
91199117
if (x==enc_EXCEPTION) /* error */
91209118
goto onError;
91219119
if (x==enc_FAILED) { /* unencodable character */
91229120
if (charmap_encoding_error(unicode, &inpos, mapping,
91239121
&exc,
91249122
&error_handler, &error_handler_obj, errors,
9125-
&res, &respos)) {
9123+
writer, &respos)) {
91269124
goto onError;
91279125
}
91289126
}
9129-
else
9127+
else {
91309128
/* done with this character => adjust input position */
91319129
++inpos;
9130+
}
91329131
}
91339132

9134-
/* Resize if we allocated to much */
9135-
if (respos<PyBytes_GET_SIZE(res))
9136-
if (_PyBytes_Resize(&res, respos) < 0)
9137-
goto onError;
9138-
91399133
Py_XDECREF(exc);
91409134
Py_XDECREF(error_handler_obj);
9141-
return res;
9135+
9136+
/* Resize if we allocated too much */
9137+
return PyBytesWriter_FinishWithSize(writer, respos);
91429138

91439139
onError:
9144-
Py_XDECREF(res);
9140+
PyBytesWriter_Discard(writer);
91459141
Py_XDECREF(exc);
91469142
Py_XDECREF(error_handler_obj);
91479143
return NULL;

0 commit comments

Comments
 (0)