@@ -257,16 +257,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
257
257
/* UTF-8 encoder specialized for a Unicode kind to avoid the slow
258
258
PyUnicode_READ() macro. Some parts of the code are compiled out depending on the kind:
259
259
UCS-1 strings don't need to handle surrogates, for example. */
260
- Py_LOCAL_INLINE (char * )
261
- STRINGLIB (utf8_encoder )(_PyBytesWriter * writer ,
262
- PyObject * unicode ,
260
+ Py_LOCAL_INLINE (PyBytesWriter * )
261
+ STRINGLIB (utf8_encoder )(PyObject * unicode ,
263
262
const STRINGLIB_CHAR * data ,
264
263
Py_ssize_t size ,
265
264
_Py_error_handler error_handler ,
266
- const char * errors )
265
+ const char * errors ,
266
+ char * * end )
267
267
{
268
- Py_ssize_t i ; /* index into data of next input character */
269
- char * p ; /* next free byte in output buffer */
270
268
#if STRINGLIB_SIZEOF_CHAR > 1
271
269
PyObject * error_handler_obj = NULL ;
272
270
PyObject * exc = NULL ;
@@ -284,14 +282,19 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
284
282
if (size > PY_SSIZE_T_MAX / max_char_size ) {
285
283
/* integer overflow */
286
284
PyErr_NoMemory ();
285
+ * end = NULL ;
287
286
return NULL ;
288
287
}
289
288
290
- _PyBytesWriter_Init ( writer );
291
- p = _PyBytesWriter_Alloc (writer , size * max_char_size );
292
- if ( p == NULL )
289
+ PyBytesWriter * writer = PyBytesWriter_Create ( size * max_char_size );
290
+ if (writer == NULL ) {
291
+ * end = NULL ;
293
292
return NULL ;
293
+ }
294
+ /* next free byte in output buffer */
295
+ char * p = PyBytesWriter_GetData (writer );
294
296
297
+ Py_ssize_t i ; /* index into data of next input character */
295
298
for (i = 0 ; i < size ;) {
296
299
Py_UCS4 ch = data [i ++ ];
297
300
@@ -323,9 +326,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
323
326
while ((endpos < size ) && Py_UNICODE_IS_SURROGATE (data [endpos ]))
324
327
endpos ++ ;
325
328
326
- /* Only overallocate the buffer if it's not the last write */
327
- writer -> overallocate = (endpos < size );
328
-
329
329
switch (error_handler )
330
330
{
331
331
case _Py_ERROR_REPLACE :
@@ -347,8 +347,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
347
347
break ;
348
348
349
349
case _Py_ERROR_BACKSLASHREPLACE :
350
- /* subtract preallocated bytes */
351
- writer -> min_size -= max_char_size * (endpos - startpos );
352
350
p = backslashreplace (writer , p ,
353
351
unicode , startpos , endpos );
354
352
if (p == NULL )
@@ -357,8 +355,6 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
357
355
break ;
358
356
359
357
case _Py_ERROR_XMLCHARREFREPLACE :
360
- /* subtract preallocated bytes */
361
- writer -> min_size -= max_char_size * (endpos - startpos );
362
358
p = xmlcharrefreplace (writer , p ,
363
359
unicode , startpos , endpos );
364
360
if (p == NULL )
@@ -388,23 +384,19 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
388
384
goto error ;
389
385
390
386
if (newpos < startpos ) {
391
- writer -> overallocate = 1 ;
392
- p = _PyBytesWriter_Prepare ( writer , p ,
393
- max_char_size * ( startpos - newpos ) );
394
- if (p == NULL )
387
+ p = PyBytesWriter_GrowAndUpdatePointer ( writer ,
388
+ max_char_size * ( startpos - newpos ) ,
389
+ p );
390
+ if (p == NULL ) {
395
391
goto error ;
396
- }
397
- else {
398
- /* subtract preallocated bytes */
399
- writer -> min_size -= max_char_size * (newpos - startpos );
400
- /* Only overallocate the buffer if it's not the last write */
401
- writer -> overallocate = (newpos < size );
392
+ }
402
393
}
403
394
395
+ char * rep_str ;
396
+ Py_ssize_t rep_len ;
404
397
if (PyBytes_Check (rep )) {
405
- p = _PyBytesWriter_WriteBytes (writer , p ,
406
- PyBytes_AS_STRING (rep ),
407
- PyBytes_GET_SIZE (rep ));
398
+ rep_str = PyBytes_AS_STRING (rep );
399
+ rep_len = PyBytes_GET_SIZE (rep );
408
400
}
409
401
else {
410
402
/* rep is unicode */
@@ -415,21 +407,20 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
415
407
goto error ;
416
408
}
417
409
418
- p = _PyBytesWriter_WriteBytes (writer , p ,
419
- PyUnicode_DATA (rep ),
420
- PyUnicode_GET_LENGTH (rep ));
410
+ rep_str = PyUnicode_DATA (rep );
411
+ rep_len = PyUnicode_GET_LENGTH (rep );
421
412
}
422
413
423
- if (p == NULL )
414
+ p = PyBytesWriter_GrowAndUpdatePointer (writer , rep_len , p );
415
+ if (p == NULL ) {
424
416
goto error ;
417
+ }
418
+ memcpy (p , rep_str , rep_len );
419
+ p += rep_len ;
425
420
Py_CLEAR (rep );
426
421
427
422
i = newpos ;
428
423
}
429
-
430
- /* If overallocation was disabled, ensure that it was the last
431
- write. Otherwise, we missed an optimization */
432
- assert (writer -> overallocate || i == size );
433
424
}
434
425
else
435
426
#if STRINGLIB_SIZEOF_CHAR > 2
@@ -458,13 +449,15 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
458
449
Py_XDECREF (error_handler_obj );
459
450
Py_XDECREF (exc );
460
451
#endif
461
- return p ;
452
+ * end = p ;
453
+ return writer ;
462
454
463
455
#if STRINGLIB_SIZEOF_CHAR > 1
464
456
error :
465
457
Py_XDECREF (rep );
466
458
Py_XDECREF (error_handler_obj );
467
459
Py_XDECREF (exc );
460
+ * end = NULL ;
468
461
return NULL ;
469
462
#endif
470
463
}
0 commit comments