@@ -374,6 +374,119 @@ test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
374374}
375375
376376
377+ static PyObject *
378+ test_unicodewriter_decode_utf8 (PyObject * self , PyObject * Py_UNUSED (args ))
379+ {
380+ // test PyUnicodeWriter_DecodeUTF8Stateful()
381+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
382+ if (writer == NULL ) {
383+ return NULL ;
384+ }
385+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "ign\xFFore" , -1 , "ignore" , NULL ) < 0 ) {
386+ goto error ;
387+ }
388+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
389+ goto error ;
390+ }
391+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "replace\xFF" , -1 , "replace" , NULL ) < 0 ) {
392+ goto error ;
393+ }
394+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
395+ goto error ;
396+ }
397+
398+ // incomplete trailing UTF-8 sequence
399+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "incomplete\xC3" , -1 , "replace" , NULL ) < 0 ) {
400+ goto error ;
401+ }
402+
403+ PyObject * result = PyUnicodeWriter_Finish (writer );
404+ if (result == NULL ) {
405+ return NULL ;
406+ }
407+ assert (PyUnicode_EqualToUTF8 (result ,
408+ "ignore-replace\xef\xbf\xbd"
409+ "-incomplete\xef\xbf\xbd" ));
410+ Py_DECREF (result );
411+
412+ Py_RETURN_NONE ;
413+
414+ error :
415+ PyUnicodeWriter_Discard (writer );
416+ return NULL ;
417+ }
418+
419+
420+ static PyObject *
421+ test_unicodewriter_decode_utf8_consumed (PyObject * self , PyObject * Py_UNUSED (args ))
422+ {
423+ // test PyUnicodeWriter_DecodeUTF8Stateful()
424+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
425+ if (writer == NULL ) {
426+ return NULL ;
427+ }
428+ Py_ssize_t consumed ;
429+
430+ // valid string
431+ consumed = 12345 ;
432+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "text" , -1 , NULL , & consumed ) < 0 ) {
433+ goto error ;
434+ }
435+ assert (consumed == 4 );
436+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
437+ goto error ;
438+ }
439+
440+ // non-ASCII
441+ consumed = 12345 ;
442+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "\xC3\xA9-\xE2\x82\xAC" , 6 , NULL , & consumed ) < 0 ) {
443+ goto error ;
444+ }
445+ assert (consumed == 6 );
446+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
447+ goto error ;
448+ }
449+
450+ // consumed is 0 if write fails
451+ consumed = 12345 ;
452+ assert (PyUnicodeWriter_DecodeUTF8Stateful (writer , "invalid\xFF" , -1 , NULL , & consumed ) < 0 );
453+ PyErr_Clear ();
454+ assert (consumed == 0 );
455+
456+ // ignore error handler
457+ consumed = 12345 ;
458+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "more\xFF" , -1 , "ignore" , & consumed ) < 0 ) {
459+ goto error ;
460+ }
461+ assert (consumed == 5 );
462+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
463+ goto error ;
464+ }
465+
466+ // incomplete trailing UTF-8 sequence
467+ consumed = 12345 ;
468+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "incomplete\xC3" , -1 , "ignore" , & consumed ) < 0 ) {
469+ goto error ;
470+ }
471+ assert (consumed == 10 );
472+
473+ PyObject * result = PyUnicodeWriter_Finish (writer );
474+ if (result == NULL ) {
475+ return NULL ;
476+ }
477+ assert (PyUnicode_EqualToUTF8 (result ,
478+ "text-\xC3\xA9-\xE2\x82\xAC-"
479+ "more-incomplete" ));
480+ Py_DECREF (result );
481+
482+ Py_RETURN_NONE ;
483+
484+ error :
485+ PyUnicodeWriter_Discard (writer );
486+ return NULL ;
487+ }
488+
489+
377490static PyObject *
378491test_unicodewriter_format (PyObject * self , PyObject * Py_UNUSED (args ))
379492{
@@ -436,6 +549,42 @@ test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args
436549}
437550
438551
552+ static PyObject *
553+ test_unicodewriter_widechar (PyObject * self , PyObject * Py_UNUSED (args ))
554+ {
555+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
556+ if (writer == NULL ) {
557+ return NULL ;
558+ }
559+ if (PyUnicodeWriter_WriteWideChar (writer , L"latin1=\xE9 IGNORED" , 8 ) < 0 ) {
560+ goto error ;
561+ }
562+ if (PyUnicodeWriter_WriteWideChar (writer , L"-" , 1 ) < 0 ) {
563+ goto error ;
564+ }
565+ if (PyUnicodeWriter_WriteWideChar (writer , L"euro=\u20AC" , -1 ) < 0 ) {
566+ goto error ;
567+ }
568+ if (PyUnicodeWriter_WriteChar (writer , '.' ) < 0 ) {
569+ goto error ;
570+ }
571+
572+ PyObject * result = PyUnicodeWriter_Finish (writer );
573+ if (result == NULL ) {
574+ return NULL ;
575+ }
576+ assert (PyUnicode_EqualToUTF8 (result ,
577+ "latin1=\xC3\xA9-euro=\xE2\x82\xAC." ));
578+ Py_DECREF (result );
579+
580+ Py_RETURN_NONE ;
581+
582+ error :
583+ PyUnicodeWriter_Discard (writer );
584+ return NULL ;
585+ }
586+
587+
439588static PyMethodDef TestMethods [] = {
440589 {"unicode_new" , unicode_new , METH_VARARGS },
441590 {"unicode_fill" , unicode_fill , METH_VARARGS },
@@ -448,8 +597,11 @@ static PyMethodDef TestMethods[] = {
448597 {"test_unicodewriter_utf8" , test_unicodewriter_utf8 , METH_NOARGS },
449598 {"test_unicodewriter_invalid_utf8" , test_unicodewriter_invalid_utf8 , METH_NOARGS },
450599 {"test_unicodewriter_recover_error" , test_unicodewriter_recover_error , METH_NOARGS },
600+ {"test_unicodewriter_decode_utf8" , test_unicodewriter_decode_utf8 , METH_NOARGS },
601+ {"test_unicodewriter_decode_utf8_consumed" , test_unicodewriter_decode_utf8_consumed , METH_NOARGS },
451602 {"test_unicodewriter_format" , test_unicodewriter_format , METH_NOARGS },
452603 {"test_unicodewriter_format_recover_error" , test_unicodewriter_format_recover_error , METH_NOARGS },
604+ {"test_unicodewriter_widechar" , test_unicodewriter_widechar , METH_NOARGS },
453605 {NULL },
454606};
455607
0 commit comments