@@ -113,7 +113,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently
113113
/* Return the raw `utf8` pointer stored in the compact unicode object `op`.
 *
 * The removed ('-') line read the field with a plain load; the added ('+')
 * line reads the same field through FT_ATOMIC_LOAD_PTR_ACQUIRE instead.
 * NOTE(review): presumably this is an acquire-ordered atomic load in
 * free-threaded builds, pairing with the FT_ATOMIC_STORE_PTR_RELEASE store
 * in PyUnicode_SET_UTF8 -- confirm against the macro definitions. */
114114static inline char * _PyUnicode_UTF8 (PyObject * op )
115115{
116- return (_PyCompactUnicodeObject_CAST (op )-> utf8 );
116+ return FT_ATOMIC_LOAD_PTR_ACQUIRE (_PyCompactUnicodeObject_CAST (op )-> utf8 );
117117}
118118
119119static inline char * PyUnicode_UTF8 (PyObject * op )
@@ -129,7 +129,7 @@ static inline char* PyUnicode_UTF8(PyObject *op)
129129
/* Store `utf8` into the `utf8` field of the compact unicode object `op`.
 *
 * The removed ('-') line was a plain assignment; the added ('+') line
 * performs the store through FT_ATOMIC_STORE_PTR_RELEASE.
 * The call sites touched by this same diff (resize_compact, resize_inplace,
 * unicode_fill_utf8) are reordered so that PyUnicode_SET_UTF8_LENGTH runs
 * before this setter.
 * NOTE(review): presumably the intent is that a reader which observes the
 * new pointer also observes the matching length -- confirm. */
130130static inline void PyUnicode_SET_UTF8 (PyObject * op , char * utf8 )
131131{
132- _PyCompactUnicodeObject_CAST (op )-> utf8 = utf8 ;
132+ FT_ATOMIC_STORE_PTR_RELEASE ( _PyCompactUnicodeObject_CAST (op )-> utf8 , utf8 ) ;
133133}
134134
135135static inline Py_ssize_t PyUnicode_UTF8_LENGTH (PyObject * op )
@@ -683,7 +683,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
683683 || kind == PyUnicode_2BYTE_KIND
684684 || kind == PyUnicode_4BYTE_KIND );
685685 CHECK (ascii -> state .ascii == 0 );
686- CHECK (compact -> utf8 != data );
686+ CHECK (_PyUnicode_UTF8 ( op ) != data );
687687 }
688688 else {
689689 PyUnicodeObject * unicode = _PyUnicodeObject_CAST (op );
@@ -695,16 +695,17 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
695695 CHECK (ascii -> state .compact == 0 );
696696 CHECK (data != NULL );
697697 if (ascii -> state .ascii ) {
698- CHECK (compact -> utf8 == data );
698+ CHECK (_PyUnicode_UTF8 ( op ) == data );
699699 CHECK (compact -> utf8_length == ascii -> length );
700700 }
701701 else {
702- CHECK (compact -> utf8 != data );
702+ CHECK (_PyUnicode_UTF8 ( op ) != data );
703703 }
704704 }
705-
706- if (compact -> utf8 == NULL )
705+ #ifndef Py_GIL_DISABLED
706+ if (_PyUnicode_UTF8 ( op ) == NULL )
707707 CHECK (compact -> utf8_length == 0 );
708+ #endif
708709 }
709710
710711 /* check that the best kind is used: O(n) operation */
@@ -1148,8 +1149,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
11481149
11491150 if (_PyUnicode_HAS_UTF8_MEMORY (unicode )) {
11501151 PyMem_Free (_PyUnicode_UTF8 (unicode ));
1151- PyUnicode_SET_UTF8 (unicode , NULL );
11521152 PyUnicode_SET_UTF8_LENGTH (unicode , 0 );
1153+ PyUnicode_SET_UTF8 (unicode , NULL );
11531154 }
11541155#ifdef Py_TRACE_REFS
11551156 _Py_ForgetReference (unicode );
@@ -1202,8 +1203,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12021203 if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY (unicode ))
12031204 {
12041205 PyMem_Free (_PyUnicode_UTF8 (unicode ));
1205- PyUnicode_SET_UTF8 (unicode , NULL );
12061206 PyUnicode_SET_UTF8_LENGTH (unicode , 0 );
1207+ PyUnicode_SET_UTF8 (unicode , NULL );
12071208 }
12081209
12091210 data = (PyObject * )PyObject_Realloc (data , new_size );
@@ -1213,8 +1214,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12131214 }
12141215 _PyUnicode_DATA_ANY (unicode ) = data ;
12151216 if (share_utf8 ) {
1216- PyUnicode_SET_UTF8 (unicode , data );
12171217 PyUnicode_SET_UTF8_LENGTH (unicode , length );
1218+ PyUnicode_SET_UTF8 (unicode , data );
12181219 }
12191220 _PyUnicode_LENGTH (unicode ) = length ;
12201221 PyUnicode_WRITE (PyUnicode_KIND (unicode ), data , length , 0 );
@@ -4085,6 +4086,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
40854086
40864087static int unicode_fill_utf8 (PyObject * unicode );
40874088
4089+
/* Make sure `unicode` has its UTF-8 representation available.
 *
 * If PyUnicode_UTF8(unicode) is already non-NULL nothing is done and 0 is
 * returned. Otherwise the function enters a critical section on `unicode`,
 * tests PyUnicode_UTF8(unicode) a second time, and only if it is still NULL
 * calls unicode_fill_utf8(). The return value is then whatever
 * unicode_fill_utf8() returned; the caller in this diff
 * (PyUnicode_AsUTF8AndSize) treats -1 as failure. */
4090+ static int
4091+ unicode_ensure_utf8 (PyObject * unicode )
4092+ {
4093+ int err = 0 ;
/* First test is made without entering the critical section. */
4094+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4095+ Py_BEGIN_CRITICAL_SECTION (unicode );
/* Re-test inside the critical section before filling.
 * NOTE(review): presumably guards against another thread having filled the
 * cache between the two tests -- confirm. unicode_fill_utf8() asserts via
 * _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED that the object is locked. */
4096+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4097+ err = unicode_fill_utf8 (unicode );
4098+ }
4099+ Py_END_CRITICAL_SECTION ();
4100+ }
4101+ return err ;
4102+ }
4103+
40884104const char *
40894105PyUnicode_AsUTF8AndSize (PyObject * unicode , Py_ssize_t * psize )
40904106{
@@ -4096,13 +4112,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
40964112 return NULL ;
40974113 }
40984114
4099- if (PyUnicode_UTF8 (unicode ) == NULL ) {
4100- if (unicode_fill_utf8 (unicode ) == -1 ) {
4101- if (psize ) {
4102- * psize = -1 ;
4103- }
4104- return NULL ;
4115+ if (unicode_ensure_utf8 (unicode ) == -1 ) {
4116+ if (psize ) {
4117+ * psize = -1 ;
41054118 }
4119+ return NULL ;
41064120 }
41074121
41084122 if (psize ) {
@@ -5434,6 +5448,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
54345448static int
54355449unicode_fill_utf8 (PyObject * unicode )
54365450{
5451+ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED (unicode );
54375452 /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
54385453 assert (!PyUnicode_IS_ASCII (unicode ));
54395454
@@ -5475,10 +5490,10 @@ unicode_fill_utf8(PyObject *unicode)
54755490 PyErr_NoMemory ();
54765491 return -1 ;
54775492 }
5478- PyUnicode_SET_UTF8 (unicode , cache );
5479- PyUnicode_SET_UTF8_LENGTH (unicode , len );
54805493 memcpy (cache , start , len );
54815494 cache [len ] = '\0' ;
5495+ PyUnicode_SET_UTF8_LENGTH (unicode , len );
5496+ PyUnicode_SET_UTF8 (unicode , cache );
54825497 _PyBytesWriter_Dealloc (& writer );
54835498 return 0 ;
54845499}
0 commit comments