@@ -111,47 +111,80 @@ NOTE: In the interpreter's initialization phase, some globals are currently
111111# define _PyUnicode_CHECK (op ) PyUnicode_Check(op)
112112#endif
113113
114- #define _PyUnicode_UTF8 (op ) \
115- (_PyCompactUnicodeObject_CAST(op)->utf8)
116- #define PyUnicode_UTF8 (op ) \
117- (assert(_PyUnicode_CHECK(op)), \
118- PyUnicode_IS_COMPACT_ASCII(op) ? \
119- ((char*)(_PyASCIIObject_CAST(op) + 1)) : \
120- _PyUnicode_UTF8(op))
121- #define _PyUnicode_UTF8_LENGTH (op ) \
122- (_PyCompactUnicodeObject_CAST(op)->utf8_length)
123- #define PyUnicode_UTF8_LENGTH (op ) \
124- (assert(_PyUnicode_CHECK(op)), \
125- PyUnicode_IS_COMPACT_ASCII(op) ? \
126- _PyASCIIObject_CAST(op)->length : \
127- _PyUnicode_UTF8_LENGTH(op))
114+ static inline char * _PyUnicode_UTF8 (PyObject * op )
115+ {
116+ return FT_ATOMIC_LOAD_PTR_ACQUIRE (_PyCompactUnicodeObject_CAST (op )-> utf8 );
117+ }
118+
119+ static inline char * PyUnicode_UTF8 (PyObject * op )
120+ {
121+ assert (_PyUnicode_CHECK (op ));
122+ if (PyUnicode_IS_COMPACT_ASCII (op )) {
123+ return ((char * )(_PyASCIIObject_CAST (op ) + 1 ));
124+ }
125+ else {
126+ return _PyUnicode_UTF8 (op );
127+ }
128+ }
129+
130+ static inline void PyUnicode_SET_UTF8 (PyObject * op , char * utf8 )
131+ {
132+ FT_ATOMIC_STORE_PTR_RELEASE (_PyCompactUnicodeObject_CAST (op )-> utf8 , utf8 );
133+ }
134+
135+ static inline Py_ssize_t PyUnicode_UTF8_LENGTH (PyObject * op )
136+ {
137+ assert (_PyUnicode_CHECK (op ));
138+ if (PyUnicode_IS_COMPACT_ASCII (op )) {
139+ return _PyASCIIObject_CAST (op )-> length ;
140+ }
141+ else {
142+ return _PyCompactUnicodeObject_CAST (op )-> utf8_length ;
143+ }
144+ }
145+
146+ static inline void PyUnicode_SET_UTF8_LENGTH (PyObject * op , Py_ssize_t length )
147+ {
148+ _PyCompactUnicodeObject_CAST (op )-> utf8_length = length ;
149+ }
128150
129151#define _PyUnicode_LENGTH (op ) \
130152 (_PyASCIIObject_CAST(op)->length)
131153#define _PyUnicode_STATE (op ) \
132154 (_PyASCIIObject_CAST(op)->state)
133155#define _PyUnicode_HASH (op ) \
134156 (_PyASCIIObject_CAST(op)->hash)
135- #define _PyUnicode_KIND (op ) \
136- (assert(_PyUnicode_CHECK(op)), \
137- _PyASCIIObject_CAST(op)->state.kind)
138- #define _PyUnicode_GET_LENGTH (op ) \
139- (assert(_PyUnicode_CHECK(op)), \
140- _PyASCIIObject_CAST(op)->length)
157+
158+ static inline Py_hash_t PyUnicode_HASH (PyObject * op )
159+ {
160+ assert (_PyUnicode_CHECK (op ));
161+ return FT_ATOMIC_LOAD_SSIZE_RELAXED (_PyASCIIObject_CAST (op )-> hash );
162+ }
163+
164+ static inline void PyUnicode_SET_HASH (PyObject * op , Py_hash_t hash )
165+ {
166+ FT_ATOMIC_STORE_SSIZE_RELAXED (_PyASCIIObject_CAST (op )-> hash , hash );
167+ }
168+
141169#define _PyUnicode_DATA_ANY (op ) \
142170 (_PyUnicodeObject_CAST(op)->data.any)
143171
144- #define _PyUnicode_SHARE_UTF8 (op ) \
145- (assert(_PyUnicode_CHECK(op)), \
146- assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
147- (_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
172+ static inline int _PyUnicode_SHARE_UTF8 (PyObject * op )
173+ {
174+ assert (_PyUnicode_CHECK (op ));
175+ assert (!PyUnicode_IS_COMPACT_ASCII (op ));
176+ return (_PyUnicode_UTF8 (op ) == PyUnicode_DATA (op ));
177+ }
148178
149179/* true if the Unicode object has an allocated UTF-8 memory block
150180 (not shared with other data) */
151- #define _PyUnicode_HAS_UTF8_MEMORY (op ) \
152- ((!PyUnicode_IS_COMPACT_ASCII(op) \
153- && _PyUnicode_UTF8(op) \
154- && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
181+ static inline int _PyUnicode_HAS_UTF8_MEMORY (PyObject * op )
182+ {
183+ return (!PyUnicode_IS_COMPACT_ASCII (op )
184+ && _PyUnicode_UTF8 (op ) != NULL
185+ && _PyUnicode_UTF8 (op ) != PyUnicode_DATA (op ));
186+ }
187+
155188
156189/* Generic helper macro to convert characters of different types.
157190 from_type and to_type have to be valid type names, begin and end
@@ -650,7 +683,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
650683 || kind == PyUnicode_2BYTE_KIND
651684 || kind == PyUnicode_4BYTE_KIND );
652685 CHECK (ascii -> state .ascii == 0 );
653- CHECK (compact -> utf8 != data );
686+ CHECK (_PyUnicode_UTF8 ( op ) != data );
654687 }
655688 else {
656689 PyUnicodeObject * unicode = _PyUnicodeObject_CAST (op );
@@ -662,16 +695,17 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
662695 CHECK (ascii -> state .compact == 0 );
663696 CHECK (data != NULL );
664697 if (ascii -> state .ascii ) {
665- CHECK (compact -> utf8 == data );
698+ CHECK (_PyUnicode_UTF8 ( op ) == data );
666699 CHECK (compact -> utf8_length == ascii -> length );
667700 }
668701 else {
669- CHECK (compact -> utf8 != data );
702+ CHECK (_PyUnicode_UTF8 ( op ) != data );
670703 }
671704 }
672-
673- if (compact -> utf8 == NULL )
705+ #ifndef Py_GIL_DISABLED
706+ if (_PyUnicode_UTF8 ( op ) == NULL )
674707 CHECK (compact -> utf8_length == 0 );
708+ #endif
675709 }
676710
677711 /* check that the best kind is used: O(n) operation */
@@ -1115,8 +1149,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
11151149
11161150 if (_PyUnicode_HAS_UTF8_MEMORY (unicode )) {
11171151 PyMem_Free (_PyUnicode_UTF8 (unicode ));
1118- _PyUnicode_UTF8 (unicode ) = NULL ;
1119- _PyUnicode_UTF8_LENGTH (unicode ) = 0 ;
1152+ PyUnicode_SET_UTF8_LENGTH (unicode , 0 ) ;
1153+ PyUnicode_SET_UTF8 (unicode , NULL ) ;
11201154 }
11211155#ifdef Py_TRACE_REFS
11221156 _Py_ForgetReference (unicode );
@@ -1169,8 +1203,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
11691203 if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY (unicode ))
11701204 {
11711205 PyMem_Free (_PyUnicode_UTF8 (unicode ));
1172- _PyUnicode_UTF8 (unicode ) = NULL ;
1173- _PyUnicode_UTF8_LENGTH (unicode ) = 0 ;
1206+ PyUnicode_SET_UTF8_LENGTH (unicode , 0 ) ;
1207+ PyUnicode_SET_UTF8 (unicode , NULL ) ;
11741208 }
11751209
11761210 data = (PyObject * )PyObject_Realloc (data , new_size );
@@ -1180,8 +1214,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
11801214 }
11811215 _PyUnicode_DATA_ANY (unicode ) = data ;
11821216 if (share_utf8 ) {
1183- _PyUnicode_UTF8 (unicode ) = data ;
1184- _PyUnicode_UTF8_LENGTH (unicode ) = length ;
1217+ PyUnicode_SET_UTF8_LENGTH (unicode , length ) ;
1218+ PyUnicode_SET_UTF8 (unicode , data ) ;
11851219 }
11861220 _PyUnicode_LENGTH (unicode ) = length ;
11871221 PyUnicode_WRITE (PyUnicode_KIND (unicode ), data , length , 0 );
@@ -1411,12 +1445,12 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
14111445
14121446 assert (unicode != NULL );
14131447 assert (_PyUnicode_CHECK (unicode ));
1414- assert (_PyUnicode_KIND (unicode ) == PyUnicode_4BYTE_KIND );
1448+ assert (PyUnicode_KIND (unicode ) == PyUnicode_4BYTE_KIND );
14151449 ucs4_out = PyUnicode_4BYTE_DATA (unicode );
14161450
14171451 for (iter = begin ; iter < end ; ) {
14181452 assert (ucs4_out < (PyUnicode_4BYTE_DATA (unicode ) +
1419- _PyUnicode_GET_LENGTH (unicode )));
1453+ PyUnicode_GET_LENGTH (unicode )));
14201454 if (Py_UNICODE_IS_HIGH_SURROGATE (iter [0 ])
14211455 && (iter + 1 ) < end
14221456 && Py_UNICODE_IS_LOW_SURROGATE (iter [1 ]))
@@ -1430,7 +1464,7 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
14301464 }
14311465 }
14321466 assert (ucs4_out == (PyUnicode_4BYTE_DATA (unicode ) +
1433- _PyUnicode_GET_LENGTH (unicode )));
1467+ PyUnicode_GET_LENGTH (unicode )));
14341468
14351469}
14361470#endif
@@ -1801,7 +1835,7 @@ unicode_modifiable(PyObject *unicode)
18011835 assert (_PyUnicode_CHECK (unicode ));
18021836 if (Py_REFCNT (unicode ) != 1 )
18031837 return 0 ;
1804- if (FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( unicode ) ) != -1 )
1838+ if (PyUnicode_HASH ( unicode ) != -1 )
18051839 return 0 ;
18061840 if (PyUnicode_CHECK_INTERNED (unicode ))
18071841 return 0 ;
@@ -4052,6 +4086,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
40524086
40534087static int unicode_fill_utf8 (PyObject * unicode );
40544088
4089+
4090+ static int
4091+ unicode_ensure_utf8 (PyObject * unicode )
4092+ {
4093+ int err = 0 ;
4094+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4095+ Py_BEGIN_CRITICAL_SECTION (unicode );
4096+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4097+ err = unicode_fill_utf8 (unicode );
4098+ }
4099+ Py_END_CRITICAL_SECTION ();
4100+ }
4101+ return err ;
4102+ }
4103+
40554104const char *
40564105PyUnicode_AsUTF8AndSize (PyObject * unicode , Py_ssize_t * psize )
40574106{
@@ -4063,13 +4112,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
40634112 return NULL ;
40644113 }
40654114
4066- if (PyUnicode_UTF8 (unicode ) == NULL ) {
4067- if (unicode_fill_utf8 (unicode ) == -1 ) {
4068- if (psize ) {
4069- * psize = -1 ;
4070- }
4071- return NULL ;
4115+ if (unicode_ensure_utf8 (unicode ) == -1 ) {
4116+ if (psize ) {
4117+ * psize = -1 ;
40724118 }
4119+ return NULL ;
40734120 }
40744121
40754122 if (psize ) {
@@ -5401,6 +5448,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
54015448static int
54025449unicode_fill_utf8 (PyObject * unicode )
54035450{
5451+ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED (unicode );
54045452 /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
54055453 assert (!PyUnicode_IS_ASCII (unicode ));
54065454
@@ -5442,10 +5490,10 @@ unicode_fill_utf8(PyObject *unicode)
54425490 PyErr_NoMemory ();
54435491 return -1 ;
54445492 }
5445- _PyUnicode_UTF8 (unicode ) = cache ;
5446- _PyUnicode_UTF8_LENGTH (unicode ) = len ;
54475493 memcpy (cache , start , len );
54485494 cache [len ] = '\0' ;
5495+ PyUnicode_SET_UTF8_LENGTH (unicode , len );
5496+ PyUnicode_SET_UTF8 (unicode , cache );
54495497 _PyBytesWriter_Dealloc (& writer );
54505498 return 0 ;
54515499}
@@ -10996,9 +11044,9 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1099611044 return 0 ;
1099711045 }
1099811046
10999- Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( right_uni ) );
11047+ Py_hash_t right_hash = PyUnicode_HASH ( right_uni );
1100011048 assert (right_hash != -1 );
11001- Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( left ) );
11049+ Py_hash_t hash = PyUnicode_HASH ( left );
1100211050 if (hash != -1 && hash != right_hash ) {
1100311051 return 0 ;
1100411052 }
@@ -11484,14 +11532,14 @@ unicode_hash(PyObject *self)
1148411532#ifdef Py_DEBUG
1148511533 assert (_Py_HashSecret_Initialized );
1148611534#endif
11487- Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( self ) );
11535+ Py_hash_t hash = PyUnicode_HASH ( self );
1148811536 if (hash != -1 ) {
1148911537 return hash ;
1149011538 }
1149111539 x = _Py_HashBytes (PyUnicode_DATA (self ),
1149211540 PyUnicode_GET_LENGTH (self ) * PyUnicode_KIND (self ));
1149311541
11494- FT_ATOMIC_STORE_SSIZE_RELAXED ( _PyUnicode_HASH ( self ) , x );
11542+ PyUnicode_SET_HASH ( self , x );
1149511543 return x ;
1149611544}
1149711545
@@ -14888,8 +14936,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
1488814936 _PyUnicode_STATE (self ).compact = 0 ;
1488914937 _PyUnicode_STATE (self ).ascii = _PyUnicode_STATE (unicode ).ascii ;
1489014938 _PyUnicode_STATE (self ).statically_allocated = 0 ;
14891- _PyUnicode_UTF8_LENGTH (self ) = 0 ;
14892- _PyUnicode_UTF8 (self ) = NULL ;
14939+ PyUnicode_SET_UTF8_LENGTH (self , 0 ) ;
14940+ PyUnicode_SET_UTF8 (self , NULL ) ;
1489314941 _PyUnicode_DATA_ANY (self ) = NULL ;
1489414942
1489514943 share_utf8 = 0 ;
@@ -14919,8 +14967,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
1491914967
1492014968 _PyUnicode_DATA_ANY (self ) = data ;
1492114969 if (share_utf8 ) {
14922- _PyUnicode_UTF8_LENGTH (self ) = length ;
14923- _PyUnicode_UTF8 (self ) = data ;
14970+ PyUnicode_SET_UTF8_LENGTH (self , length ) ;
14971+ PyUnicode_SET_UTF8 (self , data ) ;
1492414972 }
1492514973
1492614974 memcpy (data , PyUnicode_DATA (unicode ), kind * (length + 1 ));
0 commit comments