Skip to content

Commit 28551d5

Browse files
committed
gh-128972: Add _Py_ALIGN_AS and revert PyASCIIObject memory layout.
Add `_Py_ALIGN_AS` as per C API WG vote: capi-workgroup/decisions#61. This patch only adds it to free-threaded builds; the `#ifdef Py_GIL_DISABLED` can be removed in the future. Use this to revert `PyASCIIObject` memory layout for non-free-threaded builds. The long-term plan is to deprecate the entire struct; until that happens it's better to keep it unchanged, as a courtesy to people that rely on it despite it not being stable ABI.
1 parent 5bf0f36 commit 28551d5

File tree

4 files changed

+65
-13
lines changed

4 files changed

+65
-13
lines changed

Include/cpython/unicodeobject.h

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ typedef struct {
9999
PyObject_HEAD
100100
Py_ssize_t length; /* Number of code points in the string */
101101
Py_hash_t hash; /* Hash value; -1 if not set */
102+
#ifdef Py_GIL_DISABLED
103+
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
104+
In the non-free-threaded build, we'll use explicit padding instead */
105+
_Py_ALIGN_AS(4)
106+
#endif
102107
struct {
103108
/* If interned is non-zero, the two references from the
104109
dictionary to this object are *not* counted in ob_refcnt.
@@ -109,7 +114,12 @@ typedef struct {
109114
3: Interned, Immortal, and Static
110115
This categorization allows the runtime to determine the right
111116
cleanup mechanism at runtime shutdown. */
112-
uint16_t interned;
117+
#ifdef Py_GIL_DISABLED
118+
// Needs to be accessed atomically, so can't be a bit field.
119+
unsigned char interned;
120+
#else
121+
unsigned int interned:2;
122+
#endif
113123
/* Character size:
114124
115125
- PyUnicode_1BYTE_KIND (1):
@@ -132,23 +142,23 @@ typedef struct {
132142
* all characters are in the range U+0000-U+10FFFF
133143
* at least one character is in the range U+10000-U+10FFFF
134144
*/
135-
unsigned short kind:3;
145+
unsigned int kind:3;
136146
/* Compact is with respect to the allocation scheme. Compact unicode
137147
objects only require one memory block while non-compact objects use
138148
one block for the PyUnicodeObject struct and another for its data
139149
buffer. */
140-
unsigned short compact:1;
150+
unsigned int compact:1;
141151
/* The string only contains characters in the range U+0000-U+007F (ASCII)
142152
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
143153
set, use the PyASCIIObject structure. */
144-
unsigned short ascii:1;
154+
unsigned int ascii:1;
145155
/* The object is statically allocated. */
146-
unsigned short statically_allocated:1;
156+
unsigned int statically_allocated:1;
157+
#ifndef Py_GIL_DISABLED
147158
/* Padding to ensure that PyUnicode_DATA() is always aligned to
148-
4 bytes (see issue #19537 on m68k) and we use unsigned short to avoid
149-
the extra four bytes on 32-bit Windows. This is restricted features
150-
for specific compilers including GCC, MSVC, Clang and IBM's XL compiler. */
151-
unsigned short :10;
159+
4 bytes (see issue gh-63736 on m68k) */
160+
unsigned int :24;
161+
#endif
152162
} state;
153163
} PyASCIIObject;
154164

@@ -198,7 +208,7 @@ typedef struct {
198208
/* Use only if you know it's a string */
199209
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
200210
#ifdef Py_GIL_DISABLED
201-
return _Py_atomic_load_uint16_relaxed(&_PyASCIIObject_CAST(op)->state.interned);
211+
return _Py_atomic_load_uint8_relaxed(&_PyASCIIObject_CAST(op)->state.interned);
202212
#else
203213
return _PyASCIIObject_CAST(op)->state.interned;
204214
#endif

Include/pymacro.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,45 @@
2323
# define static_assert _Static_assert
2424
#endif
2525

26+
27+
// _Py_ALIGN_AS: this compiler's spelling of the `alignas` keyword.
28+
// We currently use alignas for free-threaded builds only; additional compat
29+
// checking would be great before we add it to the default build.
30+
// Standards/compiler support:
31+
// - `alignas` is a keyword in C23 and C++11.
32+
// - `_Alignas` is a keyword in C11
33+
// - GCC & clang have __attribute__((aligned))
34+
// (use that for older standards in pedantic mode)
35+
// - MSVC has __declspec(align)
36+
// - `_Alignas` is a common C compiler extension
37+
// Older compilers may name it differently; to allow compilation on such
38+
// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
39+
// defined. Note that defining it wrong (including defining it to nothing) will
40+
// cause ABI incompatibilities.
41+
#ifdef Py_GIL_DISABLED
42+
# ifndef _Py_ALIGN_AS
43+
# ifdef __cplusplus
44+
# if (__cplusplus < 201103L) \
45+
&& (defined(__GNUC__) || defined(__clang__))
46+
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
47+
# else
48+
# define _Py_ALIGN_AS(V) alignas(V)
49+
# endif
50+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
51+
# define _Py_ALIGN_AS(V) alignas(V)
52+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
53+
# define _Py_ALIGN_AS(V) _Alignas(V)
54+
# elif (defined(__GNUC__) || defined(__clang__)) \
55+
&& defined(__STDC_VERSION__) && __STDC_VERSION__ < 201112L
56+
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
57+
# elif defined(_MSC_VER)
58+
# define _Py_ALIGN_AS(V) __declspec(align(V))
59+
# else
60+
# define _Py_ALIGN_AS(V) _Alignas(V)
61+
# endif
62+
# endif
63+
#endif
64+
2665
/* Minimum value between x and y */
2766
#define Py_MIN(x, y) (((x) > (y)) ? (y) : (x))
2867

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
For non-free-threaded builds, the memory layout of :c:struct:`PyASCIIObject`
2+
is reverted to match Python 3.13. (Note that the structure is not part of
3+
stable ABI and so its memory layout is *not guaranteed* to remain stable.)

Objects/unicodeobject.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15904,7 +15904,7 @@ immortalize_interned(PyObject *s)
1590415904
_Py_DecRefTotal(_PyThreadState_GET());
1590515905
}
1590615906
#endif
15907-
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
15907+
FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
1590815908
_Py_SetImmortal(s);
1590915909
}
1591015910

@@ -16022,7 +16022,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1602216022
Py_DECREF(s);
1602316023
Py_DECREF(s);
1602416024
}
16025-
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
16025+
FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
1602616026

1602716027
/* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */
1602816028

@@ -16158,7 +16158,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1615816158
Py_UNREACHABLE();
1615916159
}
1616016160
if (!shared) {
16161-
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
16161+
FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
1616216162
}
1616316163
}
1616416164
#ifdef INTERNED_STATS

0 commit comments

Comments
 (0)