Merge in the main branch, switch to _Py_ALIGNED_DEF

encukou · encukou · commit cad8ca2b8150 · 2025-06-11T14:18:18.000+02:00
diff --git a/Include/Python.h b/Include/Python.h
@@ -59,14 +59,6 @@
 #  include <intrin.h>             // __readgsqword()
 #endif
 
-// Suppress known warnings in Python header files.
-#if defined(_MSC_VER)
-// Warning that alignas behaviour has changed. Doesn't affect us, because we
-// never relied on the old behaviour.
-#pragma warning(push)
-#pragma warning(disable: 5274)
-#endif
-
 // Include Python header files
 #include "pyport.h"
 #include "pymacro.h"
@@ -146,9 +138,4 @@
 #include "cpython/pyfpe.h"
 #include "cpython/tracemalloc.h"
 
-// Restore warning filter
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
 #endif /* !Py_PYTHON_H */
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
@@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) {
 
 /* --- Unicode Type ------------------------------------------------------- */
 
+struct _PyUnicodeObject_state {
+    /* If interned is non-zero, the two references from the
+       dictionary to this object are *not* counted in ob_refcnt.
+       The possible values here are:
+           0: Not Interned
+           1: Interned
+           2: Interned and Immortal
+           3: Interned, Immortal, and Static
+       This categorization allows the runtime to determine the right
+       cleanup mechanism at runtime shutdown. */
+#ifdef Py_GIL_DISABLED
+    // Needs to be accessed atomically, so can't be a bit field.
+    unsigned char interned;
+#else
+    unsigned int interned:2;
+#endif
+    /* Character size:
+
+       - PyUnicode_1BYTE_KIND (1):
+
+         * character type = Py_UCS1 (8 bits, unsigned)
+         * all characters are in the range U+0000-U+00FF (latin1)
+         * if ascii is set, all characters are in the range U+0000-U+007F
+         (ASCII), otherwise at least one character is in the range
+         U+0080-U+00FF
+
+       - PyUnicode_2BYTE_KIND (2):
+
+         * character type = Py_UCS2 (16 bits, unsigned)
+         * all characters are in the range U+0000-U+FFFF (BMP)
+         * at least one character is in the range U+0100-U+FFFF
+
+       - PyUnicode_4BYTE_KIND (4):
+
+         * character type = Py_UCS4 (32 bits, unsigned)
+         * all characters are in the range U+0000-U+10FFFF
+         * at least one character is in the range U+10000-U+10FFFF
+       */
+    unsigned int kind:3;
+    /* Compact is with respect to the allocation scheme. Compact unicode
+       objects only require one memory block while non-compact objects use
+       one block for the PyUnicodeObject struct and another for its data
+       buffer. */
+    unsigned int compact:1;
+    /* The string only contains characters in the range U+0000-U+007F (ASCII)
+       and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
+       set, use the PyASCIIObject structure. */
+    unsigned int ascii:1;
+    /* The object is statically allocated. */
+    unsigned int statically_allocated:1;
+#ifndef Py_GIL_DISABLED
+    /* Historical: padding to ensure that PyUnicode_DATA() is always aligned to
+       4 bytes (see issue gh-63736 on m68k) */
+    unsigned int :24;
+#endif
+};
+
 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
    structure. state.ascii and state.compact are set, and the data
    immediately follow the structure. utf8_length can be found
@@ -99,67 +156,8 @@ typedef struct {
     PyObject_HEAD
     Py_ssize_t length;          /* Number of code points in the string */
     Py_hash_t hash;             /* Hash value; -1 if not set */
-#ifdef Py_GIL_DISABLED
-    /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
-       In the non-free-threaded build, we'll use explicit padding instead */
-   _Py_ALIGN_AS(4)
-#endif
-    struct {
-        /* If interned is non-zero, the two references from the
-           dictionary to this object are *not* counted in ob_refcnt.
-           The possible values here are:
-               0: Not Interned
-               1: Interned
-               2: Interned and Immortal
-               3: Interned, Immortal, and Static
-           This categorization allows the runtime to determine the right
-           cleanup mechanism at runtime shutdown. */
-#ifdef Py_GIL_DISABLED
-        // Needs to be accessed atomically, so can't be a bit field.
-        unsigned char interned;
-#else
-        unsigned int interned:2;
-#endif
-        /* Character size:
-
-           - PyUnicode_1BYTE_KIND (1):
-
-             * character type = Py_UCS1 (8 bits, unsigned)
-             * all characters are in the range U+0000-U+00FF (latin1)
-             * if ascii is set, all characters are in the range U+0000-U+007F
-               (ASCII), otherwise at least one character is in the range
-               U+0080-U+00FF
-
-           - PyUnicode_2BYTE_KIND (2):
-
-             * character type = Py_UCS2 (16 bits, unsigned)
-             * all characters are in the range U+0000-U+FFFF (BMP)
-             * at least one character is in the range U+0100-U+FFFF
-
-           - PyUnicode_4BYTE_KIND (4):
-
-             * character type = Py_UCS4 (32 bits, unsigned)
-             * all characters are in the range U+0000-U+10FFFF
-             * at least one character is in the range U+10000-U+10FFFF
-         */
-        unsigned int kind:3;
-        /* Compact is with respect to the allocation scheme. Compact unicode
-           objects only require one memory block while non-compact objects use
-           one block for the PyUnicodeObject struct and another for its data
-           buffer. */
-        unsigned int compact:1;
-        /* The string only contains characters in the range U+0000-U+007F (ASCII)
-           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
-           set, use the PyASCIIObject structure. */
-        unsigned int ascii:1;
-        /* The object is statically allocated. */
-        unsigned int statically_allocated:1;
-#ifndef Py_GIL_DISABLED
-        /* Padding to ensure that PyUnicode_DATA() is always aligned to
-           4 bytes (see issue gh-63736 on m68k) */
-        unsigned int :24;
-#endif
-    } state;
+    /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */
+   _Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state;
 } PyASCIIObject;
 
 /* Non-ASCII strings allocated through PyUnicode_New use the
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
@@ -159,10 +159,11 @@ struct atexit_state {
 typedef struct {
     // Tagged pointer to next object in the list.
     // 0 means the object is not tracked
-    uintptr_t _gc_next;
+    _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next;
 
     // Tagged pointer to previous object in the list.
     // Lowest two bits are used for flags documented later.
+    // Those bits are made available by the struct's minimum alignment.
     uintptr_t _gc_prev;
 } PyGC_Head;
 
diff --git a/Include/object.h b/Include/object.h
@@ -101,6 +101,12 @@ whose size is determined when the object is allocated.
 #define PyObject_VAR_HEAD      PyVarObject ob_base;
 #define Py_INVALID_SIZE (Py_ssize_t)-1
 
+/* PyObjects are given a minimum alignment so that the least significant bits
+ * of an object pointer become available for other purposes.
+ * This must be an integer literal with the value (1 << _PyGC_PREV_SHIFT), number of bytes.
+ */
+#define _PyObject_MIN_ALIGNMENT 4
+
 /* Nothing is actually declared to be a PyObject, but every pointer to
  * a Python object can be cast to a PyObject*.  This is inheritance built
  * by hand.  Similarly every pointer to a variable-size Python object can,
@@ -136,6 +142,7 @@ struct _object {
 #else
         Py_ssize_t ob_refcnt;
 #endif
+        _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner;
     };
 #ifdef _MSC_VER
     __pragma(warning(pop))
@@ -153,7 +160,7 @@ struct _object {
     // ob_tid stores the thread id (or zero). It is also used by the GC and the
     // trashcan mechanism as a linked list pointer and by the GC to store the
     // computed "gc_refs" refcount.
-    uintptr_t ob_tid;
+    _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid;
     uint16_t ob_flags;
     PyMutex ob_mutex;           // per-object lock
     uint8_t ob_gc_bits;         // gc-related state
diff --git a/Include/pymacro.h b/Include/pymacro.h
@@ -24,41 +24,65 @@
 #endif
 
 
-// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword,
-// additional compat checking would be great since we added it to the default
-// build.
-// Standards/compiler support:
+// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment
+//
+// `N`: the desired minimum alignment, an integer literal, number of bytes
+// `T`: the type of the defined variable
+//      (or a type with at least the defined variable's alignment)
+//
+// May not be used on a struct definition.
+//
+// Standards/compiler support for `alignas` alternatives:
 // - `alignas` is a keyword in C23 and C++11.
 // - `_Alignas` is a keyword in C11
 // - GCC & clang has __attribute__((aligned))
 //   (use that for older standards in pedantic mode)
 // - MSVC has __declspec(align)
 // - `_Alignas` is common C compiler extension
-// Older compilers may name it differently; to allow compilation on such
-// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
+// Older compilers may name `alignas` differently; to allow compilation on such
+// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already
 // defined. Note that defining it wrong (including defining it to nothing) will
 // cause ABI incompatibilities.
-#ifndef _Py_ALIGN_AS
+//
+// Behavior of `alignas` alternatives:
+// - `alignas` & `_Alignas`:
+//   - Can be used multiple times; the greatest alignment applies.
+//   - It is an *error* if the combined effect of all `alignas` modifiers would
+//     decrease the alignment.
+//   - Takes types or numbers.
+//   - May not be used on a struct definition, unless also defining a variable.
+// - `__declspec(align)`:
+//   - Has no effect if it would decrease alignment.
+//   - Only takes an integer literal.
+//   - May be used on struct or variable definitions.
+//     However, when defining both the struct and the variable at once,
+//     `declspec(aligned)` causes compiler warning 5274 and possible ABI
+//     incompatibility.
+// - ` __attribute__((aligned))`:
+//   - Has no effect if it would decrease alignment.
+//   - Takes types or numbers
+//   - May be used on struct or variable definitions.
+#ifndef _Py_ALIGNED_DEF
 #    ifdef __cplusplus
 #        if __cplusplus >= 201103L
-#            define _Py_ALIGN_AS(V) alignas(V)
+#            define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
 #        elif defined(__GNUC__) || defined(__clang__)
-#            define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
+#            define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
 #        elif defined(_MSC_VER)
-#            define _Py_ALIGN_AS(V) __declspec(align(V))
+#            define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
 #        else
-#            define _Py_ALIGN_AS(V) alignas(V)
+#            define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
 #        endif
 #    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
-#        define _Py_ALIGN_AS(V) alignas(V)
+#        define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
 #    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-#        define _Py_ALIGN_AS(V) _Alignas(V)
+#        define _Py_ALIGNED_DEF(N, T)  _Alignas(N) _Alignas(T) T
 #    elif (defined(__GNUC__) || defined(__clang__))
-#        define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
+#        define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
 #    elif defined(_MSC_VER)
-#        define _Py_ALIGN_AS(V) __declspec(align(V))
+#        define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
 #    else
-#        define _Py_ALIGN_AS(V) _Alignas(V)
+#        define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
 #    endif
 #endif
 
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
@@ -1100,6 +1100,11 @@ def test_large_32b_binunicode8(self):
         self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
                                     dumped)
 
+    def test_large_binstring(self):
+        errmsg = 'BINSTRING pickle has negative byte count'
+        with self.assertRaisesRegex(pickle.UnpicklingError, errmsg):
+            self.loads(b'T\0\0\0\x80')
+
     def test_get(self):
         pickled = b'((lp100000\ng100000\nt.'
         unpickled = self.loads(pickled)
diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py
@@ -14,6 +14,15 @@
 from fractions import Fraction
 from collections import abc, Counter
 
+
+class MyIndex:
+    def __init__(self, value):
+        self.value = value
+
+    def __index__(self):
+        return self.value
+
+
 class TestBasicOps:
     # Superclass with tests common to all generators.
     # Subclasses must arrange for self.gen to retrieve the Random instance
@@ -809,6 +818,9 @@ def test_getrandbits(self):
         self.gen.seed(1234567)
         self.assertEqual(self.gen.getrandbits(100),
                          97904845777343510404718956115)
+        self.gen.seed(1234567)
+        self.assertEqual(self.gen.getrandbits(MyIndex(100)),
+                         97904845777343510404718956115)
 
     def test_getrandbits_2G_bits(self):
         size = 2**31
diff --git a/Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst b/Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst
@@ -0,0 +1 @@
+Fix crash when building on Linux/m68k.
diff --git a/Misc/NEWS.d/next/Library/2025-06-10-00-42-30.gh-issue-135321.UHh9jT.rst b/Misc/NEWS.d/next/Library/2025-06-10-00-42-30.gh-issue-135321.UHh9jT.rst
@@ -0,0 +1 @@
+Raise a correct exception for values greater than 0x7fffffff for the ``BINSTRING`` opcode in the C implementation of :mod:`pickle`.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
@@ -5543,17 +5543,16 @@ static int
 load_counted_binstring(PickleState *st, UnpicklerObject *self, int nbytes)
 {
     PyObject *obj;
-    Py_ssize_t size;
+    long size;
     char *s;
 
     if (_Unpickler_Read(self, st, &s, nbytes) < 0)
         return -1;
 
-    size = calc_binsize(s, nbytes);
+    size = calc_binint(s, nbytes);
     if (size < 0) {
-        PyErr_Format(st->UnpicklingError,
-                     "BINSTRING exceeds system's maximum size of %zd bytes",
-                     PY_SSIZE_T_MAX);
+        PyErr_SetString(st->UnpicklingError,
+                     "BINSTRING pickle has negative byte count");
         return -1;
     }
 
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c
@@ -45,8 +45,7 @@ struct arraydescr {
 
 typedef struct {
     Py_ssize_t allocated;
-    _Py_ALIGN_AS(8)
-    char items[];
+    _Py_ALIGNED_DEF(8, char) items[];
 } arraydata;
 
 typedef struct arrayobject {