Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions Include/Python.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,6 @@
# include <intrin.h> // __readgsqword()
#endif

// Suppress known warnings in Python header files.
#if defined(_MSC_VER)
// Warning that alignas behaviour has changed. Doesn't affect us, because we
// never relied on the old behaviour.
#pragma warning(push)
#pragma warning(disable: 5274)
#endif

// Include Python header files
#include "pyport.h"
#include "pymacro.h"
Expand Down Expand Up @@ -146,9 +138,4 @@
#include "cpython/pyfpe.h"
#include "cpython/tracemalloc.h"

// Restore warning filter
#ifdef _MSC_VER
#pragma warning(pop)
#endif

#endif /* !Py_PYTHON_H */
120 changes: 59 additions & 61 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) {

/* --- Unicode Type ------------------------------------------------------- */

/* Packed per-string state flags of a Unicode object.
   PyASCIIObject embeds this struct as its `state` member, declared with
   4-byte alignment. Field order and bit widths determine the in-memory
   layout, so they must not be rearranged. */
struct _PyUnicodeObject_state {
/* If interned is non-zero, the two references from the
dictionary to this object are *not* counted in ob_refcnt.
The possible values here are:
0: Not Interned
1: Interned
2: Interned and Immortal
3: Interned, Immortal, and Static
This categorization allows the runtime to determine the right
cleanup mechanism at runtime shutdown. */
#ifdef Py_GIL_DISABLED
// Needs to be accessed atomically, so can't be a bit field.
unsigned char interned;
#else
// Two bits cover the four values (0-3) listed above.
unsigned int interned:2;
#endif
/* Character size:

- PyUnicode_1BYTE_KIND (1):

* character type = Py_UCS1 (8 bits, unsigned)
* all characters are in the range U+0000-U+00FF (latin1)
* if ascii is set, all characters are in the range U+0000-U+007F
(ASCII), otherwise at least one character is in the range
U+0080-U+00FF

- PyUnicode_2BYTE_KIND (2):

* character type = Py_UCS2 (16 bits, unsigned)
* all characters are in the range U+0000-U+FFFF (BMP)
* at least one character is in the range U+0100-U+FFFF

- PyUnicode_4BYTE_KIND (4):

* character type = Py_UCS4 (32 bits, unsigned)
* all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF

Three bits are wide enough for the largest kind value (4).
*/
unsigned int kind:3;
/* Compact is with respect to the allocation scheme. Compact unicode
objects only require one memory block while non-compact objects use
one block for the PyUnicodeObject struct and another for its data
buffer. */
unsigned int compact:1;
/* The string only contains characters in the range U+0000-U+007F (ASCII)
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
set, use the PyASCIIObject structure. */
unsigned int ascii:1;
/* The object is statically allocated. */
unsigned int statically_allocated:1;
#ifndef Py_GIL_DISABLED
/* Historical: padding to ensure that PyUnicode_DATA() is always aligned to
4 bytes (see issue gh-63736 on m68k).
The named bit-fields above use 2 + 3 + 1 + 1 + 1 = 8 bits; these 24
unnamed bits bring the total to 32. */
unsigned int :24;
#endif
};

/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
structure. state.ascii and state.compact are set, and the data
immediately follow the structure. utf8_length can be found
Expand Down Expand Up @@ -99,67 +156,8 @@ typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
#ifdef Py_GIL_DISABLED
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
In the non-free-threaded build, we'll use explicit padding instead */
_Py_ALIGN_AS(4)
#endif
struct {
/* If interned is non-zero, the two references from the
dictionary to this object are *not* counted in ob_refcnt.
The possible values here are:
0: Not Interned
1: Interned
2: Interned and Immortal
3: Interned, Immortal, and Static
This categorization allows the runtime to determine the right
cleanup mechanism at runtime shutdown. */
#ifdef Py_GIL_DISABLED
// Needs to be accessed atomically, so can't be a bit field.
unsigned char interned;
#else
unsigned int interned:2;
#endif
/* Character size:

- PyUnicode_1BYTE_KIND (1):

* character type = Py_UCS1 (8 bits, unsigned)
* all characters are in the range U+0000-U+00FF (latin1)
* if ascii is set, all characters are in the range U+0000-U+007F
(ASCII), otherwise at least one character is in the range
U+0080-U+00FF

- PyUnicode_2BYTE_KIND (2):

* character type = Py_UCS2 (16 bits, unsigned)
* all characters are in the range U+0000-U+FFFF (BMP)
* at least one character is in the range U+0100-U+FFFF

- PyUnicode_4BYTE_KIND (4):

* character type = Py_UCS4 (32 bits, unsigned)
* all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF
*/
unsigned int kind:3;
/* Compact is with respect to the allocation scheme. Compact unicode
objects only require one memory block while non-compact objects use
one block for the PyUnicodeObject struct and another for its data
buffer. */
unsigned int compact:1;
/* The string only contains characters in the range U+0000-U+007F (ASCII)
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
set, use the PyASCIIObject structure. */
unsigned int ascii:1;
/* The object is statically allocated. */
unsigned int statically_allocated:1;
#ifndef Py_GIL_DISABLED
/* Padding to ensure that PyUnicode_DATA() is always aligned to
4 bytes (see issue gh-63736 on m68k) */
unsigned int :24;
#endif
} state;
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */
_Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state;
} PyASCIIObject;

/* Non-ASCII strings allocated through PyUnicode_New use the
Expand Down
3 changes: 2 additions & 1 deletion Include/internal/pycore_interp_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,11 @@ struct atexit_state {
// GC header made of two tagged link words (`_gc_next`, `_gc_prev`) that chain
// an object into a list; see the per-field notes below.
// NOTE(review): `_gc_next` is declared twice below. This text is a rendered
// diff hunk ("2 additions & 1 deletion"), so presumably the plain `uintptr_t`
// line is the deleted one and the `_Py_ALIGNED_DEF` line is its replacement.
// Confirm against the merged file; only one declaration may remain.
typedef struct {
// Tagged pointer to next object in the list.
// 0 means the object is not tracked
uintptr_t _gc_next;
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next;

// Tagged pointer to previous object in the list.
// Lowest two bits are used for flags documented later.
// Those bits are made available by the struct's minimum alignment.
uintptr_t _gc_prev;
} PyGC_Head;

Expand Down
9 changes: 8 additions & 1 deletion Include/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ whose size is determined when the object is allocated.
#define PyObject_VAR_HEAD PyVarObject ob_base;
/* The value -1 converted to Py_ssize_t, named as the "invalid size" marker.
   The expansion is fully parenthesized so that it remains a single operand
   next to `sizeof` or postfix operators: without the outer parentheses,
   `sizeof Py_INVALID_SIZE` would parse as `sizeof(Py_ssize_t) - 1`. */
#define Py_INVALID_SIZE ((Py_ssize_t)-1)

/* PyObjects are given a minimum alignment so that the least significant bits
 * of an object pointer become available for other purposes.
 * The value is a number of bytes and must equal (1 << _PyGC_PREV_SHIFT).
 * It must be written as an integer literal because it is passed as the
 * alignment argument of _Py_ALIGNED_DEF, which requires a literal.
 */
#define _PyObject_MIN_ALIGNMENT 4

/* Nothing is actually declared to be a PyObject, but every pointer to
* a Python object can be cast to a PyObject*. This is inheritance built
* by hand. Similarly every pointer to a variable-size Python object can,
Expand Down Expand Up @@ -136,6 +142,7 @@ struct _object {
#else
Py_ssize_t ob_refcnt;
#endif
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner;
};
#ifdef _MSC_VER
__pragma(warning(pop))
Expand All @@ -153,7 +160,7 @@ struct _object {
// ob_tid stores the thread id (or zero). It is also used by the GC and the
// trashcan mechanism as a linked list pointer and by the GC to store the
// computed "gc_refs" refcount.
uintptr_t ob_tid;
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid;
uint16_t ob_flags;
PyMutex ob_mutex; // per-object lock
uint8_t ob_gc_bits; // gc-related state
Expand Down
82 changes: 52 additions & 30 deletions Include/pymacro.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,44 +24,66 @@
#endif


// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword,
// We currently use alignas for free-threaded builds only; additional compat
// checking would be great before we add it to the default build.
// Standards/compiler support:
// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment
//
// `N`: the desired minimum alignment, an integer literal, number of bytes
// `T`: the type of the defined variable
// (or a type with at least the defined variable's alignment)
//
// May not be used on a struct definition.
//
// Standards/compiler support for `alignas` alternatives:
// - `alignas` is a keyword in C23 and C++11.
// - `_Alignas` is a keyword in C11
// - GCC & clang have __attribute__((aligned))
// (use that for older standards in pedantic mode)
// - MSVC has __declspec(align)
// - `_Alignas` is a common C compiler extension
// Older compilers may name it differently; to allow compilation on such
// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
// Older compilers may name `alignas` differently; to allow compilation on such
// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already
// defined. Note that defining it wrong (including defining it to nothing) will
// cause ABI incompatibilities.
#ifdef Py_GIL_DISABLED
# ifndef _Py_ALIGN_AS
# ifdef __cplusplus
# if __cplusplus >= 201103L
# define _Py_ALIGN_AS(V) alignas(V)
# elif defined(__GNUC__) || defined(__clang__)
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
# elif defined(_MSC_VER)
# define _Py_ALIGN_AS(V) __declspec(align(V))
# else
# define _Py_ALIGN_AS(V) alignas(V)
# endif
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define _Py_ALIGN_AS(V) alignas(V)
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
# define _Py_ALIGN_AS(V) _Alignas(V)
# elif (defined(__GNUC__) || defined(__clang__))
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
# elif defined(_MSC_VER)
# define _Py_ALIGN_AS(V) __declspec(align(V))
# else
# define _Py_ALIGN_AS(V) _Alignas(V)
# endif
# endif
//
// Behavior of `alignas` alternatives:
// - `alignas` & `_Alignas`:
// - Can be used multiple times; the greatest alignment applies.
// - It is an *error* if the combined effect of all `alignas` modifiers would
// decrease the alignment.
// - Takes types or numbers.
// - May not be used on a struct definition, unless also defining a variable.
// - `__declspec(align)`:
// - Has no effect if it would decrease alignment.
// - Only takes an integer literal.
// - May be used on struct or variable definitions.
// However, when defining both the struct and the variable at once,
// `__declspec(align)` causes compiler warning 5274 and possible ABI
// incompatibility.
// - `__attribute__((aligned))`:
// - Has no effect if it would decrease alignment.
// - Takes types or numbers
// - May be used on struct or variable definitions.
#ifndef _Py_ALIGNED_DEF
// Selection order: a language standard that provides an alignment keyword
// wins; otherwise fall back to a known compiler extension; otherwise guess
// the keyword spelling that matches the language being compiled.
// The C-only tests are guarded by !defined(__cplusplus) so that a C++
// compiler never takes a branch keyed on __STDC_VERSION__.
# if defined(__cplusplus) && __cplusplus >= 201103L
    // C++11 and later: `alignas` keyword.
#  define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
# elif !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
    // C23 and later: `alignas` keyword.
#  define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
# elif !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
    // C11 / C17: `_Alignas` keyword.
#  define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
# elif defined(__GNUC__) || defined(__clang__)
    // Pre-C11 C or pre-C++11 C++ on GCC/clang.
#  define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
# elif defined(_MSC_VER)
    // Pre-C11 C or pre-C++11 C++ on MSVC.
#  define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
# elif defined(__cplusplus)
    // Unknown old C++ compiler: assume it accepts `alignas`.
#  define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
# else
    // Unknown old C compiler: assume it accepts `_Alignas`.
#  define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
# endif
#endif

/* Minimum value between x and y */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix crash when building on Linux/m68k.
Loading