diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 287de52b96202c..cea69dd1280999 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -240,6 +240,8 @@ enum PyUnicode_Kind {
     PyUnicode_4BYTE_KIND = 4
 };
 
+PyAPI_FUNC(int) PyUnicode_KIND(PyObject *op);
+
 // PyUnicode_KIND(): Return one of the PyUnicode_*_KIND values defined above.
 //
 // gh-89653: Converting this macro to a static inline function would introduce
@@ -264,13 +266,15 @@ static inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) {
     return data;
 }
 
-static inline void* PyUnicode_DATA(PyObject *op) {
+PyAPI_FUNC(void*) PyUnicode_DATA(PyObject *op);
+
+static inline void* _PyUnicode_DATA(PyObject *op) {
     if (PyUnicode_IS_COMPACT(op)) {
         return _PyUnicode_COMPACT_DATA(op);
     }
     return _PyUnicode_NONCOMPACT_DATA(op);
 }
-#define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op))
+#define PyUnicode_DATA(op) _PyUnicode_DATA(_PyObject_CAST(op))
 
 /* Return pointers to the canonical representation cast to unsigned char,
    Py_UCS2, or Py_UCS4 for direct character access.
diff --git a/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst b/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst
new file mode 100644
index 00000000000000..7ff225a7dc60c7
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst
@@ -0,0 +1,3 @@
+Implement :c:func:`PyUnicode_KIND` and :c:func:`PyUnicode_DATA` as functions,
+in addition to the macros with the same names. The macros rely on C bit
+fields which have compiler-specific layout. Patch by Victor Stinner.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d9952f764bb178..c6f13f60ad741f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -16486,3 +16486,34 @@ PyInit__string(void)
 {
     return PyModuleDef_Init(&_string_module);
 }
+
+
+// PyUnicode_KIND() as an exported function, next to the macro of the same
+// name. The macro is undefined first so the name below is not expanded.
+// Return the 'kind' stored in the string state of 'op'. If 'op' does not
+// pass PyUnicode_Check(), set a TypeError and return -1.
+#undef PyUnicode_KIND
+int
+PyUnicode_KIND(PyObject *op)
+{
+    if (PyUnicode_Check(op)) {
+        return _PyASCIIObject_CAST(op)->state.kind;
+    }
+    PyErr_Format(PyExc_TypeError, "expect str, got %T", op);
+    return -1;
+}
+
+// PyUnicode_DATA() as an exported function, next to the macro of the same
+// name. The macro is undefined first so the name below is not expanded.
+// Return the data pointer computed by _PyUnicode_DATA(op). If 'op' does
+// not pass PyUnicode_Check(), set a TypeError and return NULL.
+#undef PyUnicode_DATA
+void*
+PyUnicode_DATA(PyObject *op)
+{
+    if (PyUnicode_Check(op)) {
+        return _PyUnicode_DATA(op);
+    }
+    PyErr_Format(PyExc_TypeError, "expect str, got %T", op);
+    return NULL;
+}