From 3279b86cecf1d798b708f6bc8350bbc82365d91e Mon Sep 17 00:00:00 2001
From: David Hewitt
Date: Fri, 9 Feb 2024 11:42:24 +0000
Subject: [PATCH 1/2] gh-89188: add `PyUnicode_Data` and `PyUnicode_GetKind`

---
 Include/cpython/unicodeobject.h | 10 ++++++++++
 Objects/unicodeobject.c         | 26 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index d9b54bce83202d..df45819102db1e 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -266,6 +266,16 @@ static inline void* PyUnicode_DATA(PyObject *op) {
 }
 #define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op))
 
+/* Function versions of PyUnicode_DATA() and PyUnicode_KIND(), exported as
+   real symbols so that callers do not need to read the contents of the
+   structure directly.
+
+   Both check their argument with PyUnicode_Check(). If the check fails they
+   call PyErr_BadArgument() and return NULL (PyUnicode_Data) or -1
+   (PyUnicode_GetKind). */
+PyAPI_FUNC(void *) PyUnicode_Data(PyObject *op);
+PyAPI_FUNC(int) PyUnicode_GetKind(PyObject *op);
+
 /* Return pointers to the canonical representation cast to unsigned char,
    Py_UCS2, or Py_UCS4 for direct character access.
    No checks are performed, use PyUnicode_KIND() before to ensure
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0a569a950e88e2..d431b0de65a5b0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3863,6 +3863,32 @@ _PyUnicode_AsUTF8NoNUL(PyObject *unicode)
     return s;
 }
 
+/* Function version of PyUnicode_DATA() that validates its argument.
+   Returns the result of PyUnicode_DATA(unicode); if unicode fails
+   PyUnicode_Check(), calls PyErr_BadArgument() and returns NULL. */
+void *
+PyUnicode_Data(PyObject *unicode)
+{
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_BadArgument();
+        return NULL;
+    }
+    return PyUnicode_DATA(unicode);
+}
+
+/* Function version of PyUnicode_KIND() that validates its argument.
+   Returns the result of PyUnicode_KIND(unicode); if unicode fails
+   PyUnicode_Check(), calls PyErr_BadArgument() and returns -1. */
+int
+PyUnicode_GetKind(PyObject *unicode)
+{
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_BadArgument();
+        return -1;
+    }
+    return PyUnicode_KIND(unicode);
+}
+
 /*
 PyUnicode_GetSize() has been deprecated since Python 3.3
 because it returned length of Py_UNICODE.

From 0ed9e4d349f269f6a8fdd028911e7a2faa696676 Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Fri, 9 Feb 2024 11:56:06 +0000
Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?=
 =?UTF-8?q?rb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../next/C API/2024-02-09-11-56-02.gh-issue-89188.42npsa.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/C API/2024-02-09-11-56-02.gh-issue-89188.42npsa.rst

diff --git a/Misc/NEWS.d/next/C API/2024-02-09-11-56-02.gh-issue-89188.42npsa.rst b/Misc/NEWS.d/next/C API/2024-02-09-11-56-02.gh-issue-89188.42npsa.rst
new file mode 100644
index 00000000000000..1bd2427dbd0ac5
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-02-09-11-56-02.gh-issue-89188.42npsa.rst
@@ -0,0 +1 @@
+Add ``PyUnicode_Data`` and ``PyUnicode_GetKind`` functions as alternatives to ``PyUnicode_DATA`` and ``PyUnicode_KIND``. Unlike those, the new functions do not require callers to rely on the internal structure of Unicode objects.