From 6849db8606e964610587e51346507a54ee5a480e Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Mon, 14 Jul 2025 09:26:01 -0400 Subject: [PATCH 1/5] fix the embedded null character issue SpiderMonkey doesn't store the extra null character, while some Python APIs assume the string buffer is null-terminated. The issue has not surfaced before because the memory that follows the string buffer happened not to be allocated, which effectively made the string buffer null-terminated. --- src/StrType.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/StrType.cc b/src/StrType.cc index 7df301bd..1c9ac772 100644 --- a/src/StrType.cc +++ b/src/StrType.cc @@ -135,6 +135,12 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { if (JS::LinearStringHasLatin1Chars(lstr)) { // latin1 spidermonkey, latin1 python const JS::Latin1Char *chars = JS::GetLatin1LinearStringChars(nogc, lstr); + if (chars[length] != 0) { // not a null-terminated string + // most Python C APIs assume the string buffer is null-terminated, so we need to create a copy + PyObject *copied = PyUnicode_FromObject(pyString); // create a copy when it's not a true Unicode object + Py_DECREF(pyString); + return copied; + } PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars; PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_1BYTE_KIND; @@ -157,6 +163,11 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { } else { // utf16 spidermonkey, ucs2 python const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr); + if (chars[length] != 0) { // not a null-terminated string + PyObject *copied = PyUnicode_FromObject(pyString); + Py_DECREF(pyString); + return copied; + } PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars; PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_2BYTE_KIND; From d3ea2ecbbbfbd4b1c8171e734afb45332f89cdec Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Fri, 18 Jul 2025 11:29:39 -0400 Subject: [PATCH 2/5] fix(string): short path to fix the `embedded null character` 
issue with Python 3.13+ --- src/StrType.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/StrType.cc b/src/StrType.cc index 1c9ac772..32323fba 100644 --- a/src/StrType.cc +++ b/src/StrType.cc @@ -135,9 +135,11 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { if (JS::LinearStringHasLatin1Chars(lstr)) { // latin1 spidermonkey, latin1 python const JS::Latin1Char *chars = JS::GetLatin1LinearStringChars(nogc, lstr); - if (chars[length] != 0) { // not a null-terminated string - // most Python C APIs assume the string buffer is null-terminated, so we need to create a copy - PyObject *copied = PyUnicode_FromObject(pyString); // create a copy when it's not a true Unicode object + if (Py_Version >= 0x030d0000) { // Python version is greater than 3.13 + // Short path to temporarily fix the issue with Python 3.13+ compact unicode representation. + // It would error with `ValueError: embedded null character`, which is caused by the fact that + // most Python C APIs assume the string buffer is null-terminated, so we need to create a copy. 
+ PyObject *copied = PyUnicode_FromObject((PyObject *)pyString); // create a copy when it's not a true Unicode object Py_DECREF(pyString); return copied; } @@ -163,8 +165,8 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { } else { // utf16 spidermonkey, ucs2 python const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr); - if (chars[length] != 0) { // not a null-terminated string - PyObject *copied = PyUnicode_FromObject(pyString); + if (Py_Version >= 0x030d0000) { // Python 3.13+, see above + PyObject *copied = PyUnicode_FromObject((PyObject *)pyString); // create a copy when it's not a true Unicode object Py_DECREF(pyString); return copied; } From bf049a7fed7c462b623738bb18907e3264af0d22 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Fri, 18 Jul 2025 11:40:53 -0400 Subject: [PATCH 3/5] fix(string): `Py_Version` is only available on Python 3.11+ --- src/StrType.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StrType.cc b/src/StrType.cc index 32323fba..0c384d51 100644 --- a/src/StrType.cc +++ b/src/StrType.cc @@ -135,7 +135,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { if (JS::LinearStringHasLatin1Chars(lstr)) { // latin1 spidermonkey, latin1 python const JS::Latin1Char *chars = JS::GetLatin1LinearStringChars(nogc, lstr); - if (Py_Version >= 0x030d0000) { // Python version is greater than 3.13 + if ((PY_VERSION_HEX) >= 0x030d0000) { // Python version is greater than 3.13 // Short path to temporarily fix the issue with Python 3.13+ compact unicode representation. // It would error with `ValueError: embedded null character`, which is caused by the fact that // most Python C APIs assume the string buffer is null-terminated, so we need to create a copy. 
@@ -165,7 +165,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { } else { // utf16 spidermonkey, ucs2 python const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr); - if (Py_Version >= 0x030d0000) { // Python 3.13+, see above + if ((PY_VERSION_HEX) >= 0x030d0000) { // Python 3.13+, see above PyObject *copied = PyUnicode_FromObject((PyObject *)pyString); // create a copy when it's not a true Unicode object Py_DECREF(pyString); return copied; From adb18fbce3dda1f54f70b84082cbd2be811ee029 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 19 Jul 2025 07:38:11 -0400 Subject: [PATCH 4/5] fix(string): short path to convert strings --- src/StrType.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StrType.cc b/src/StrType.cc index 0c384d51..18ecce09 100644 --- a/src/StrType.cc +++ b/src/StrType.cc @@ -139,7 +139,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { // Short path to temporarily fix the issue with Python 3.13+ compact unicode representation. // It would error with `ValueError: embedded null character`, which is caused by the fact that // most Python C APIs assume the string buffer is null-terminated, so we need to create a copy. 
- PyObject *copied = PyUnicode_FromObject((PyObject *)pyString); // create a copy when it's not a true Unicode object + PyObject *copied = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, chars, length); Py_DECREF(pyString); return copied; } @@ -166,7 +166,7 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { else { // utf16 spidermonkey, ucs2 python const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr); if ((PY_VERSION_HEX) >= 0x030d0000) { // Python 3.13+, see above - PyObject *copied = PyUnicode_FromObject((PyObject *)pyString); // create a copy when it's not a true Unicode object + PyObject *copied = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, chars, length); Py_DECREF(pyString); return copied; } From e3410eddd0e92a13ec0408d20e5a18c77851405b Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 19 Jul 2025 09:34:05 -0400 Subject: [PATCH 5/5] fix(string): fix utf16 strings that contain surrogate pairs --- src/StrType.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/StrType.cc b/src/StrType.cc index 18ecce09..a5c65788 100644 --- a/src/StrType.cc +++ b/src/StrType.cc @@ -165,11 +165,6 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { } else { // utf16 spidermonkey, ucs2 python const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr); - if ((PY_VERSION_HEX) >= 0x030d0000) { // Python 3.13+, see above - PyObject *copied = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, chars, length); - Py_DECREF(pyString); - return copied; - } PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars; PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_2BYTE_KIND; @@ -202,6 +197,11 @@ PyObject *StrType::proxifyString(JSContext *cx, JS::HandleValue strVal) { Py_DECREF(pyString); return ucs4Obj; } + if ((PY_VERSION_HEX) >= 0x030d0000) { // Python 3.13+, fix `ValueError: embedded null character` + PyObject *copied = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, chars, length); // 
create a copy of the string buffer + Py_DECREF(pyString); + return copied; + } } return (PyObject *)pyString;