Skip to content

Commit 9eea86a

Browse files
authored
Merge pull request #353 from Distributive-Network/caleb/fix/rootedstrings
JSString Improvements
2 parents f7f1a1b + 84f00c4 commit 9eea86a

File tree

13 files changed

+152
-80
lines changed

13 files changed

+152
-80
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ else ifeq ($(BUILD),Debug)
2727
PYTHON_BUILD_ENV += BUILD_TYPE=Debug
2828
else ifeq ($(BUILD),DRelease)
2929
PYTHON_BUILD_ENV += BUILD_TYPE=DRelease
30-
else ifeq($(BUILD), None)
30+
else ifeq ($(BUILD), None)
3131
PYTHON_BUILD_ENV += BUILD_TYPE=None
3232
else # Release build
3333
PYTHON_BUILD_ENV += BUILD_TYPE=Release

include/JSStringProxy.hh

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,23 @@
2121
*/
2222
typedef struct {
2323
PyUnicodeObject str;
24-
JS::PersistentRootedValue jsString;
24+
JS::PersistentRootedValue *jsString;
2525
} JSStringProxy;
2626

27+
/**
28+
* @brief This struct is a bundle of methods used by the JSStringProxy type
29+
*
30+
*/
31+
struct JSStringProxyMethodDefinitions {
32+
public:
33+
/**
34+
* @brief Deallocation method (.tp_dealloc), removes the reference to the underlying JSString before freeing the JSStringProxy
35+
*
36+
* @param self - The JSStringProxy to be free'd
37+
*/
38+
static void JSStringProxy_dealloc(JSStringProxy *self);
39+
};
40+
2741
/**
2842
* @brief Struct for the JSStringProxyType, used by all JSStringProxy objects
2943
*/

include/StrType.hh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ public:
3434
*
3535
* @returns PyObject* pointer to the resulting PyObject
3636
*/
37-
static PyObject *getPyObject(JSContext *cx, JSString *str);
37+
static PyObject *getPyObject(JSContext *cx, JS::HandleValue str);
3838

39-
static const char *getValue(JSContext *cx, JSString *str);
39+
static const char *getValue(JSContext *cx, JS::HandleValue str);
4040
};
4141

4242
#endif

include/jsTypeFactory.hh

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,26 @@
1616
#include <Python.h>
1717

1818

19-
struct PythonExternalString;
19+
struct PythonExternalString : public JSExternalStringCallbacks {
20+
public:
21+
/**
22+
* @brief Get the PyObject using the given char buffer
23+
*
24+
* @param chars - the char buffer of the PyObject
25+
* @return PyObject* - the PyObject string
26+
*/
27+
static PyObject *getPyString(const char16_t *chars);
28+
29+
/**
30+
* @brief decrefs the underlying PyObject string when the JSString is finalized
31+
*
32+
* @param chars - The char buffer of the string
33+
*/
34+
void finalize(char16_t *chars) const override;
35+
36+
size_t sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const override;
37+
};
38+
extern PythonExternalString PythonExternalStringCallbacks;
2039

2140
/**
2241
* @brief Function that makes a UTF16-encoded copy of a UCS4 string

src/ExceptionType.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ JSObject *ExceptionType::toJsError(JSContext *cx, PyObject *exceptionValue, PyOb
107107
if (stackObj.get()) {
108108
JS::RootedString stackStr(cx);
109109
JS::BuildStackString(cx, nullptr, stackObj, &stackStr, 2, js::StackFormat::SpiderMonkey);
110-
stackStream << "\nJS Stack Trace:\n" << StrType::getValue(cx, stackStr);
110+
JS::RootedValue stackStrVal(cx, JS::StringValue(stackStr));
111+
stackStream << "\nJS Stack Trace:\n" << StrType::getValue(cx, stackStrVal);
111112
}
112113

113114

src/JSObjectIterProxy.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,11 @@ PyObject *JSObjectIterProxyMethodDefinitions::JSObjectIterProxy_nextkey(JSObject
7878
ret = key;
7979
}
8080

81+
Py_INCREF(ret);
8182
if (self->it.kind != KIND_KEYS) {
8283
Py_DECREF(value);
8384
}
8485

85-
Py_INCREF(ret);
8686
return ret;
8787
}
8888
} else {
@@ -108,11 +108,11 @@ PyObject *JSObjectIterProxyMethodDefinitions::JSObjectIterProxy_nextkey(JSObject
108108
ret = key;
109109
}
110110

111+
Py_INCREF(ret);
111112
if (self->it.kind != KIND_KEYS) {
112113
Py_DECREF(value);
113114
}
114115

115-
Py_INCREF(ret);
116116
return ret;
117117
}
118118
}

src/JSStringProxy.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/**
2+
* @file JSStringProxy.cc
3+
* @author Caleb Aikens ([email protected])
4+
* @brief JSStringProxy is a custom C-implemented python type that derives from str. It acts as a proxy for JSStrings from Spidermonkey, and behaves like a str would.
5+
* @date 2024-05-15
6+
*
7+
* @copyright Copyright (c) 2024 Distributive Corp.
8+
*
9+
*/
10+
11+
#include "include/JSStringProxy.hh"
12+
13+
void JSStringProxyMethodDefinitions::JSStringProxy_dealloc(JSStringProxy *self)
14+
{
15+
delete self->jsString;
16+
}

src/StrType.cc

Lines changed: 58 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "include/StrType.hh"
1212
#include "include/JSStringProxy.hh"
13+
#include "include/jsTypeFactory.hh"
1314

1415
#include <jsapi.h>
1516
#include <js/String.h>
@@ -58,15 +59,15 @@ static bool containsSurrogatePair(const char16_t *chars, size_t length) {
5859
* @return PyObject* - the UCS4-encoding of the pyObject string
5960
*
6061
*/
61-
static PyObject *asUCS4(PyObject *pyObject) {
62-
if (PyUnicode_KIND(pyObject) != PyUnicode_2BYTE_KIND) {
62+
static PyObject *asUCS4(PyObject *pyString) {
63+
if (PyUnicode_KIND(pyString) != PyUnicode_2BYTE_KIND) {
6364
// return a new reference to match the behaviour of `PyUnicode_FromKindAndData`
64-
Py_INCREF(pyObject);
65-
return pyObject;
65+
Py_INCREF(pyString);
66+
return pyString;
6667
}
6768

68-
uint16_t *chars = PY_UNICODE_OBJECT_DATA_UCS2(pyObject);
69-
size_t length = PY_UNICODE_OBJECT_LENGTH(pyObject);
69+
uint16_t *chars = PY_UNICODE_OBJECT_DATA_UCS2(pyString);
70+
size_t length = PY_UNICODE_OBJECT_LENGTH(pyString);
7071

7172
uint32_t *ucs4String = new uint32_t[length];
7273
size_t ucs4Length = 0;
@@ -94,45 +95,51 @@ static PyObject *asUCS4(PyObject *pyObject) {
9495
return ret;
9596
}
9697

97-
static PyObject *processString(JSContext *cx, JSString *str) {
98+
static PyObject *processString(JSContext *cx, JS::HandleValue strVal) {
99+
JS::RootedString str(cx, strVal.toString());
98100
JSLinearString *lstr = JS_EnsureLinearString(cx, str);
99101
JS::AutoCheckCannotGC nogc;
100102
PyObject *p;
101103

102104
size_t length = JS::GetLinearStringLength(lstr);
103105

104-
PyObject *pyObject = (PyObject *)PyObject_New(JSStringProxy, &JSStringProxyType); // new reference
105-
Py_INCREF(pyObject);
106+
JSStringProxy *pyString = PyObject_New(JSStringProxy, &JSStringProxyType); // new reference
106107

107-
((JSStringProxy *)pyObject)->jsString.setString((JSString *)lstr);
108+
if (pyString == NULL) {
109+
return NULL;
110+
}
111+
112+
JS::RootedObject obj(cx);
113+
pyString->jsString = new JS::PersistentRootedValue(cx);
114+
pyString->jsString->setString((JSString *)lstr);
108115

109116
// Initialize as legacy string (https://github.com/python/cpython/blob/v3.12.0b1/Include/cpython/unicodeobject.h#L78-L93)
110117
// see https://github.com/python/cpython/blob/v3.11.3/Objects/unicodeobject.c#L1230-L1245
111-
PY_UNICODE_OBJECT_HASH(pyObject) = -1;
112-
PY_UNICODE_OBJECT_STATE(pyObject).interned = 0;
113-
PY_UNICODE_OBJECT_STATE(pyObject).compact = 0;
114-
PY_UNICODE_OBJECT_STATE(pyObject).ascii = 0;
115-
PY_UNICODE_OBJECT_UTF8(pyObject) = NULL;
116-
PY_UNICODE_OBJECT_UTF8_LENGTH(pyObject) = 0;
118+
PY_UNICODE_OBJECT_HASH(pyString) = -1;
119+
PY_UNICODE_OBJECT_STATE(pyString).interned = 0;
120+
PY_UNICODE_OBJECT_STATE(pyString).compact = 0;
121+
PY_UNICODE_OBJECT_STATE(pyString).ascii = 0;
122+
PY_UNICODE_OBJECT_UTF8(pyString) = NULL;
123+
PY_UNICODE_OBJECT_UTF8_LENGTH(pyString) = 0;
117124

118125
if (JS::LinearStringHasLatin1Chars(lstr)) { // latin1 spidermonkey, latin1 python
119126
const JS::Latin1Char *chars = JS::GetLatin1LinearStringChars(nogc, lstr);
120127

121-
PY_UNICODE_OBJECT_DATA_ANY(pyObject) = (void *)chars;
122-
PY_UNICODE_OBJECT_KIND(pyObject) = PyUnicode_1BYTE_KIND;
123-
PY_UNICODE_OBJECT_LENGTH(pyObject) = length;
128+
PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars;
129+
PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_1BYTE_KIND;
130+
PY_UNICODE_OBJECT_LENGTH(pyString) = length;
124131
#if PY_UNICODE_HAS_WSTR
125-
PY_UNICODE_OBJECT_WSTR(pyObject) = NULL;
126-
PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = 0;
127-
PY_UNICODE_OBJECT_READY(pyObject) = 1;
132+
PY_UNICODE_OBJECT_WSTR(pyString) = NULL;
133+
PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = 0;
134+
PY_UNICODE_OBJECT_READY(pyString) = 1;
128135
#endif
129136
}
130137
else { // utf16 spidermonkey, ucs2 python
131138
const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr);
132139

133-
PY_UNICODE_OBJECT_DATA_ANY(pyObject) = (void *)chars;
134-
PY_UNICODE_OBJECT_KIND(pyObject) = PyUnicode_2BYTE_KIND;
135-
PY_UNICODE_OBJECT_LENGTH(pyObject) = length;
140+
PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars;
141+
PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_2BYTE_KIND;
142+
PY_UNICODE_OBJECT_LENGTH(pyString) = length;
136143

137144
#if PY_UNICODE_HAS_WSTR
138145
// python unicode objects take advantage of a possible performance gain on systems where
@@ -141,38 +148,49 @@ static PyObject *processString(JSContext *cx, JSString *str) {
141148
// On systems where sizeof(wchar_t) == 4, i.e. Unixy systems, a similar performance gain happens if the
142149
// string is using UCS4 encoding [this is automatically handled by asUCS4()]
143150
if (sizeof(wchar_t) == 2) {
144-
PY_UNICODE_OBJECT_WSTR(pyObject) = (wchar_t *)chars;
145-
PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = length;
151+
PY_UNICODE_OBJECT_WSTR(pyString) = (wchar_t *)chars;
152+
PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = length;
146153
}
147154
else {
148-
PY_UNICODE_OBJECT_WSTR(pyObject) = NULL;
149-
PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = 0;
155+
PY_UNICODE_OBJECT_WSTR(pyString) = NULL;
156+
PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = 0;
150157
}
151-
PY_UNICODE_OBJECT_READY(pyObject) = 1;
158+
PY_UNICODE_OBJECT_READY(pyString) = 1;
152159
#endif
153160

154161
if (containsSurrogatePair(chars, length)) {
155162
// We must convert to UCS4 here because Python does not support decoding string containing surrogate pairs to bytes
156-
PyObject *ucs4Obj = asUCS4(pyObject); // convert to a new PyUnicodeObject with UCS4 data
163+
PyObject *ucs4Obj = asUCS4((PyObject *)pyString); // convert to a new PyUnicodeObject with UCS4 data
157164
if (!ucs4Obj) {
158-
// conversion fails, keep the original `pyObject`
159-
return pyObject;
165+
// conversion fails, keep the original `pyString`
166+
return (PyObject *)pyString;
160167
}
161-
Py_DECREF(pyObject);
168+
Py_DECREF(pyString);
162169
return ucs4Obj;
163170
}
164171
}
165172

166-
return pyObject;
173+
return (PyObject *)pyString;
167174
}
168175

169-
PyObject *StrType::getPyObject(JSContext *cx, JSString *str) {
176+
PyObject *StrType::getPyObject(JSContext *cx, JS::HandleValue str) {
177+
const PythonExternalString *callbacks;
178+
const char16_t *chars;
179+
180+
if (JS::IsExternalString(str.toString(), (const JSExternalStringCallbacks **)&callbacks, &chars)) {
181+
if (callbacks == &PythonExternalStringCallbacks) {
182+
PyObject *pyString = callbacks->getPyString(chars);
183+
Py_INCREF(pyString);
184+
return pyString;
185+
}
186+
}
187+
170188
return processString(cx, str);
171189
}
172190

173-
const char *StrType::getValue(JSContext *cx, JSString *str) {
174-
PyObject *pyObject = processString(cx, str);
175-
const char *value = PyUnicode_AsUTF8(pyObject);
176-
Py_DECREF(pyObject);
191+
const char *StrType::getValue(JSContext *cx, JS::HandleValue str) {
192+
PyObject *pyString = processString(cx, str);
193+
const char *value = PyUnicode_AsUTF8(pyString);
194+
Py_DECREF(pyString);
177195
return value;
178196
}

src/jsTypeFactory.cc

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -49,24 +49,30 @@ static PyObjectProxyHandler pyObjectProxyHandler;
4949
static PyListProxyHandler pyListProxyHandler;
5050
static PyIterableProxyHandler pyIterableProxyHandler;
5151

52-
std::unordered_map<char16_t *, PyObject *> charToPyObjectMap; // a map of char16_t buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
53-
54-
struct PythonExternalString : public JSExternalStringCallbacks {
55-
void finalize(char16_t *chars) const override {
56-
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
57-
// Then, when shutting down, there is only on reference left, and we don't need
58-
// to free the object since the entire process memory is being released.
59-
PyObject *object = charToPyObjectMap[chars];
60-
if (Py_REFCNT(object) > 1) {
61-
Py_DECREF(object);
62-
}
63-
}
64-
size_t sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const override {
65-
return 0;
52+
std::unordered_map<const char16_t *, PyObject *> charToPyObjectMap; // a map of char16_t buffers to their corresponding PyObjects, used when finalizing JSExternalStrings
53+
54+
PyObject *PythonExternalString::getPyString(const char16_t *chars)
55+
{
56+
return charToPyObjectMap[chars];
57+
}
58+
59+
void PythonExternalString::finalize(char16_t *chars) const
60+
{
61+
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
62+
// Then, when shutting down, there is only on reference left, and we don't need
63+
// to free the object since the entire process memory is being released.
64+
PyObject *object = charToPyObjectMap[chars];
65+
if (Py_REFCNT(object) > 1) {
66+
Py_DECREF(object);
6667
}
67-
};
68+
}
69+
70+
size_t PythonExternalString::sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const
71+
{
72+
return PyUnicode_GetLength(charToPyObjectMap[chars]);
73+
}
6874

69-
static constexpr PythonExternalString PythonExternalStringCallbacks;
75+
PythonExternalString PythonExternalStringCallbacks = {};
7076

7177
size_t UCS4ToUTF16(const uint32_t *chars, size_t length, uint16_t **outStr) {
7278
uint16_t *utf16String = (uint16_t *)malloc(sizeof(uint16_t) * length*2);
@@ -112,7 +118,7 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) {
112118
returnType.setNumber(PyFloat_AsDouble(object));
113119
}
114120
else if (PyObject_TypeCheck(object, &JSStringProxyType)) {
115-
returnType.setString(((JSStringProxy *)object)->jsString.toString());
121+
returnType.setString(((JSStringProxy *)object)->jsString->toString());
116122
}
117123
else if (PyUnicode_Check(object)) {
118124
switch (PyUnicode_KIND(object)) {

src/modules/pythonmonkey/pythonmonkey.cc

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ PyTypeObject JSObjectProxyType = {
128128
PyTypeObject JSStringProxyType = {
129129
.tp_name = PyUnicode_Type.tp_name,
130130
.tp_basicsize = sizeof(JSStringProxy),
131+
.tp_dealloc = (destructor)JSStringProxyMethodDefinitions::JSStringProxy_dealloc,
131132
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_UNICODE_SUBCLASS,
132133
.tp_doc = PyDoc_STR("Javascript String value"),
133134
.tp_base = &PyUnicode_Type
@@ -419,7 +420,7 @@ static PyObject *eval(PyObject *self, PyObject *args) {
419420

420421
// compile the code to execute
421422
JS::RootedScript script(GLOBAL_CX);
422-
JS::Rooted<JS::Value> *rval = new JS::Rooted<JS::Value>(GLOBAL_CX);
423+
JS::Rooted<JS::Value> rval(GLOBAL_CX);
423424
if (code) {
424425
JS::SourceText<mozilla::Utf8Unit> source;
425426
const char *codeChars = PyUnicode_AsUTF8(code);
@@ -440,27 +441,17 @@ static PyObject *eval(PyObject *self, PyObject *args) {
440441
}
441442

442443
// execute the compiled code; last expr goes to rval
443-
if (!JS_ExecuteScript(GLOBAL_CX, script, rval)) {
444+
if (!JS_ExecuteScript(GLOBAL_CX, script, &rval)) {
444445
setSpiderMonkeyException(GLOBAL_CX);
445446
return NULL;
446447
}
447448

448449
// translate to the proper python type
449-
PyObject *returnValue = pyTypeFactory(GLOBAL_CX, *rval);
450+
PyObject *returnValue = pyTypeFactory(GLOBAL_CX, rval);
450451
if (PyErr_Occurred()) {
451452
return NULL;
452453
}
453454

454-
// TODO: Find a way to root strings for the lifetime of a proxying python string
455-
js::ESClass cls = js::ESClass::Other; // placeholder if `rval` is not a JSObject
456-
if (rval->isObject()) {
457-
JS::GetBuiltinClass(GLOBAL_CX, JS::RootedObject(GLOBAL_CX, &rval->toObject()), &cls);
458-
}
459-
460-
if (!(rval->isString() || cls == js::ESClass::String)) { // rval may be a string which must be kept alive.
461-
delete rval;
462-
}
463-
464455
if (returnValue) {
465456
return returnValue;
466457
}

0 commit comments

Comments
 (0)