File tree Expand file tree Collapse file tree 1 file changed +25
-0
lines changed
Expand file tree Collapse file tree 1 file changed +25
-0
lines changed Original file line number Diff line number Diff line change 3333 #define PY_UNICODE_OBJECT_READY (op ) (PY_ASCII_OBJECT_CAST(op)->state.ready)
3434#endif
3535
36+ static bool containsSurrogatePair (const char16_t *chars, size_t length) {
37+ for (size_t i = 0 ; i < length; i++) {
38+ if (Py_UNICODE_IS_SURROGATE (chars[i])) {
39+ return true ;
40+ }
41+ }
42+ return false ;
43+ }
44+
// Construct a StrType from an existing Python object: `object` is handed
// straight to the PyType constructor and no other work is done here.
3645StrType::StrType (PyObject *object) : PyType(object) {}
3746
// Construct a StrType from a C string: a Python object is built from `string`
// with Py_BuildValue (`s` format unit) and handed to the PyType constructor.
// NOTE(review): the result of Py_BuildValue is not checked here — confirm that
// PyType tolerates a failed (null) result.
3847StrType::StrType (char *string) : PyType(Py_BuildValue(" s" , string)) {}
@@ -91,6 +100,18 @@ StrType::StrType(JSContext *cx, JSString *str) {
91100 }
92101 PY_UNICODE_OBJECT_READY (pyObject) = 1 ;
93102 #endif
103+
104+ if (containsSurrogatePair (chars, length)) {
105+ // We must convert to UCS4 here because Python does not support encoding a string that contains surrogate pairs to bytes
106+ PyObject *ucs4Obj = this ->asUCS4 (); // convert `pyObject` to a new PyUnicodeObject with UCS4 data
107+ if (!ucs4Obj) {
108+ // conversion fails, keep the original `pyObject`
109+ PyErr_Clear ();
110+ return ;
111+ }
112+ Py_DECREF (pyObject); // cleanup the old `pyObject`
113+ pyObject = Py_NewRef (ucs4Obj);
114+ }
94115 }
95116}
96117
@@ -99,6 +120,10 @@ const char *StrType::getValue() const {
99120}
100121
101122PyObject *StrType::asUCS4 () {
123+ if (PyUnicode_KIND (pyObject) != PyUnicode_2BYTE_KIND) {
124+ return Py_NewRef (pyObject);
125+ }
126+
102127 uint16_t *chars = PY_UNICODE_OBJECT_DATA_UCS2 (pyObject);
103128 size_t length = PY_UNICODE_OBJECT_LENGTH (pyObject);
104129
You can’t perform that action at this time.
0 commit comments