Skip to content

Commit b6db481

Browse files
committed
Store and reuse interned unicode object
1 parent 650d0c8 commit b6db481

File tree

6 files changed

+115
-30
lines changed

6 files changed

+115
-30
lines changed

graalpython/com.oracle.graal.python.cext/src/capi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ typedef struct {
347347
BUILTIN(PyTruffleUnicode_Decode, PyObject*, PyObject*, const char*, const char*) \
348348
BUILTIN(PyTruffleUnicode_DecodeUTF8Stateful, PyObject*, void*, const char*, int) \
349349
BUILTIN(PyTruffleUnicode_FromUCS, PyObject*, void*, Py_ssize_t, int) \
350-
BUILTIN(PyTruffleUnicode_InternInPlace, PyObject*, PyObject*) \
350+
BUILTIN(PyTruffleUnicode_LookupAndIntern, PyObject*, PyObject*, PyObject*) \
351351
BUILTIN(PyTruffleUnicode_New, PyObject*, void*, Py_ssize_t, Py_UCS4) \
352352
BUILTIN(PyTruffle_Arg_ParseTupleAndKeywords, int, PyObject*, PyObject*, const char*, void*, void*) \
353353
BUILTIN(PyTruffle_ByteArray_EmptyWithCapacity, PyObject*, Py_ssize_t) \

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,58 @@ void PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) {
222222
Py_XDECREF(right);
223223
}
224224

225-
void PyUnicode_InternInPlace(PyObject **s) {
226-
PyObject *t = GraalPyTruffleUnicode_InternInPlace(*s);
227-
if (t != *s) {
228-
Py_INCREF(t);
229-
Py_SETREF(*s, t);
230-
}
225+
static PyObject *interned = NULL;
226+
227+
void PyUnicode_InternInPlace(PyObject **p) {
228+
PyObject *s = *p;
229+
if (s == NULL || !PyUnicode_Check(s)) {
230+
return;
231+
}
232+
233+
/* If it's a subclass, we don't really know what putting
234+
it in the interned dict might do. */
235+
if (!PyUnicode_CheckExact(s)) {
236+
return;
237+
}
238+
239+
// will be checked by `GraalPyTruffleUnicode_LookupAndIntern` upcall
240+
// if (PyUnicode_CHECK_INTERNED(s)) {
241+
// return;
242+
// }
243+
244+
// if (PyUnicode_READY(s) == -1) {
245+
// PyErr_Clear();
246+
// return;
247+
// }
248+
249+
if (interned == NULL) {
250+
interned = PyDict_New();
251+
if (interned == NULL) {
252+
PyErr_Clear(); /* Don't leave an exception */
253+
return;
254+
}
255+
}
256+
257+
// PyObject *t = PyDict_SetDefault(interned, s, s);
258+
PyObject *t = GraalPyTruffleUnicode_LookupAndIntern(interned, s);
259+
if (t == NULL) {
260+
PyErr_Clear();
261+
return;
262+
}
263+
264+
if (t != s) {
265+
Py_INCREF(t);
266+
Py_SETREF(*p, t);
267+
return;
268+
}
269+
270+
/* The two references in interned dict (key and value) are not counted by
271+
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
272+
this. */
273+
// has been set already by `GraalPyTruffleUnicode_LookupAndIntern` upcall
274+
// Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
275+
// _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
276+
231277
}
232278

233279
// taken from CPython "Python/Objects/unicodeobject.c"

graalpython/com.oracle.graal.python.jni/src/capi_forwards.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ void unimplemented(const char* name) {
764764
#undef PyTruffleUnicode_Decode
765765
#undef PyTruffleUnicode_DecodeUTF8Stateful
766766
#undef PyTruffleUnicode_FromUCS
767-
#undef PyTruffleUnicode_InternInPlace
767+
#undef PyTruffleUnicode_LookupAndIntern
768768
#undef PyTruffleUnicode_New
769769
#undef PyTruffle_Arg_ParseTupleAndKeywords
770770
#undef PyTruffle_ByteArray_EmptyWithCapacity

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
import re
4141
import sys
4242

43-
from . import CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON, CPyExtFunctionOutVars
43+
from . import CPyExtType, CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON, CPyExtFunctionOutVars
4444

4545
__dir__ = __file__.rpartition("/")[0]
4646

@@ -895,3 +895,28 @@ def compile_module(self, name):
895895
arguments=["PyObject* string", "PyObject* sep", "Py_ssize_t maxsplit"],
896896
cmpfunc=unhandled_error_compare
897897
)
898+
899+
class TestUnicodeObject(object):
900+
def test_intern(self):
901+
TestIntern = CPyExtType("TestIntern",
902+
'''
903+
static PyObject* set_intern_str(PyObject* self, PyObject* str) {
904+
PyUnicode_InternInPlace(&str);
905+
((TestInternObject*)self)->str = str;
906+
return str;
907+
}
908+
909+
static PyObject* check_is_same_str_ptr(PyObject* self, PyObject* str) {
910+
PyUnicode_InternInPlace(&str);
911+
return str == ((TestInternObject*)self)->str ? Py_True : Py_False;
912+
}
913+
''',
914+
cmembers="PyObject *str;",
915+
tp_methods='''{"set_intern_str", (PyCFunction)set_intern_str, METH_O, ""},
916+
{"check_is_same_str_ptr", (PyCFunction)check_is_same_str_ptr, METH_O, ""}
917+
'''
918+
)
919+
tester = TestIntern()
920+
s = 'some text'
921+
assert tester.set_intern_str(s) == s
922+
assert tester.check_is_same_str_ptr(s)

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltinRegistry.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ private PythonCextBuiltinRegistry() {
303303
public static final CApiBuiltinExecutable PyTruffleUnicode_Decode = new CApiBuiltinExecutable("PyTruffleUnicode_Decode", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.PyObject, ArgDescriptor.ConstCharPtrAsTruffleString, ArgDescriptor.ConstCharPtrAsTruffleString}, 242);
304304
public static final CApiBuiltinExecutable PyTruffleUnicode_DecodeUTF8Stateful = new CApiBuiltinExecutable("PyTruffleUnicode_DecodeUTF8Stateful", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Pointer, ArgDescriptor.ConstCharPtrAsTruffleString, ArgDescriptor.Int}, 243);
305305
public static final CApiBuiltinExecutable PyTruffleUnicode_FromUCS = new CApiBuiltinExecutable("PyTruffleUnicode_FromUCS", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Pointer, ArgDescriptor.Py_ssize_t, ArgDescriptor.Int}, 244);
306-
public static final CApiBuiltinExecutable PyTruffleUnicode_InternInPlace = new CApiBuiltinExecutable("PyTruffleUnicode_InternInPlace", CApiCallPath.Ignored, ArgDescriptor.PyObjectBorrowed, new ArgDescriptor[]{ArgDescriptor.PyObject}, 245);
306+
public static final CApiBuiltinExecutable PyTruffleUnicode_LookupAndIntern = new CApiBuiltinExecutable("PyTruffleUnicode_LookupAndIntern", CApiCallPath.Ignored, ArgDescriptor.PyObject, new ArgDescriptor[]{ArgDescriptor.PyObject, ArgDescriptor.PyObject}, 245);
307307
public static final CApiBuiltinExecutable PyTruffleUnicode_New = new CApiBuiltinExecutable("PyTruffleUnicode_New", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Pointer, ArgDescriptor.Py_ssize_t, ArgDescriptor.PY_UCS4}, 246);
308308
public static final CApiBuiltinExecutable PyTruffle_Arg_ParseTupleAndKeywords = new CApiBuiltinExecutable("PyTruffle_Arg_ParseTupleAndKeywords", CApiCallPath.Ignored, ArgDescriptor.Int, new ArgDescriptor[]{ArgDescriptor.PyObject, ArgDescriptor.PyObject, ArgDescriptor.ConstCharPtrAsTruffleString, ArgDescriptor.Pointer, ArgDescriptor.Pointer}, 247);
309309
public static final CApiBuiltinExecutable PyTruffle_ByteArray_EmptyWithCapacity = new CApiBuiltinExecutable("PyTruffle_ByteArray_EmptyWithCapacity", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Py_ssize_t}, 248);
@@ -849,7 +849,7 @@ private PythonCextBuiltinRegistry() {
849849
PyTruffleUnicode_Decode,
850850
PyTruffleUnicode_DecodeUTF8Stateful,
851851
PyTruffleUnicode_FromUCS,
852-
PyTruffleUnicode_InternInPlace,
852+
PyTruffleUnicode_LookupAndIntern,
853853
PyTruffleUnicode_New,
854854
PyTruffle_Arg_ParseTupleAndKeywords,
855855
PyTruffle_ByteArray_EmptyWithCapacity,
@@ -1643,7 +1643,7 @@ static CApiBuiltinNode createBuiltinNode(int id) {
16431643
case 244:
16441644
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_FromUCSNodeGen.create();
16451645
case 245:
1646-
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_InternInPlaceNodeGen.create();
1646+
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_LookupAndInternNodeGen.create();
16471647
case 246:
16481648
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_NewNodeGen.create();
16491649
case 247:

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PY_UNICODE_PTR;
5656
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Pointer;
5757
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyObject;
58-
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyObjectBorrowed;
5958
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyObjectTransfer;
6059
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Py_ssize_t;
6160
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.SIZE_T;
@@ -103,12 +102,14 @@
103102
import com.oracle.graal.python.builtins.objects.cext.capi.CExtNodes.UnicodeFromFormatNode;
104103
import com.oracle.graal.python.builtins.objects.cext.capi.PySequenceArrayWrapper;
105104
import com.oracle.graal.python.builtins.objects.cext.capi.UnicodeObjectNodes.UnicodeAsWideCharNode;
106-
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor;
107105
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes;
108106
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.Charsets;
109107
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.EncodeNativeStringNode;
110108
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.GetByteArrayNode;
111109
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.UnicodeFromWcharNode;
110+
import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageGetItem;
111+
import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageSetItem;
112+
import com.oracle.graal.python.builtins.objects.dict.PDict;
112113
import com.oracle.graal.python.builtins.objects.ints.PInt;
113114
import com.oracle.graal.python.builtins.objects.memoryview.PMemoryView;
114115
import com.oracle.graal.python.builtins.objects.str.NativeCharSequence;
@@ -130,6 +131,7 @@
130131
import com.oracle.graal.python.nodes.PGuards;
131132
import com.oracle.graal.python.nodes.PRaiseNode;
132133
import com.oracle.graal.python.nodes.StringLiterals;
134+
import com.oracle.graal.python.nodes.attributes.ReadAttributeFromDynamicObjectNode;
133135
import com.oracle.graal.python.nodes.call.CallNode;
134136
import com.oracle.graal.python.nodes.classes.IsSubtypeNode;
135137
import com.oracle.graal.python.nodes.object.GetClassNode;
@@ -278,26 +280,38 @@ protected boolean isStringSubtype(Object obj, GetClassNode getClassNode, IsSubty
278280
}
279281
}
280282

281-
@CApiBuiltin(ret = PyObjectBorrowed, args = {ArgDescriptor.PyObject}, call = Ignored)
282-
abstract static class PyTruffleUnicode_InternInPlace extends CApiUnaryBuiltinNode {
283-
@Specialization(guards = {"!isTruffleString(obj)", "isStringSubtype(obj, getClassNode, isSubtypeNode)"})
284-
Object intern(Object obj,
285-
@Cached InternNode internNode,
286-
@SuppressWarnings("unused") @Cached GetClassNode getClassNode,
287-
@SuppressWarnings("unused") @Cached IsSubtypeNode isSubtypeNode) {
288-
return internNode.execute(null, obj);
283+
@CApiBuiltin(ret = PyObject, args = {PyObject, PyObject}, call = Ignored)
284+
abstract static class PyTruffleUnicode_LookupAndIntern extends CApiBinaryBuiltinNode {
285+
@Specialization
286+
Object withTS(PDict dict, TruffleString str,
287+
@Shared @Cached InternNode internNode,
288+
@Shared @Cached HashingStorageGetItem getItem,
289+
@Shared @Cached HashingStorageSetItem setItem) {
290+
Object interned = getItem.execute(dict.getDictStorage(), str);
291+
if (interned == null) {
292+
interned = internNode.execute(null, str);
293+
dict.setDictStorage(setItem.execute(dict.getDictStorage(), str, interned));
294+
}
295+
return interned;
289296
}
290297

291-
@Specialization(guards = {"!isTruffleString(obj)", "!isStringSubtype(obj, getClassNode, isSubtypeNode)"})
292-
Object intern(@SuppressWarnings("unused") Object obj,
293-
@SuppressWarnings("unused") @Cached GetClassNode getClassNode,
294-
@SuppressWarnings("unused") @Cached IsSubtypeNode isSubtypeNode) {
295-
assert false;
296-
return PNone.NONE;
298+
@Specialization
299+
Object withPString(PDict dict, PString str,
300+
@Cached ReadAttributeFromDynamicObjectNode readNode,
301+
@Shared @Cached InternNode internNode,
302+
@Shared @Cached HashingStorageGetItem getItem,
303+
@Shared @Cached HashingStorageSetItem setItem) {
304+
boolean isInterned = readNode.execute(str, PString.INTERNED) != PNone.NO_VALUE;
305+
if (isInterned) {
306+
return str;
307+
}
308+
return withTS(dict, str.getValueUncached(), internNode, getItem, setItem);
297309
}
298310

299-
protected boolean isStringSubtype(Object obj, GetClassNode getClassNode, IsSubtypeNode isSubtypeNode) {
300-
return isSubtypeNode.execute(getClassNode.execute(obj), PythonBuiltinClassType.PString);
311+
@Fallback
312+
Object nil(@SuppressWarnings("unused") Object dict,
313+
@SuppressWarnings("unused") Object obj) {
314+
return PNone.NONE;
301315
}
302316
}
303317

0 commit comments

Comments
 (0)