Skip to content

Commit 8db45d8

Browse files
committed
[GR-47383] Store and reuse interned unicode object
PullRequest: graalpython/2871
2 parents 764d560 + 7e3a990 commit 8db45d8

File tree

7 files changed

+106
-31
lines changed

7 files changed

+106
-31
lines changed

graalpython/com.oracle.graal.python.cext/src/capi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ typedef struct {
347347
BUILTIN(PyTruffleUnicode_Decode, PyObject*, PyObject*, const char*, const char*) \
348348
BUILTIN(PyTruffleUnicode_DecodeUTF8Stateful, PyObject*, void*, const char*, int) \
349349
BUILTIN(PyTruffleUnicode_FromUCS, PyObject*, void*, Py_ssize_t, int) \
350-
BUILTIN(PyTruffleUnicode_InternInPlace, PyObject*, PyObject*) \
350+
BUILTIN(PyTruffleUnicode_LookupAndIntern, PyObject*, PyObject*) \
351351
BUILTIN(PyTruffleUnicode_New, PyObject*, void*, Py_ssize_t, Py_UCS4) \
352352
BUILTIN(PyTruffle_Arg_ParseTupleAndKeywords, int, PyObject*, PyObject*, const char*, void*, void*) \
353353
BUILTIN(PyTruffle_ByteArray_EmptyWithCapacity, PyObject*, Py_ssize_t) \

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,24 @@ void PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) {
222222
Py_XDECREF(right);
223223
}
224224

225-
void PyUnicode_InternInPlace(PyObject **s) {
226-
PyObject *t = GraalPyTruffleUnicode_InternInPlace(*s);
227-
if (t != *s) {
228-
Py_INCREF(t);
229-
Py_SETREF(*s, t);
230-
}
225+
void PyUnicode_InternInPlace(PyObject **p) {
226+
PyObject *s = *p;
227+
if (s == NULL) {
228+
return;
229+
}
230+
231+
// PyObject *t = PyDict_SetDefault(interned, s, s);
232+
PyObject *t = GraalPyTruffleUnicode_LookupAndIntern(s);
233+
if (t == NULL) {
234+
PyErr_Clear();
235+
return;
236+
}
237+
238+
if (t != s) {
239+
Py_INCREF(t);
240+
Py_SETREF(*p, t);
241+
return;
242+
}
231243
}
232244

233245
// taken from CPython "Python/Objects/unicodeobject.c"

graalpython/com.oracle.graal.python.jni/src/capi_forwards.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ void unimplemented(const char* name) {
764764
#undef PyTruffleUnicode_Decode
765765
#undef PyTruffleUnicode_DecodeUTF8Stateful
766766
#undef PyTruffleUnicode_FromUCS
767-
#undef PyTruffleUnicode_InternInPlace
767+
#undef PyTruffleUnicode_LookupAndIntern
768768
#undef PyTruffleUnicode_New
769769
#undef PyTruffle_Arg_ParseTupleAndKeywords
770770
#undef PyTruffle_ByteArray_EmptyWithCapacity

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
import re
4141
import sys
4242

43-
from . import CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON, CPyExtFunctionOutVars
43+
from . import CPyExtType, CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON, CPyExtFunctionOutVars
4444

4545
__dir__ = __file__.rpartition("/")[0]
4646

@@ -895,3 +895,28 @@ def compile_module(self, name):
895895
arguments=["PyObject* string", "PyObject* sep", "Py_ssize_t maxsplit"],
896896
cmpfunc=unhandled_error_compare
897897
)
898+
899+
class TestUnicodeObject(object):
900+
def test_intern(self):
901+
TestIntern = CPyExtType("TestIntern",
902+
'''
903+
static PyObject* set_intern_str(PyObject* self, PyObject* str) {
904+
PyUnicode_InternInPlace(&str);
905+
((TestInternObject*)self)->str = str;
906+
return str;
907+
}
908+
909+
static PyObject* check_is_same_str_ptr(PyObject* self, PyObject* str) {
910+
PyUnicode_InternInPlace(&str);
911+
return str == ((TestInternObject*)self)->str ? Py_True : Py_False;
912+
}
913+
''',
914+
cmembers="PyObject *str;",
915+
tp_methods='''{"set_intern_str", (PyCFunction)set_intern_str, METH_O, ""},
916+
{"check_is_same_str_ptr", (PyCFunction)check_is_same_str_ptr, METH_O, ""}
917+
'''
918+
)
919+
tester = TestIntern()
920+
s = 'some text'
921+
assert tester.set_intern_str(s) == s
922+
assert tester.check_is_same_str_ptr(s)

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltinRegistry.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ private PythonCextBuiltinRegistry() {
303303
public static final CApiBuiltinExecutable PyTruffleUnicode_Decode = new CApiBuiltinExecutable("PyTruffleUnicode_Decode", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.PyObject, ArgDescriptor.ConstCharPtrAsTruffleString, ArgDescriptor.ConstCharPtrAsTruffleString}, 242);
304304
public static final CApiBuiltinExecutable PyTruffleUnicode_DecodeUTF8Stateful = new CApiBuiltinExecutable("PyTruffleUnicode_DecodeUTF8Stateful", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Pointer, ArgDescriptor.ConstCharPtrAsTruffleString, ArgDescriptor.Int}, 243);
305305
public static final CApiBuiltinExecutable PyTruffleUnicode_FromUCS = new CApiBuiltinExecutable("PyTruffleUnicode_FromUCS", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Pointer, ArgDescriptor.Py_ssize_t, ArgDescriptor.Int}, 244);
306-
public static final CApiBuiltinExecutable PyTruffleUnicode_InternInPlace = new CApiBuiltinExecutable("PyTruffleUnicode_InternInPlace", CApiCallPath.Ignored, ArgDescriptor.PyObjectBorrowed, new ArgDescriptor[]{ArgDescriptor.PyObject}, 245);
306+
public static final CApiBuiltinExecutable PyTruffleUnicode_LookupAndIntern = new CApiBuiltinExecutable("PyTruffleUnicode_LookupAndIntern", CApiCallPath.Ignored, ArgDescriptor.PyObject, new ArgDescriptor[]{ArgDescriptor.PyObject}, 245);
307307
public static final CApiBuiltinExecutable PyTruffleUnicode_New = new CApiBuiltinExecutable("PyTruffleUnicode_New", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Pointer, ArgDescriptor.Py_ssize_t, ArgDescriptor.PY_UCS4}, 246);
308308
public static final CApiBuiltinExecutable PyTruffle_Arg_ParseTupleAndKeywords = new CApiBuiltinExecutable("PyTruffle_Arg_ParseTupleAndKeywords", CApiCallPath.Ignored, ArgDescriptor.Int, new ArgDescriptor[]{ArgDescriptor.PyObject, ArgDescriptor.PyObject, ArgDescriptor.ConstCharPtrAsTruffleString, ArgDescriptor.Pointer, ArgDescriptor.Pointer}, 247);
309309
public static final CApiBuiltinExecutable PyTruffle_ByteArray_EmptyWithCapacity = new CApiBuiltinExecutable("PyTruffle_ByteArray_EmptyWithCapacity", CApiCallPath.Ignored, ArgDescriptor.PyObjectTransfer, new ArgDescriptor[]{ArgDescriptor.Py_ssize_t}, 248);
@@ -849,7 +849,7 @@ private PythonCextBuiltinRegistry() {
849849
PyTruffleUnicode_Decode,
850850
PyTruffleUnicode_DecodeUTF8Stateful,
851851
PyTruffleUnicode_FromUCS,
852-
PyTruffleUnicode_InternInPlace,
852+
PyTruffleUnicode_LookupAndIntern,
853853
PyTruffleUnicode_New,
854854
PyTruffle_Arg_ParseTupleAndKeywords,
855855
PyTruffle_ByteArray_EmptyWithCapacity,
@@ -1643,7 +1643,7 @@ static CApiBuiltinNode createBuiltinNode(int id) {
16431643
case 244:
16441644
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_FromUCSNodeGen.create();
16451645
case 245:
1646-
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_InternInPlaceNodeGen.create();
1646+
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_LookupAndInternNodeGen.create();
16471647
case 246:
16481648
return com.oracle.graal.python.builtins.modules.cext.PythonCextUnicodeBuiltinsFactory.PyTruffleUnicode_NewNodeGen.create();
16491649
case 247:

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PY_UNICODE_PTR;
5656
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Pointer;
5757
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyObject;
58-
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyObjectBorrowed;
5958
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PyObjectTransfer;
6059
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Py_ssize_t;
6160
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.SIZE_T;
@@ -85,7 +84,6 @@
8584
import com.oracle.graal.python.builtins.modules.BuiltinFunctions.ChrNode;
8685
import com.oracle.graal.python.builtins.modules.CodecsModuleBuiltins;
8786
import com.oracle.graal.python.builtins.modules.CodecsModuleBuiltins.CodecsEncodeNode;
88-
import com.oracle.graal.python.builtins.modules.SysModuleBuiltins.InternNode;
8987
import com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltins.CApi5BuiltinNode;
9088
import com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltins.CApi6BuiltinNode;
9189
import com.oracle.graal.python.builtins.modules.cext.PythonCextBuiltins.CApiBinaryBuiltinNode;
@@ -103,12 +101,14 @@
103101
import com.oracle.graal.python.builtins.objects.cext.capi.CExtNodes.UnicodeFromFormatNode;
104102
import com.oracle.graal.python.builtins.objects.cext.capi.PySequenceArrayWrapper;
105103
import com.oracle.graal.python.builtins.objects.cext.capi.UnicodeObjectNodes.UnicodeAsWideCharNode;
106-
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor;
107104
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes;
108105
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.Charsets;
109106
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.EncodeNativeStringNode;
110107
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.GetByteArrayNode;
111108
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.UnicodeFromWcharNode;
109+
import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageGetItem;
110+
import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageSetItem;
111+
import com.oracle.graal.python.builtins.objects.dict.PDict;
112112
import com.oracle.graal.python.builtins.objects.ints.PInt;
113113
import com.oracle.graal.python.builtins.objects.memoryview.PMemoryView;
114114
import com.oracle.graal.python.builtins.objects.str.NativeCharSequence;
@@ -123,15 +123,18 @@
123123
import com.oracle.graal.python.builtins.objects.str.StringBuiltins.RFindNode;
124124
import com.oracle.graal.python.builtins.objects.str.StringBuiltins.ReplaceNode;
125125
import com.oracle.graal.python.builtins.objects.str.StringBuiltins.StartsWithNode;
126+
import com.oracle.graal.python.builtins.objects.str.StringNodes;
126127
import com.oracle.graal.python.lib.PyObjectIsTrueNode;
127128
import com.oracle.graal.python.lib.PyObjectLookupAttr;
128129
import com.oracle.graal.python.lib.PySliceNew;
129130
import com.oracle.graal.python.nodes.ErrorMessages;
130131
import com.oracle.graal.python.nodes.PGuards;
131132
import com.oracle.graal.python.nodes.PRaiseNode;
132133
import com.oracle.graal.python.nodes.StringLiterals;
134+
import com.oracle.graal.python.nodes.attributes.ReadAttributeFromDynamicObjectNode;
133135
import com.oracle.graal.python.nodes.call.CallNode;
134136
import com.oracle.graal.python.nodes.classes.IsSubtypeNode;
137+
import com.oracle.graal.python.nodes.object.BuiltinClassProfiles.IsBuiltinObjectProfile;
135138
import com.oracle.graal.python.nodes.object.GetClassNode;
136139
import com.oracle.graal.python.nodes.truffle.PythonArithmeticTypes;
137140
import com.oracle.graal.python.nodes.truffle.PythonTypes;
@@ -278,26 +281,51 @@ protected boolean isStringSubtype(Object obj, GetClassNode getClassNode, IsSubty
278281
}
279282
}
280283

281-
@CApiBuiltin(ret = PyObjectBorrowed, args = {ArgDescriptor.PyObject}, call = Ignored)
282-
abstract static class PyTruffleUnicode_InternInPlace extends CApiUnaryBuiltinNode {
283-
@Specialization(guards = {"!isTruffleString(obj)", "isStringSubtype(obj, getClassNode, isSubtypeNode)"})
284-
Object intern(Object obj,
285-
@Cached InternNode internNode,
286-
@SuppressWarnings("unused") @Cached GetClassNode getClassNode,
287-
@SuppressWarnings("unused") @Cached IsSubtypeNode isSubtypeNode) {
288-
return internNode.execute(null, obj);
284+
@CApiBuiltin(ret = PyObject, args = {PyObject}, call = Ignored)
285+
abstract static class PyTruffleUnicode_LookupAndIntern extends CApiUnaryBuiltinNode {
286+
@Specialization
287+
Object withTS(TruffleString str,
288+
@Shared @Cached StringNodes.InternStringNode internNode,
289+
@Shared @Cached HashingStorageGetItem getItem,
290+
@Shared @Cached HashingStorageSetItem setItem) {
291+
PDict dict = getCApiContext().getInternedUnicode();
292+
if (dict == null) {
293+
dict = factory().createDict();
294+
getCApiContext().setInternedUnicode(dict);
295+
}
296+
Object interned = getItem.execute(dict.getDictStorage(), str);
297+
if (interned == null) {
298+
interned = internNode.execute(str);
299+
dict.setDictStorage(setItem.execute(dict.getDictStorage(), str, interned));
300+
}
301+
return interned;
289302
}
290303

291-
@Specialization(guards = {"!isTruffleString(obj)", "!isStringSubtype(obj, getClassNode, isSubtypeNode)"})
292-
Object intern(@SuppressWarnings("unused") Object obj,
293-
@SuppressWarnings("unused") @Cached GetClassNode getClassNode,
294-
@SuppressWarnings("unused") @Cached IsSubtypeNode isSubtypeNode) {
295-
assert false;
296-
return PNone.NONE;
304+
@Specialization
305+
Object withPString(PString str,
306+
@Bind("this") Node inliningTarget,
307+
@Cached IsBuiltinObjectProfile isBuiltinClassProfile,
308+
@Cached ReadAttributeFromDynamicObjectNode readNode,
309+
@Shared @Cached StringNodes.InternStringNode internNode,
310+
@Shared @Cached HashingStorageGetItem getItem,
311+
@Shared @Cached HashingStorageSetItem setItem) {
312+
if (!isBuiltinClassProfile.profileObject(inliningTarget, str, PythonBuiltinClassType.PString)) {
313+
return getNativeNull();
314+
}
315+
boolean isInterned = readNode.execute(str, PString.INTERNED) != PNone.NO_VALUE;
316+
if (isInterned) {
317+
return str;
318+
}
319+
return withTS(str.getValueUncached(), internNode, getItem, setItem);
297320
}
298321

299-
protected boolean isStringSubtype(Object obj, GetClassNode getClassNode, IsSubtypeNode isSubtypeNode) {
300-
return isSubtypeNode.execute(getClassNode.execute(obj), PythonBuiltinClassType.PString);
322+
@Fallback
323+
Object nil(@SuppressWarnings("unused") Object obj) {
324+
/*
325+
* If it's a subclass, we don't really know what putting it in the interned dict might
326+
* do.
327+
*/
328+
return getNativeNull();
301329
}
302330
}
303331

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ public final class CApiContext extends CExtContext {
189189
/** Same as {@code import.c: extensions} but we don't keep a PDict; just a bare Java HashMap. */
190190
private final HashMap<Pair<TruffleString, TruffleString>, Object> extensions = new HashMap<>(4);
191191

192+
/** corresponds to {@code unicodeobject.c: interned} */
193+
private PDict internedUnicode;
192194
private final ArrayList<Object> modulesByIndex = new ArrayList<>(0);
193195

194196
public final HashMap<Long, PLock> locks = new HashMap<>();
@@ -276,6 +278,14 @@ public static Object asHex(Object ptr) {
276278
return Objects.toString(ptr);
277279
}
278280

281+
public PDict getInternedUnicode() {
282+
return internedUnicode;
283+
}
284+
285+
public void setInternedUnicode(PDict internedUnicode) {
286+
this.internedUnicode = internedUnicode;
287+
}
288+
279289
/**
280290
* Tries to convert the object to a pointer (type: {@code long}) to avoid materialization of
281291
* pointer objects. If that is not possible, the object will be returned as given.

0 commit comments

Comments
 (0)