Skip to content

Commit dbb0a47

Browse files
committed
Avoid eager materialization of native char sequence
1 parent f1948ff commit dbb0a47

File tree

2 files changed

+40
-6
lines changed

2 files changed

+40
-6
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/PyUnicodeWrappers.java

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,15 @@
5353
import java.nio.charset.CharsetEncoder;
5454
import java.nio.charset.StandardCharsets;
5555

56+
import com.oracle.graal.python.builtins.objects.cext.CExtNodes.SizeofWCharNode;
5657
import com.oracle.graal.python.builtins.objects.cext.DynamicObjectNativeWrapper.PAsPointerNode;
5758
import com.oracle.graal.python.builtins.objects.cext.DynamicObjectNativeWrapper.ToPyObjectNode;
5859
import com.oracle.graal.python.builtins.objects.cext.UnicodeObjectNodes.UnicodeAsWideCharNode;
60+
import com.oracle.graal.python.builtins.objects.str.NativeCharSequence;
5961
import com.oracle.graal.python.builtins.objects.str.PString;
6062
import com.oracle.graal.python.builtins.objects.str.StringNodes.StringLenNode;
63+
import com.oracle.graal.python.builtins.objects.str.StringNodes.StringMaterializeNode;
64+
import com.oracle.truffle.api.CompilerDirectives;
6165
import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
6266
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
6367
import com.oracle.truffle.api.dsl.Cached;
@@ -67,6 +71,7 @@
6771
import com.oracle.truffle.api.library.CachedLibrary;
6872
import com.oracle.truffle.api.library.ExportLibrary;
6973
import com.oracle.truffle.api.library.ExportMessage;
74+
import com.oracle.truffle.api.profiles.ConditionProfile;
7075
import com.oracle.truffle.llvm.spi.NativeTypeLibrary;
7176

7277
public abstract class PyUnicodeWrappers {
@@ -163,6 +168,12 @@ Object readMember(String member,
163168
if (isMemberReadable(member)) {
164169
int elementSize = (int) sizeofWcharNode.execute();
165170
PString s = getPString(lib);
171+
CharSequence content = s.getCharSequence();
172+
173+
if (content instanceof NativeCharSequence) {
174+
// in this case, we can just return the pointer
175+
return ((NativeCharSequence) content).getPtr();
176+
}
166177
return new PySequenceArrayWrapper(asWideCharNode.execute(s, elementSize, stringLenNode.execute(s)), elementSize);
167178
}
168179
throw UnknownIdentifierException.create(member);
@@ -209,25 +220,46 @@ boolean isMemberReadable(String member) {
209220
@ExportMessage
210221
Object readMember(String member,
211222
@CachedLibrary("this") PythonNativeWrapperLibrary lib,
223+
@Cached ConditionProfile storageProfile,
224+
@Cached StringMaterializeNode materializeNode,
212225
@Cached CExtNodes.SizeofWCharNode sizeofWcharNode) throws UnknownIdentifierException {
213226
// padding(24), ready(1), ascii(1), compact(1), kind(3), interned(2)
214227
int value = 0b000000000000000000000000_1_0_0_000_00;
215-
if (onlyAscii(getPString(lib).getValue())) {
228+
PString delegate = getPString(lib);
229+
if (onlyAscii(delegate, storageProfile, materializeNode)) {
216230
value |= 0b1_0_000_00;
217231
}
218-
value |= ((int) sizeofWcharNode.execute() << 2) & 0b11100;
232+
value |= (getKind(delegate, storageProfile, sizeofWcharNode) << 2) & 0b11100;
219233
if (isMemberReadable(member)) {
220234
// it's a bit field; so we need to return the whole 32-bit word
221235
return value;
222236
}
223237
throw UnknownIdentifierException.create(member);
224238
}
225239

226-
private boolean onlyAscii(String value) {
240+
private boolean onlyAscii(PString value, ConditionProfile storageProfile, StringMaterializeNode stringMaterializeNode) {
241+
CharSequence storage = value.getCharSequence();
242+
243+
// important: avoid materialization of native sequences
244+
if (storageProfile.profile(storage instanceof NativeCharSequence)) {
245+
return ((NativeCharSequence) storage).isAsciiOnly();
246+
}
247+
227248
if (asciiEncoder == null) {
249+
CompilerDirectives.transferToInterpreterAndInvalidate();
228250
asciiEncoder = newAsciiEncoder();
229251
}
230-
return doCheck(value, asciiEncoder);
252+
return doCheck(stringMaterializeNode.execute(value), asciiEncoder);
253+
}
254+
255+
/**
 * Computes the value that the caller shifts into the {@code kind} bits of the unicode state
 * bit field.
 *
 * If the string's storage is a {@link NativeCharSequence}, the element size recorded on that
 * sequence is returned directly, so the native sequence does not have to be materialized.
 * For any other storage, the value produced by {@code sizeofWcharNode} is used.
 *
 * @param value the string whose underlying char sequence is inspected
 * @param storageProfile profiles whether the storage is a {@link NativeCharSequence}
 * @param sizeofWcharNode node queried for the fallback value when the storage is not native
 * @return the native sequence's element size, or the result of {@code sizeofWcharNode}
 */
private int getKind(PString value, ConditionProfile storageProfile, SizeofWCharNode sizeofWcharNode) {
256+
CharSequence storage = value.getCharSequence();
257+
258+
// important: avoid materialization of native sequences
259+
if (storageProfile.profile(storage instanceof NativeCharSequence)) {
260+
return ((NativeCharSequence) storage).getElementSize();
261+
}
262+
return (int) sizeofWcharNode.execute();
231263
}
232264

233265
@TruffleBoundary

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/UnicodeObjectNodes.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -49,6 +49,7 @@
4949
import com.oracle.graal.python.builtins.objects.cext.UnicodeObjectNodesFactory.UnicodeAsWideCharNodeGen.LittleEndianNodeGen;
5050
import com.oracle.graal.python.builtins.objects.cext.UnicodeObjectNodesFactory.UnicodeAsWideCharNodeGen.NativeOrderNodeGen;
5151
import com.oracle.graal.python.builtins.objects.str.PString;
52+
import com.oracle.graal.python.builtins.objects.str.StringNodes.StringMaterializeNode;
5253
import com.oracle.graal.python.runtime.object.PythonObjectFactory;
5354
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
5455
import com.oracle.truffle.api.dsl.Cached;
@@ -129,8 +130,9 @@ public static UnicodeAsWideCharNode getUncachedBigEndian() {
129130

130131
@Specialization
131132
PBytes doUnicode(PString s, long elementSize, long elements,
133+
@Cached StringMaterializeNode materializeNode,
132134
@Shared("factory") @Cached PythonObjectFactory factory) {
133-
return doUnicode(s.getValue(), elementSize, elements, factory);
135+
return doUnicode(materializeNode.execute(s), elementSize, elements, factory);
134136
}
135137

136138
@Specialization

0 commit comments

Comments
 (0)