Skip to content

Commit fb0fb9d

Browse files
committed
[GR-26315] Add test for PyUnicode_New.
PullRequest: graalpython/1298
2 parents ad2172a + 6379eb7 commit fb0fb9d

File tree

7 files changed

+134
-52
lines changed

7 files changed

+134
-52
lines changed

graalpython/com.oracle.graal.python.cext/src/capi.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ PyObject* get_tp_dict(PyTypeObject* obj) {
302302

303303
/** to be used from Java code only; reads native 'tp_base' field */
304304
PyObject* get_tp_base(PyTypeObject* obj) {
305-
return native_to_java(obj->tp_base);
305+
return native_to_java((PyObject*) obj->tp_base);
306306
}
307307

308308
/** to be used from Java code only; reads native 'tp_bases' field */
@@ -480,13 +480,16 @@ const char* PyTruffle_StringToCstr(void* o, int32_t strLen) {
480480
return str;
481481
}
482482

483+
/* Use this function to decode a C char array to a Java string using the source file encoding. */
483484
void* PyTruffle_CstrToString(void* o) {
484-
if (polyglot_fits_in_i64(o)) {
485-
return polyglot_from_string((const char*)polyglot_as_i64(o), SRC_CS);
486-
}
487485
return polyglot_from_string(o, SRC_CS);
488486
}
489487

488+
/* Use this function to decode a C ASCII string to a Java string. */
489+
void* PyTruffle_AsciiToString(void* ptr) {
490+
return polyglot_from_string(ptr, "ascii");
491+
}
492+
490493
/* To be used from Java code only.
491494
* This function is used if a native class inherits from a managed class but uses the 'object.__new__'.
492495
* This function roughly corresponds to CPython's 'object_new'. */

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -512,9 +512,10 @@ PyObject* PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) {
512512
kind = PyUnicode_4BYTE_KIND;
513513
}
514514

515-
size_t n = size * kind;
516-
int8_t* ptr = (int8_t*) malloc(n);
517-
return _jls_PyUnicode_New(polyglot_from_i8_array((int8_t*)ptr, n), kind, is_ascii);
515+
/* add one to size for the null character */
516+
int8_t* ptr = (int8_t*) calloc(size + 1, kind);
517+
/* We intentionally reduce the size by one because interop users should not see the null character. */
518+
return _jls_PyUnicode_New(polyglot_from_i8_array((int8_t*)ptr, size * kind), kind, is_ascii);
518519
}
519520

520521
UPCALL_ID(PyUnicode_Compare);

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,4 +544,37 @@ def compile_module(self, name):
544544
cmpfunc=unhandled_error_compare
545545
)
546546

547+
test_PyUnicode_New = CPyExtFunction(
548+
lambda args: args[3],
549+
lambda: (
550+
(134818, bytearray([0xA2, 0x0E, 0x02, 0x00]), 1, "𠺢"),
551+
(134988, bytearray([0xA2, 0x0E, 0x02, 0x00, 0x4C, 0x0F, 0x02, 0x00]), 2, "𠺢𠽌"),
552+
(8240, bytearray([0x30, 0x20]), 1, "‰"),
553+
(8252, bytearray([0x30, 0x20, 0x3C, 0x20]), 2, "‰‼"),
554+
(127, bytearray([0x61, 0x62, 0x63, 0x64]), 4, "abcd"),
555+
(127, bytearray([0x61, 0x62, 0x63, 0x64]), 2, "ab"),
556+
),
557+
code='''PyObject* wrap_PyUnicode_New(Py_ssize_t maxchar, Py_buffer buffer, Py_ssize_t nchars, PyObject* dummy) {
558+
PyObject* obj = PyUnicode_New(nchars, (Py_UCS4) maxchar);
559+
void* data = PyUnicode_DATA(obj);
560+
size_t char_size;
561+
if (maxchar < 256) {
562+
char_size = 1;
563+
} else if (maxchar < 65536) {
564+
char_size = 2;
565+
} else {
566+
char_size = 4;
567+
}
568+
memcpy(data, buffer.buf, nchars * char_size);
569+
PyUnicode_READY(obj);
570+
return obj;
571+
}
572+
''',
573+
resultspec="O",
574+
argspec='ny*nO',
575+
arguments=["Py_ssize_t maxchar", "Py_buffer buffer", "Py_ssize_t nchars", "PyObject* dummy"],
576+
callfunction="wrap_PyUnicode_New",
577+
cmpfunc=unhandled_error_compare
578+
)
579+
547580

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/NativeCAPISymbols.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ public abstract class NativeCAPISymbols {
5555
public static final String FUN_PY_NONE_HANDLE = "PyNoneHandle";
5656
public static final String FUN_WHCAR_SIZE = "PyTruffle_Wchar_Size";
5757
public static final String FUN_PY_TRUFFLE_CSTR_TO_STRING = "PyTruffle_CstrToString";
58+
public static final String FUN_PY_TRUFFLE_ASCII_TO_STRING = "PyTruffle_AsciiToString";
5859
public static final String FUN_PY_FLOAT_AS_DOUBLE = "truffle_read_ob_fval";
5960
public static final String FUN_GET_OB_TYPE = "get_ob_type";
6061
public static final String FUN_GET_OB_REFCNT = "get_ob_refcnt";

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/NativeCharSequence.java

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,19 @@
4242

4343
import java.util.Objects;
4444

45+
import com.oracle.graal.python.PythonLanguage;
4546
import com.oracle.graal.python.builtins.objects.cext.CExtNodes.PCallCapiFunction;
46-
import com.oracle.graal.python.builtins.objects.cext.NativeCAPISymbols;
47+
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodesFactory.UnicodeFromWcharNodeGen;
48+
import com.oracle.graal.python.builtins.objects.str.StringNodes.StringMaterializeNode;
49+
import com.oracle.graal.python.nodes.util.CastToJavaIntExactNode;
4750
import com.oracle.truffle.api.CompilerAsserts;
51+
import com.oracle.truffle.api.CompilerDirectives;
52+
import com.oracle.truffle.api.TruffleLogger;
53+
import com.oracle.truffle.api.interop.InteropLibrary;
54+
import com.oracle.truffle.api.interop.UnsupportedMessageException;
4855

49-
public final class NativeCharSequence implements PCharSequence {
56+
public final class NativeCharSequence implements CharSequence {
57+
private static final TruffleLogger LOGGER = PythonLanguage.getLogger(NativeCharSequence.class);
5058

5159
/**
5260
* Pointer to the native buffer (most likely a {@code char*} containing ASCII characters but could
@@ -68,6 +76,7 @@ public final class NativeCharSequence implements PCharSequence {
6876
private String materialized;
6977

7078
public NativeCharSequence(Object ptr, int elementSize, boolean asciiOnly) {
79+
assert elementSize == 1 || elementSize == 2 || elementSize == 4;
7180
this.ptr = ptr;
7281
this.elementSize = elementSize;
7382
this.asciiOnly = asciiOnly;
@@ -78,6 +87,17 @@ public int length() {
7887
return materialize().length();
7988
}
8089

90+
int length(InteropLibrary lib, CastToJavaIntExactNode castToJavaIntNode) {
91+
try {
92+
int arraySize = castToJavaIntNode.execute(lib.getArraySize(ptr));
93+
assert arraySize % elementSize == 0;
94+
// no need to subtract the terminating null character: the array size reported via interop is expected to already exclude it
95+
return arraySize / elementSize;
96+
} catch (UnsupportedMessageException e) {
97+
throw CompilerDirectives.shouldNotReachHere("pointer of NativeCharSequence is not an array");
98+
}
99+
}
100+
81101
@Override
82102
public char charAt(int index) {
83103
return materialize().charAt(index);
@@ -88,22 +108,18 @@ public CharSequence subSequence(int start, int end) {
88108
return materialize().subSequence(start, end);
89109
}
90110

91-
@Override
92-
public boolean isMaterialized() {
111+
boolean isMaterialized() {
93112
return materialized != null;
94113
}
95114

96-
@Override
97-
public final String materialize() {
98-
if (!isMaterialized()) {
99-
materialized = (String) PCallCapiFunction.getUncached().call(NativeCAPISymbols.FUN_PY_TRUFFLE_CSTR_TO_STRING, ptr);
100-
}
115+
String getMaterialized() {
101116
return materialized;
102117
}
103118

104-
public String materialize(PCallCapiFunction node) {
119+
private String materialize() {
105120
if (!isMaterialized()) {
106-
materialized = (String) node.call(NativeCAPISymbols.FUN_PY_TRUFFLE_CSTR_TO_STRING, ptr);
121+
LOGGER.warning("uncached materialization of NativeCharSequence");
122+
materialized = StringMaterializeNode.materializeNativeCharSequence(this, PCallCapiFunction.getUncached(), UnicodeFromWcharNodeGen.getUncached());
107123
}
108124
return materialized;
109125
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,24 @@
2929

3030
import com.oracle.graal.python.PythonLanguage;
3131
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
32-
import com.oracle.graal.python.builtins.objects.cext.CExtNodes.PCallCapiFunction;
3332
import com.oracle.graal.python.builtins.objects.cext.PythonNativeWrapperLibrary;
3433
import com.oracle.graal.python.builtins.objects.function.PArguments.ThreadState;
3534
import com.oracle.graal.python.builtins.objects.object.PythonObjectLibrary;
3635
import com.oracle.graal.python.builtins.objects.str.StringNodes.StringMaterializeNode;
36+
import com.oracle.graal.python.builtins.objects.str.StringNodesFactory.StringMaterializeNodeGen;
3737
import com.oracle.graal.python.nodes.ErrorMessages;
3838
import com.oracle.graal.python.nodes.PRaiseNode;
3939
import com.oracle.graal.python.nodes.attributes.LookupAttributeInMRONode;
4040
import com.oracle.graal.python.nodes.attributes.LookupInheritedAttributeNode;
4141
import com.oracle.graal.python.nodes.object.IsBuiltinClassProfile;
4242
import com.oracle.graal.python.nodes.util.CannotCastException;
43+
import com.oracle.graal.python.nodes.util.CastToJavaIntExactNode;
4344
import com.oracle.graal.python.nodes.util.CastToJavaStringNode;
4445
import com.oracle.graal.python.runtime.sequence.PSequence;
4546
import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage;
4647
import com.oracle.truffle.api.CompilerDirectives;
4748
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
49+
import com.oracle.truffle.api.dsl.Bind;
4850
import com.oracle.truffle.api.dsl.Cached;
4951
import com.oracle.truffle.api.dsl.Cached.Exclusive;
5052
import com.oracle.truffle.api.dsl.Cached.Shared;
@@ -69,19 +71,7 @@ public PString(Object clazz, Shape instanceShape, CharSequence value) {
6971
}
7072

7173
public String getValue() {
72-
return PString.getValue(value);
73-
}
74-
75-
public static String getValue(CharSequence charSequence) {
76-
if (charSequence instanceof LazyString) {
77-
LazyString s = (LazyString) charSequence;
78-
return s.materialize();
79-
} else if (charSequence instanceof NativeCharSequence) {
80-
NativeCharSequence s = (NativeCharSequence) charSequence;
81-
return s.materialize();
82-
} else {
83-
return (String) charSequence;
84-
}
74+
return StringMaterializeNodeGen.getUncached().execute(this);
8575
}
8676

8777
public CharSequence getCharSequence() {
@@ -127,7 +117,7 @@ static int string(PString self, @SuppressWarnings("unused") ThreadState state,
127117
@SuppressWarnings("unused") @Shared("builtinProfile") @Cached IsBuiltinClassProfile profile,
128118
@SuppressWarnings("unused") @Shared("lookupSelf") @Cached LookupInheritedAttributeNode.Dynamic lookupSelf,
129119
@SuppressWarnings("unused") @Shared("lookupString") @Cached LookupAttributeInMRONode.Dynamic lookupString) {
130-
return ((String) self.value).length();
120+
return CompilerDirectives.castExact(self.value, String.class).length();
131121
}
132122

133123
@Specialization(guards = {
@@ -138,7 +128,7 @@ static int lazyString(PString self, @SuppressWarnings("unused") ThreadState stat
138128
@SuppressWarnings("unused") @Shared("builtinProfile") @Cached IsBuiltinClassProfile profile,
139129
@SuppressWarnings("unused") @Shared("lookupSelf") @Cached LookupInheritedAttributeNode.Dynamic lookupSelf,
140130
@SuppressWarnings("unused") @Shared("lookupString") @Cached LookupAttributeInMRONode.Dynamic lookupString) {
141-
return ((LazyString) self.value).length();
131+
return CompilerDirectives.castExact(self.value, LazyString.class).length();
142132
}
143133

144134
@Specialization(guards = {
@@ -149,21 +139,21 @@ static int nativeString(PString self, @SuppressWarnings("unused") ThreadState st
149139
@SuppressWarnings("unused") @Shared("builtinProfile") @Cached IsBuiltinClassProfile profile,
150140
@SuppressWarnings("unused") @Shared("lookupSelf") @Cached LookupInheritedAttributeNode.Dynamic lookupSelf,
151141
@SuppressWarnings("unused") @Shared("lookupString") @Cached LookupAttributeInMRONode.Dynamic lookupString) {
152-
return ((NativeCharSequence) self.value).length();
142+
return CompilerDirectives.castExact(self.value, NativeCharSequence.class).getMaterialized().length();
153143
}
154144

155145
@Specialization(guards = {
156146
"isNativeString(self.getCharSequence())", "!isMaterialized(self.getCharSequence())",
157147
"isBuiltin(self, profile) || hasBuiltinLen(self, lookupSelf, lookupString)"
158148
}, replaces = "nativeString", limit = "1")
159-
static int nativeStringMat(PString self, @SuppressWarnings("unused") ThreadState state,
149+
static int nativeStringMat(@SuppressWarnings("unused") PString self, @SuppressWarnings("unused") ThreadState state,
150+
@Bind("getNativeCharSequence(self)") NativeCharSequence nativeCharSequence,
160151
@SuppressWarnings("unused") @Shared("builtinProfile") @Cached IsBuiltinClassProfile profile,
161152
@SuppressWarnings("unused") @Shared("lookupSelf") @Cached LookupInheritedAttributeNode.Dynamic lookupSelf,
162153
@SuppressWarnings("unused") @Shared("lookupString") @Cached LookupAttributeInMRONode.Dynamic lookupString,
163-
@Cached PCallCapiFunction callCapi) {
164-
NativeCharSequence ncs = (NativeCharSequence) self.value;
165-
ncs.materialize(callCapi);
166-
return ncs.length();
154+
@CachedLibrary("nativeCharSequence") InteropLibrary lib,
155+
@Cached CastToJavaIntExactNode castToJavaIntNode) {
156+
return nativeCharSequence.length(lib, castToJavaIntNode);
167157
}
168158

169159
@Specialization(replaces = {"string", "lazyString", "nativeString", "nativeStringMat"})
@@ -178,6 +168,10 @@ static int subclassedString(PString self, ThreadState state,
178168
// call the generic implementation in the superclass
179169
return self.lengthWithState(state, plib, methodLib, gotState, hasLen, ltZero, raiseNode, lib);
180170
}
171+
172+
static NativeCharSequence getNativeCharSequence(PString self) {
173+
return (NativeCharSequence) self.value;
174+
}
181175
}
182176

183177
@ExportMessage

0 commit comments

Comments
 (0)