Skip to content

Commit f1948ff

Browse files
committed
Use native char sequence for PyUnicode_New
1 parent 29d4032 commit f1948ff

File tree

4 files changed

+66
-4
lines changed

4 files changed

+66
-4
lines changed

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ PyTypeObject PyUnicode_Type = PY_TRUFFLE_TYPE("str", &PyType_Type, Py_TPFLAGS_DE
4545
/* The empty Unicode object is shared to improve performance. */
4646
static PyObject *unicode_empty = NULL;
4747

48+
#define MAX_UNICODE 0x10ffff
49+
4850
#define _Py_RETURN_UNICODE_EMPTY() \
4951
do { \
5052
_Py_INCREF_UNICODE_EMPTY(); \
@@ -489,8 +491,30 @@ PyObject* PyUnicode_Join(PyObject *separator, PyObject *seq) {
489491
return UPCALL_CEXT_O(_jls_PyUnicode_Join, native_to_java(separator), native_to_java(seq));
490492
}
491493

494+
typedef PyObject* (*unicode_new_fun_t)(void* data, int elementSize, int is_ascii);
495+
UPCALL_TYPED_ID(PyUnicode_New, unicode_new_fun_t);
492496
/**
 * Creates a new unicode object backed by a freshly allocated native buffer.
 *
 * The storage kind (1, 2 or 4 bytes per character) is derived from 'maxchar'
 * and the buffer is wrapped as a polyglot i8 array before being handed to the
 * managed 'PyUnicode_New' upcall.
 *
 * @param size    number of characters the string can hold; must be >= 0
 * @param maxchar upper bound of the code points that will be stored; must not
 *                exceed MAX_UNICODE
 * @return the new unicode object, or NULL with an exception set
 *         (SystemError for invalid arguments, MemoryError if the buffer
 *         cannot be allocated)
 */
PyObject* PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) {
    enum PyUnicode_Kind kind;
    int is_ascii = 0;

    if (maxchar < 128) {
        kind = PyUnicode_1BYTE_KIND;
        is_ascii = 1;
    } else if (maxchar < 256) {
        kind = PyUnicode_1BYTE_KIND;
    } else if (maxchar < 65536) {
        kind = PyUnicode_2BYTE_KIND;
    } else if (maxchar <= MAX_UNICODE) {
        kind = PyUnicode_4BYTE_KIND;
    } else {
        PyErr_SetString(PyExc_SystemError,
                        "invalid maximum character passed to PyUnicode_New");
        return NULL;
    }

    /* A negative size would wrap around to a huge allocation request below. */
    if (size < 0) {
        PyErr_SetString(PyExc_SystemError,
                        "Negative size passed to PyUnicode_New");
        return NULL;
    }

    /* Guard (size + 1) * kind against overflow before computing byte counts. */
    if (size > PY_SSIZE_T_MAX / (Py_ssize_t) kind - 1) {
        return PyErr_NoMemory();
    }

    size_t n = (size_t) size * (size_t) kind;

    /*
     * Allocate one extra, zeroed character: this keeps the request non-zero
     * for empty strings (malloc(0) may return NULL) and leaves the buffer
     * NUL-terminated. Only the first 'n' bytes are exposed to the managed side.
     * NOTE(review): the buffer is not freed here; presumably ownership passes
     * to the managed string object - confirm.
     */
    int8_t* ptr = (int8_t*) calloc(n + (size_t) kind, 1);
    if (ptr == NULL) {
        return PyErr_NoMemory();
    }

    return _jls_PyUnicode_New(polyglot_from_i8_array(ptr, n), kind, is_ascii);
}
495519

496520
UPCALL_ID(PyUnicode_Compare);

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PythonCextBuiltins.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@
187187
import com.oracle.graal.python.builtins.objects.object.PythonObject;
188188
import com.oracle.graal.python.builtins.objects.object.PythonObjectLibrary;
189189
import com.oracle.graal.python.builtins.objects.set.PBaseSet;
190+
import com.oracle.graal.python.builtins.objects.str.NativeCharSequence;
190191
import com.oracle.graal.python.builtins.objects.str.PString;
191192
import com.oracle.graal.python.builtins.objects.traceback.GetTracebackNode;
192193
import com.oracle.graal.python.builtins.objects.traceback.LazyTraceback;
@@ -714,6 +715,17 @@ Object run(PBaseException exception, Object object,
714715
}
715716
}
716717

718+
// directly called without landing function
719+
@Builtin(name = "PyUnicode_New", minNumOfPositionalArgs = 3)
720+
@GenerateNodeFactory
721+
abstract static class PyUnicodeNewNode extends PythonBuiltinNode {
722+
@Specialization
723+
Object doGeneric(Object ptr, int elementSize, int isAscii,
724+
@Cached CExtNodes.ToNewRefNode toNewRefNode) {
725+
return toNewRefNode.execute(factory().createString(new NativeCharSequence(ptr, elementSize, isAscii != 0)));
726+
}
727+
}
728+
717729
@Builtin(name = "PyUnicode_FromString", minNumOfPositionalArgs = 1)
718730
@GenerateNodeFactory
719731
abstract static class PyUnicodeFromStringNode extends PythonBuiltinNode {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/CExtNodes.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,7 @@ public abstract static class FromCharPointerNode extends Node {
14511451
@Specialization
14521452
PString execute(Object charPtr,
14531453
@Cached PythonObjectFactory factory) {
1454-
return factory.createString(new NativeCharSequence(charPtr));
1454+
return factory.createString(new NativeCharSequence(charPtr, 1, false));
14551455
}
14561456
}
14571457

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/NativeCharSequence.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,29 @@
4848

4949
public final class NativeCharSequence implements PCharSequence {
5050

51+
/**
52+
* Pointer to the native buffer (most likely a {@code char*} containing ASCII characters but could
53+
* also be an arbitrary {@code void*} for {@code Py_UCS1}, {@code Py_UCS2}, or {@code Py_UCS4}
54+
* characters)
55+
*/
5156
private final Object ptr;
57+
58+
/**
59+
* The size of a single character in bytes (valid values are {@code 1, 2, 4}).
60+
*/
61+
private final int elementSize;
62+
63+
/**
64+
* Specifies if the native buffer contains only ASCII characters.
65+
*/
66+
private final boolean asciiOnly;
67+
5268
private String materialized;
5369

54-
public NativeCharSequence(Object ptr) {
70+
public NativeCharSequence(Object ptr, int elementSize, boolean asciiOnly) {
5571
this.ptr = ptr;
72+
this.elementSize = elementSize;
73+
this.asciiOnly = asciiOnly;
5674
}
5775

5876
@Override
@@ -90,10 +108,18 @@ public String materialize(PCallCapiFunction node) {
90108
return materialized;
91109
}
92110

93-
Object getPtr() {
111+
public Object getPtr() {
94112
return ptr;
95113
}
96114

115+
public int getElementSize() {
116+
return elementSize;
117+
}
118+
119+
public boolean isAsciiOnly() {
120+
return asciiOnly;
121+
}
122+
97123
@Override
98124
public String toString() {
99125
CompilerAsserts.neverPartOfCompilation();

0 commit comments

Comments
 (0)