Skip to content

Commit 6de8474

Browse files
committed
Implement JNI upcall and native fast path for ctx_Unicode_FromWideChar
1 parent a0416fe commit 6de8474

File tree

3 files changed

+121
-3
lines changed

3 files changed

+121
-3
lines changed

graalpython/com.oracle.graal.python.jni/src/hpy_jni.c

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ static JNIEnv* jniEnv;
8080
UPCALL(NumberCheck, SIG_HPY, SIG_INT) \
8181
UPCALL(Length, SIG_HPY, SIG_SIZE_T) \
8282
UPCALL(ListCheck, SIG_HPY, SIG_INT) \
83+
UPCALL(UnicodeFromWideChar, SIG_PTR SIG_SIZE_T, SIG_HPY) \
84+
UPCALL(UnicodeFromJCharArray, SIG_JCHARARRAY, SIG_HPY) \
8385

8486
#define UPCALL(name, jniSigArgs, jniSigRet) static jmethodID jniMethod_ ## name;
8587
ALL_UPCALLS
@@ -164,6 +166,10 @@ static HPy ctx_TypeGenericNew_jni(HPyContext ctx, HPy type, _HPyPtr args, HPy_ss
164166
return DO_UPCALL_HPY(CONTEXT_INSTANCE(ctx), TypeGenericNew, HPY_UP(type));
165167
}
166168

169+
/*
 * JNI-backed implementation of ctx_Unicode_FromWideChar.
 *
 * Forwards the raw wchar_t pointer and the second argument (passed through
 * unchanged as the size_t-typed upcall argument) to the UnicodeFromWideChar
 * upcall on the context instance and returns the handle it produces.
 */
static HPy ctx_Unicode_FromWideChar_jni(HPyContext ctx, const wchar_t *arr, HPy_ssize_t idx) {
    HPy upcall_result = DO_UPCALL_HPY(CONTEXT_INSTANCE(ctx), UnicodeFromWideChar, (PTR_UP) arr, (SIZE_T_UP) idx);
    return upcall_result;
}
172+
167173
//*************************
168174
// BOXING
169175

@@ -234,7 +240,7 @@ static HPy (*original_LongFromLong)(HPyContext ctx, long l);
234240
static int (*original_ListCheck)(HPyContext ctx, HPy h);
235241
static int (*original_NumberCheck)(HPyContext ctx, HPy h);
236242
static void (*original_Close)(HPyContext ctx, HPy h);
237-
static void *(*original_TrackerNew)(HPyContext ctx, HPy_ssize_t size);
243+
static HPy (*original_UnicodeFromWideChar)(HPyContext ctx, const wchar_t *arr, HPy_ssize_t size);
238244

239245
static void *augment_Cast(HPyContext ctx, HPy h) {
240246
uint64_t bits = toBits(h);
@@ -313,6 +319,80 @@ static int augment_ListCheck(HPyContext ctx, HPy obj) {
313319
}
314320
}
315321

322+
323+
/*
 * Native fast path for ctx_Unicode_FromWideChar.
 *
 * The input is scanned once for its largest element. If every element fits
 * into a 16-bit Java char, the data is copied into a freshly allocated Java
 * char[] and passed to the UnicodeFromJCharArray upcall. Otherwise the call
 * is delegated to the previously installed implementation
 * (original_UnicodeFromWideChar).
 *
 * Parameters:
 *   ctx  - the HPy context the call is made on
 *   u    - wide character data; may be NULL only when size is 0
 *   size - number of elements in u, or -1 to use wcslen(u)
 *
 * Returns the handle produced by the upcall / delegate, or HPy_NULL when the
 * arguments are invalid, the length does not fit a Java array, or the
 * temporary Java array cannot be created or accessed.
 */
static HPy augment_UnicodeFromWideChar(HPyContext ctx, const wchar_t *u, HPy_ssize_t size) {
    if (u == NULL && size != 0) {
        return HPy_NULL;
    }

    if (size == -1) {
        /* check the range before narrowing the unsigned wcslen() result */
        size_t len = wcslen(u);
        if (len > (size_t) INT32_MAX) {
            /* TODO(fa): error message */
            return HPy_NULL;
        }
        size = (HPy_ssize_t) len;
    }

    /* Java array lengths are non-negative 32-bit ints; reject anything else
       before it reaches NewCharArray. */
    if (size < 0 || size > INT32_MAX) {
        /* TODO(fa): error message */
        return HPy_NULL;
    }

    /* TODO(fa): share a common object for empty strings (size == 0) */
    /* TODO(fa): share single character strings in the Latin-1 range */

    /*
     * Determine whether all elements fit into a jchar. Elements are compared
     * as unsigned values, so a negative wchar_t never takes the fast path.
     * When wchar_t itself is only 16 bits wide, every element trivially fits
     * and surrogate pairs are copied through unchanged as UTF-16 code units.
     */
    uint32_t maxchar = 0;
    for (HPy_ssize_t i = 0; i < size; i++) {
        uint32_t ch = (uint32_t) u[i];
        if (ch > maxchar) {
            maxchar = ch;
            /* TODO(fa): report an error for values above U+10FFFF */
            if (maxchar >= 65536) {
                /* the fast path is already ruled out; no need to scan on */
                break;
            }
        }
    }

    if (maxchar < 65536) {
        jcharArray jCharArray = (*jniEnv)->NewCharArray(jniEnv, (jsize) size);
        if (jCharArray == NULL) {
            /* the array could not be constructed (a Java exception may be pending) */
            return HPy_NULL;
        }

        jchar *content = (*jniEnv)->GetPrimitiveArrayCritical(jniEnv, jCharArray, NULL);
        if (content == NULL) {
            (*jniEnv)->DeleteLocalRef(jniEnv, jCharArray);
            return HPy_NULL;
        }
        for (HPy_ssize_t i = 0; i < size; i++) {
            content[i] = (jchar) u[i];
        }
        (*jniEnv)->ReleasePrimitiveArrayCritical(jniEnv, jCharArray, content, 0);

        HPy result = DO_UPCALL_HPY(CONTEXT_INSTANCE(ctx), UnicodeFromJCharArray, jCharArray);
        /* Drop the local reference explicitly: this function is called from
           native code, so the reference would otherwise not be released here. */
        (*jniEnv)->DeleteLocalRef(jniEnv, jCharArray);
        return result;
    }

    return original_UnicodeFromWideChar(ctx, u, size);
}
395+
316396
void initDirectFastPaths(HPyContext context) {
317397
LOG("%p", context);
318398
context->name = "augmented!";
@@ -343,6 +423,9 @@ void initDirectFastPaths(HPyContext context) {
343423

344424
original_ListCheck = context->ctx_List_Check;
345425
context->ctx_List_Check = augment_ListCheck;
426+
427+
original_UnicodeFromWideChar = context->ctx_Unicode_FromWideChar;
428+
context->ctx_Unicode_FromWideChar = augment_UnicodeFromWideChar;
346429
}
347430

348431
void setHPyContextNativeSpace(HPyContext context, void** nativeSpace) {
@@ -381,6 +464,8 @@ JNIEXPORT jint JNICALL Java_com_oracle_graal_python_builtins_objects_cext_hpy_Gr
381464
context->ctx_SetItem_i = ctx_SetItemi_jni;
382465
context->ctx_SetItem = ctx_SetItem_jni;
383466

467+
context->ctx_Unicode_FromWideChar = ctx_Unicode_FromWideChar_jni;
468+
384469
context->ctx_Tracker_New = ctx_Tracker_New;
385470
context->ctx_Tracker_Add = ctx_Tracker_Add;
386471
context->ctx_Tracker_ForgetAll = ctx_Tracker_ForgetAll;
@@ -397,6 +482,7 @@ JNIEXPORT jint JNICALL Java_com_oracle_graal_python_builtins_objects_cext_hpy_Gr
397482
#define SIG_LONG "J"
398483
#define SIG_DOUBLE "D"
399484
#define SIG_TRACKER "J"
485+
#define SIG_JCHARARRAY "[C"
400486

401487
#define UPCALL(name, jniSigArgs, jniSigRet) \
402488
jniMethod_ ## name = (*env)->GetMethodID(env, clazz, "ctx" #name, "(" jniSigArgs ")" jniSigRet); \

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/hpy/GraalHPyContext.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@
159159
import com.oracle.graal.python.builtins.objects.frame.PFrame;
160160
import com.oracle.graal.python.builtins.objects.function.PArguments;
161161
import com.oracle.graal.python.builtins.objects.function.Signature;
162+
import com.oracle.graal.python.builtins.objects.ints.PInt;
162163
import com.oracle.graal.python.builtins.objects.list.PList;
163164
import com.oracle.graal.python.builtins.objects.object.PythonObject;
164165
import com.oracle.graal.python.builtins.objects.type.PythonClass;
@@ -1232,7 +1233,9 @@ public enum Counter {
12321233
UpcallLongAsLong,
12331234
UpcallLongFromLong,
12341235
UpcallFloatAsDouble,
1235-
UpcallFloatFromDouble;
1236+
UpcallFloatFromDouble,
1237+
UpcallUnicodeFromWideChar,
1238+
UpcallUnicodeFromJCharArray;
12361239

12371240
long count;
12381241

@@ -1596,6 +1599,33 @@ public final int ctxListCheck(long handle) {
15961599
}
15971600
}
15981601

1602+
public final long ctxUnicodeFromWideChar(long wcharArrayPtr, long size) {
1603+
Counter.UpcallUnicodeFromWideChar.increment();
1604+
1605+
if (!PInt.isIntRange(size)) {
1606+
// NULL handle
1607+
return 0;
1608+
}
1609+
int isize = (int) size;
1610+
1611+
char[] decoded = new char[isize];
1612+
for (int i = 0; i < size; i++) {
1613+
int wchar = unsafe.getInt(wcharArrayPtr + (long) Integer.BYTES * i);
1614+
if (Character.isBmpCodePoint(wchar)) {
1615+
decoded[i] = (char) wchar;
1616+
} else {
1617+
// TODO(fa): handle this case
1618+
throw new RuntimeException();
1619+
}
1620+
}
1621+
return createHandle(new String(decoded, 0, isize)).getId(this, ConditionProfile.getUncached());
1622+
}
1623+
1624+
public final long ctxUnicodeFromJCharArray(char[] arr) {
1625+
Counter.UpcallUnicodeFromJCharArray.increment();
1626+
return createHandle(new String(arr, 0, arr.length)).getId(this, ConditionProfile.getUncached());
1627+
}
1628+
15991629
@ExportMessage
16001630
@SuppressWarnings("static-method")
16011631
final boolean hasMembers() {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/resources/jni-config.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
{"name":"ctxLongFromLong","parameterTypes":["long"] },
1717
{"name":"ctxNew","parameterTypes":["long","long"] },
1818
{"name":"ctxNumberCheck","parameterTypes":["long"] },
19-
{"name":"ctxTypeGenericNew","parameterTypes":["long"] }
19+
{"name":"ctxTypeGenericNew","parameterTypes":["long"] },
20+
{"name":"ctxUnicodeFromWideChar","parameterTypes":["long","long"] },
21+
{"name":"ctxUnicodeFromJCharArray","parameterTypes":["char[]"] }
2022
]
2123
}
2224
]

0 commit comments

Comments
 (0)