Skip to content

Commit 4a8e2b1

Browse files
committed
[GR-55296] Add byte[] and native constructors for embedder strings; Add Value.asStringBytes(StringEncoding).
PullRequest: graal/19499
2 parents d2dc49a + 07dbd65 commit 4a8e2b1

File tree

15 files changed

+694
-40
lines changed

15 files changed

+694
-40
lines changed

sdk/src/org.graalvm.polyglot/snapshot.sigtest

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ meth public void printStackTrace(java.io.PrintStream)
9090
meth public void printStackTrace(java.io.PrintWriter)
9191
meth public void setStackTrace(java.lang.StackTraceElement[])
9292
supr java.lang.Object
93-
hfds CAUSE_CAPTION,EMPTY_THROWABLE_ARRAY,NULL_CAUSE_MESSAGE,SELF_SUPPRESSION_MESSAGE,SUPPRESSED_CAPTION,SUPPRESSED_SENTINEL,UNASSIGNED_STACK,backtrace,cause,depth,detailMessage,serialVersionUID,stackTrace,suppressedExceptions
93+
hfds CAUSE_CAPTION,EMPTY_THROWABLE_ARRAY,NULL_CAUSE_MESSAGE,SELF_SUPPRESSION_MESSAGE,SUPPRESSED_CAPTION,SUPPRESSED_SENTINEL,UNASSIGNED_STACK,backtrace,cause,depth,detailMessage,jfrTracing,serialVersionUID,stackTrace,suppressedExceptions
9494
hcls PrintStreamOrWriter,SentinelHolder,WrappedPrintStream,WrappedPrintWriter
9595

9696
CLSS public abstract interface java.lang.annotation.Annotation
@@ -147,7 +147,7 @@ meth public void leave()
147147
meth public void resetLimits()
148148
meth public void safepoint()
149149
supr java.lang.Object
150-
hfds ALL_HOST_CLASSES,EMPTY,NO_HOST_CLASSES,UNSET_HOST_LOOKUP,currentAPI,dispatch,engine,receiver
150+
hfds ALL_HOST_CLASSES,EMPTY,NO_HOST_CLASSES,UNSET_HOST_LOOKUP,creatorContext,currentAPI,dispatch,engine,parent,receiver
151151

152152
CLSS public final org.graalvm.polyglot.Context$Builder
153153
outer org.graalvm.polyglot.Context
@@ -213,8 +213,8 @@ meth public static org.graalvm.polyglot.Engine$Builder newBuilder()
213213
meth public void close()
214214
meth public void close(boolean)
215215
supr java.lang.Object
216-
hfds EMPTY,ENGINES,currentAPI,dispatch,initializationException,receiver,shutdownHookInitialized
217-
hcls APIAccessImpl,ClassPathIsolation,EngineShutDownHook,ImplHolder,PolyglotInvalid
216+
hfds EMPTY,ENGINES,creatorEngine,currentAPI,dispatch,initializationException,receiver,shutdownHookInitialized
217+
hcls APIAccessImpl,CleanableReference,ContextReference,EngineReference,EngineShutDownHook,ImplHolder,PolyglotInvalid
218218

219219
CLSS public final org.graalvm.polyglot.Engine$Builder
220220
outer org.graalvm.polyglot.Engine
@@ -336,16 +336,20 @@ supr java.lang.Enum<org.graalvm.polyglot.HostAccess$TargetMappingPrecedence>
336336

337337
CLSS public final org.graalvm.polyglot.Instrument
338338
meth public <%0 extends java.lang.Object> {%%0} lookup(java.lang.Class<{%%0}>)
339+
meth public boolean equals(java.lang.Object)
340+
meth public int hashCode()
339341
meth public java.lang.String getId()
340342
meth public java.lang.String getName()
341343
meth public java.lang.String getVersion()
342344
meth public java.lang.String getWebsite()
343345
meth public org.graalvm.options.OptionDescriptors getOptions()
344346
supr java.lang.Object
345-
hfds dispatch,receiver
347+
hfds dispatch,engine,receiver
346348

347349
CLSS public final org.graalvm.polyglot.Language
350+
meth public boolean equals(java.lang.Object)
348351
meth public boolean isInteractive()
352+
meth public int hashCode()
349353
meth public java.lang.String getDefaultMimeType()
350354
meth public java.lang.String getId()
351355
meth public java.lang.String getImplementationName()
@@ -355,7 +359,7 @@ meth public java.lang.String getWebsite()
355359
meth public java.util.Set<java.lang.String> getMimeTypes()
356360
meth public org.graalvm.options.OptionDescriptors getOptions()
357361
supr java.lang.Object
358-
hfds dispatch,receiver
362+
hfds dispatch,engine,receiver
359363

360364
CLSS public final org.graalvm.polyglot.PolyglotAccess
361365
fld public final static org.graalvm.polyglot.PolyglotAccess ALL
@@ -403,7 +407,7 @@ meth public void printStackTrace(java.io.PrintStream)
403407
meth public void printStackTrace(java.io.PrintWriter)
404408
meth public void setStackTrace(java.lang.StackTraceElement[])
405409
supr java.lang.RuntimeException
406-
hfds dispatch,impl
410+
hfds anchor,dispatch,impl
407411

408412
CLSS public final org.graalvm.polyglot.PolyglotException$StackFrame
409413
outer org.graalvm.polyglot.PolyglotException
@@ -540,6 +544,7 @@ supr java.lang.Object
540544
hfds rawType,type
541545

542546
CLSS public final org.graalvm.polyglot.Value
547+
innr public final static StringEncoding
543548
meth public !varargs org.graalvm.polyglot.Value execute(java.lang.Object[])
544549
meth public !varargs org.graalvm.polyglot.Value invokeMember(java.lang.String,java.lang.Object[])
545550
meth public !varargs org.graalvm.polyglot.Value newInstance(java.lang.Object[])
@@ -591,6 +596,7 @@ meth public boolean removeHashEntry(java.lang.Object)
591596
meth public boolean removeMember(java.lang.String)
592597
meth public byte asByte()
593598
meth public byte readBufferByte(long)
599+
meth public byte[] asStringBytes(org.graalvm.polyglot.Value$StringEncoding)
594600
meth public double asDouble()
595601
meth public double readBufferDouble(java.nio.ByteOrder,long)
596602
meth public float asFloat()
@@ -632,6 +638,10 @@ meth public org.graalvm.polyglot.Value getMetaParents()
632638
meth public short asShort()
633639
meth public short readBufferShort(java.nio.ByteOrder,long)
634640
meth public static org.graalvm.polyglot.Value asValue(java.lang.Object)
641+
meth public static org.graalvm.polyglot.Value fromByteBasedString(byte[],int,int,org.graalvm.polyglot.Value$StringEncoding,boolean)
642+
meth public static org.graalvm.polyglot.Value fromByteBasedString(byte[],org.graalvm.polyglot.Value$StringEncoding)
643+
meth public static org.graalvm.polyglot.Value fromNativeString(long,int,int,org.graalvm.polyglot.Value$StringEncoding,boolean)
644+
meth public static org.graalvm.polyglot.Value fromNativeString(long,int,org.graalvm.polyglot.Value$StringEncoding)
635645
meth public void pin()
636646
meth public void putHashEntry(java.lang.Object,java.lang.Object)
637647
meth public void putMember(java.lang.String,java.lang.Object)
@@ -645,6 +655,18 @@ meth public void writeBufferLong(java.nio.ByteOrder,long,long)
645655
meth public void writeBufferShort(java.nio.ByteOrder,long,short)
646656
supr java.lang.Object
647657

658+
CLSS public final static org.graalvm.polyglot.Value$StringEncoding
659+
outer org.graalvm.polyglot.Value
660+
fld public final static org.graalvm.polyglot.Value$StringEncoding UTF_16
661+
fld public final static org.graalvm.polyglot.Value$StringEncoding UTF_16_BIG_ENDIAN
662+
fld public final static org.graalvm.polyglot.Value$StringEncoding UTF_16_LITTLE_ENDIAN
663+
fld public final static org.graalvm.polyglot.Value$StringEncoding UTF_32
664+
fld public final static org.graalvm.polyglot.Value$StringEncoding UTF_32_BIG_ENDIAN
665+
fld public final static org.graalvm.polyglot.Value$StringEncoding UTF_32_LITTLE_ENDIAN
666+
fld public final static org.graalvm.polyglot.Value$StringEncoding UTF_8
667+
supr java.lang.Object
668+
hfds value
669+
648670
CLSS public abstract interface org.graalvm.polyglot.io.ByteSequence
649671
meth public abstract byte byteAt(int)
650672
meth public abstract int length()
@@ -772,7 +794,7 @@ intf java.lang.AutoCloseable
772794
meth public static org.graalvm.polyglot.management.ExecutionListener$Builder newBuilder()
773795
meth public void close()
774796
supr java.lang.Object
775-
hfds EMPTY,dispatch,receiver
797+
hfds EMPTY,creatorEngine,dispatch,receiver
776798

777799
CLSS public final org.graalvm.polyglot.management.ExecutionListener$Builder
778800
outer org.graalvm.polyglot.management.ExecutionListener

sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/Value.java

Lines changed: 207 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ public byte readBufferByte(long byteOffset) throws UnsupportedOperationException
539539
* Invoking this message does not cause any observable side-effects.
540540
* <p>
541541
* <b>Example</b> reading into an output stream using a 4k auxiliary byte array:
542-
*
542+
*
543543
* <pre>
544544
* Value val = ...
545545
* assert val.hasBufferElements();
@@ -556,11 +556,11 @@ public byte readBufferByte(long byteOffset) throws UnsupportedOperationException
556556
*
557557
* In case the goal is to read the whole contents into a single byte array, the easiest way is
558558
* to do that through {@link ByteSequence}:
559-
*
559+
*
560560
* <pre>
561561
* byte[] byteArray = val.as(ByteSequence.class).toByteArray();
562562
* </pre>
563-
*
563+
*
564564
* @param byteOffset offset in the buffer to start reading from.
565565
* @param destination byte array to write the read bytes into.
566566
* @param destinationOffset offset in the destination array to start writing from.
@@ -1195,6 +1195,34 @@ public String asString() {
11951195
}
11961196
}
11971197

1198+
/**
1199+
* Returns the bytes of a given string value without converting it to a Java {@link String}.
1200+
* <p>
1201+
* This method retrieves the raw bytes of the string in the specified {@link StringEncoding},
1202+
* avoiding intermediate conversions to a Java {@code String}. This is particularly useful for
1203+
* performance-sensitive scenarios where the overhead of creating a Java {@code String} is
1204+
* undesirable.
1205+
* <p>
1206+
* If the string is not already encoded in the specified encoding, it will be re-encoded before
1207+
* the bytes are returned. Note that re-encoding may involve additional computational overhead
1208+
* depending on the size of the string and the differences between its current encoding and the
1209+
* target encoding.
1210+
*
1211+
* <b>Usage Note:</b> The returned byte array represents the raw data of the string in the
1212+
* requested encoding. Modifications to the array will not affect the underlying string value.
1213+
*
1214+
* @param encoding the desired encoding for the string. Must not be <code>null</code>. Supported
1215+
* encodings are defined in {@link StringEncoding}.
1216+
* @return a byte array containing the string's raw bytes in the specified encoding
1217+
* @throws NullPointerException if {@code encoding} is <code>null</code>
1218+
* @throws IllegalStateException if the string value is no longer valid (e.g., the associated
1219+
* context has been closed)
1220+
* @since 24.2
1221+
*/
1222+
public byte[] asStringBytes(StringEncoding encoding) {
1223+
return dispatch.asStringBytes(this.context, receiver, encoding.value);
1224+
}
1225+
11981226
/**
11991227
* Returns <code>true</code> if this value represents a {@link #isNumber() number} and the value
12001228
* fits in <code>int</code>, else <code>false</code>.
@@ -2576,6 +2604,182 @@ public void pin() {
25762604
dispatch.pin(this.context, receiver);
25772605
Reference.reachabilityFence(creatorContext);
25782606
}
2607+
2608+
/**
2609+
* Creates a byte-based string value that can be passed to polyglot languages.
2610+
* <p>
2611+
* The returned value is guaranteed to return <code>true</code> for {@link Value#isString()}.
2612+
* The string can later be retrieved as a byte array using
2613+
* {@link Value#asStringBytes(StringEncoding)}. This method ensures immutability by
2614+
* conservatively copying the byte array before passing it to the underlying implementation.
2615+
* </p>
2616+
*
2617+
* <b>Performance Note:</b> Copying the byte array can have a performance impact. Use this
2618+
* method when immutability is required, or use the more flexible overloaded method
2619+
* {@link #fromByteBasedString(byte[], int, int, StringEncoding, boolean)} to control copying
2620+
* behavior.
2621+
*
2622+
* @param bytes the byte array representing the string
2623+
* @param encoding the encoding of the byte array
2624+
* @return a polyglot string {@link Value}
2625+
* @throws NullPointerException if either {@code bytes} or {@code encoding} is null
2626+
* @since 24.2
2627+
*/
2628+
public static Value fromByteBasedString(byte[] bytes, StringEncoding encoding) {
2629+
Objects.requireNonNull(bytes);
2630+
Objects.requireNonNull(encoding);
2631+
return Engine.getImpl().fromByteBasedString(bytes, 0, bytes.length, encoding.value, true);
2632+
}
2633+
2634+
/**
2635+
* Creates a byte-based string value with more granular control over the byte array's usage.
2636+
* <p>
2637+
* This method provides additional flexibility by allowing a subset of the byte array to be
2638+
* passed and controlling whether the byte array should be copied to ensure immutability.
2639+
*
2640+
* @param bytes the byte array representing the string
2641+
* @param offset the starting offset in the byte array
2642+
* @param length the number of bytes to include starting from {@code offset}
2643+
* @param encoding the encoding of the byte array
2644+
* @param copy whether to copy the byte array to ensure immutability
2645+
* @return a polyglot string {@link Value}
2646+
* @since 24.2
2647+
*/
2648+
public static Value fromByteBasedString(byte[] bytes, int offset, int length, StringEncoding encoding, boolean copy) {
2649+
Objects.requireNonNull(bytes);
2650+
Objects.requireNonNull(encoding);
2651+
if (offset < 0) {
2652+
throw new IndexOutOfBoundsException("byteLength must not be negative");
2653+
}
2654+
if (length < 0) {
2655+
throw new IndexOutOfBoundsException("byteOffset must not be negative");
2656+
}
2657+
if (offset + length > bytes.length) {
2658+
throw new IndexOutOfBoundsException("byte index is out of bounds");
2659+
}
2660+
return Engine.getImpl().fromByteBasedString(bytes, offset, length, encoding.value, copy);
2661+
}
2662+
2663+
/**
2664+
* Creates a native string object that can be passed to polyglot languages.
2665+
* <p>
2666+
* Native strings avoid copying, offering better performance for certain use cases. However,
2667+
* clients must guarantee the lifetime of the native string as long as the {@link Value} is
2668+
* alive. The returned value is guaranteed to return <code>true</code> for
2669+
* {@link Value#isString()}.
2670+
* <p>
2671+
* <b>Usage Warning:</b> The polyglot context or engine does not manage the lifetime of the
2672+
* native pointer. Clients must ensure that the pointer remains valid and that the memory is not
2673+
* deallocated while the string is in use. Passing a deallocated or invalid pointer can result
2674+
* in crashes or undefined behavior.
2675+
* <p>
2676+
* <b>Note:</b> Whenever possible, use {@link #fromByteBasedString(byte[], StringEncoding)} to
2677+
* avoid the risks associated with native memory management.
2678+
*
2679+
* <ul>
2680+
* <li>The native string's memory must remain valid for the lifetime of the context it is passed
2681+
* to.
2682+
* <li>The native bytes must not be mutated after being passed to this method.
2683+
* <li>The bytes must already be encoded with the specified encoding.
2684+
* </ul>
2685+
*
2686+
* @param basePointer the raw base pointer to the native string in memory
2687+
* @param byteLength the length of the string in bytes
2688+
* @param encoding the encoding of the native string
2689+
* @param copy whether to copy the native string bytes for additional safety
2690+
* @return a polyglot string {@link Value}
2691+
* @since 24.2
2692+
*/
2693+
public static Value fromNativeString(long basePointer, int byteOffset, int byteLength, StringEncoding encoding, boolean copy) {
2694+
Objects.requireNonNull(encoding);
2695+
if (basePointer == 0L) {
2696+
throw new NullPointerException("Null base pointer provided.");
2697+
}
2698+
if (byteLength < 0) {
2699+
throw new IndexOutOfBoundsException("byteLength must not be negative");
2700+
}
2701+
if (byteOffset < 0) {
2702+
throw new IndexOutOfBoundsException("byteOffset must not be negative");
2703+
}
2704+
return Engine.getImpl().fromNativeString(basePointer, byteOffset, byteLength, encoding.value, copy);
2705+
}
2706+
2707+
/**
2708+
* Creates a native string object with default safety settings.
2709+
* <p>
2710+
* This method is equivalent to calling
2711+
* {@link #fromNativeString(long, int, int, StringEncoding, boolean)} with {@code copy} set to
2712+
* {@code true}.
2713+
* </p>
2714+
*
2715+
* @param basePointer the raw base pointer to the native string in memory
2716+
* @param byteLength the length of the string in bytes
2717+
* @param encoding the encoding of the native string
2718+
* @return a polyglot string {@link Value}
2719+
* @since 24.2
2720+
*/
2721+
public static Value fromNativeString(long basePointer, int byteLength, StringEncoding encoding) {
2722+
return fromNativeString(basePointer, 0, byteLength, encoding, true);
2723+
}
2724+
2725+
/**
2726+
* Enum-like class representing the supported string encodings. The encodings determine how byte
2727+
* arrays or native strings are interpreted when creating or retrieving string values. This
2728+
* class is not directly an enum to support compatible evolution.
2729+
*
2730+
* @since 24.2
2731+
*/
2732+
public static final class StringEncoding {
2733+
2734+
/**
2735+
* @since 24.2
2736+
*/
2737+
public static final StringEncoding UTF_8 = new StringEncoding(0);
2738+
2739+
/**
2740+
* @since 24.2
2741+
*/
2742+
public static final StringEncoding UTF_16_LITTLE_ENDIAN = new StringEncoding(1);
2743+
/**
2744+
* @since 24.2
2745+
*/
2746+
public static final StringEncoding UTF_16_BIG_ENDIAN = new StringEncoding(2);
2747+
/**
2748+
* @since 24.2
2749+
*/
2750+
public static final StringEncoding UTF_32_LITTLE_ENDIAN = new StringEncoding(3);
2751+
/**
2752+
* @since 24.2
2753+
*/
2754+
public static final StringEncoding UTF_32_BIG_ENDIAN = new StringEncoding(4);
2755+
2756+
/**
2757+
* The native UTF 16 encoding for the current platform.
2758+
*
2759+
* @see ByteOrder#nativeOrder()
2760+
* @since 24.2
2761+
*/
2762+
public static final StringEncoding UTF_16 = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN ? UTF_16_LITTLE_ENDIAN : UTF_16_BIG_ENDIAN;
2763+
2764+
/**
2765+
* The native UTF 32 encoding for the current platform.
2766+
*
2767+
* @see ByteOrder#nativeOrder()
2768+
* @since 24.2
2769+
*/
2770+
public static final StringEncoding UTF_32 = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN ? UTF_32_LITTLE_ENDIAN : UTF_32_BIG_ENDIAN;
2771+
2772+
/*
2773+
* Mapping table to PolyglotImpl.LazyEncodings.TABLE. Keep in sync.
2774+
*/
2775+
final int value;
2776+
2777+
private StringEncoding(int value) {
2778+
this.value = value;
2779+
}
2780+
2781+
}
2782+
25792783
}
25802784

25812785
abstract class AbstractValue {

sdk/src/org.graalvm.polyglot/src/org/graalvm/polyglot/impl/AbstractPolyglotImpl.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,6 +1402,8 @@ public boolean hasHashEntry(Object context, Object receiver, Object key) {
14021402
public abstract Object getHashValuesIterator(Object context, Object receiver);
14031403

14041404
public abstract void pin(Object languageContext, Object receiver);
1405+
1406+
public abstract byte[] asStringBytes(Object context, Object receiver, int encoding);
14051407
}
14061408

14071409
public Class<?> loadLanguageClass(String className) {
@@ -1416,6 +1418,14 @@ public Object asValue(Object o) {
14161418
return getNext().asValue(o);
14171419
}
14181420

1421+
public Value fromNativeString(long basePointer, int byteOffset, int byteLength, int encoding, boolean copy) {
1422+
return getNext().fromNativeString(basePointer, byteOffset, byteLength, encoding, copy);
1423+
}
1424+
1425+
public Value fromByteBasedString(byte[] bytes, int offset, int length, int encoding, boolean copy) {
1426+
return getNext().fromByteBasedString(bytes, offset, length, encoding, copy);
1427+
}
1428+
14191429
public <S, T> Object newTargetTypeMapping(Class<S> sourceType, Class<T> targetType, Predicate<S> acceptsValue, Function<S, T> convertValue, TargetMappingPrecedence precedence) {
14201430
return getNext().newTargetTypeMapping(sourceType, targetType, acceptsValue, convertValue, precedence);
14211431
}

truffle/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ This changelog summarizes major changes between Truffle versions relevant to lan
1818
* Added `ThreadLocalAction#notifyBlocked(Access)` and `ThreadLocalAction#notifyUnblocked(Access)` to notify thread local actions that their processing has been blocked/unblocked due to a blocked call (see `ThreadLocalAction` documentation).
1919
* `TruffleSafepoint#poll(Node)` does not require a non-null location anymore. However, it is still recommended to always pass a location node, if available.
2020
* GR-59565 Added `RootNode.prepareForCompilation` which allows root nodes to offload expensive computation to the compiler thread and to delay compilation if they are not yet fully profiled.
21+
* GR-55296 Added support for creation of strings from raw byte arrays and native memory using
22+
`Value.fromByteBasedString(...)` and `Value.fromNativeString(...)`. A `Value.StringEncoding` must be provided.
23+
* GR-55296 Added support to convert any string to a `byte[]` with a given `Value.StringEncoding` using `Value.asStringBytes(...)`.
2124

2225

2326
* GR-54760 `RootNode.translateStackTraceElement()` is now always consulted for polyglot and debugger stack traces. Stack traces now use the source section, the executable name, and the name of the declared meta-object to build `StackTraceElement` instances.

0 commit comments

Comments
 (0)