@@ -539,7 +539,7 @@ public byte readBufferByte(long byteOffset) throws UnsupportedOperationException
539
539
* Invoking this message does not cause any observable side-effects.
540
540
* <p>
541
541
* <b>Example</b> reading into an output stream using a 4k auxiliary byte array:
542
- *
542
+ *
543
543
* <pre>
544
544
* Value val = ...
545
545
* assert val.hasBufferElements();
@@ -556,11 +556,11 @@ public byte readBufferByte(long byteOffset) throws UnsupportedOperationException
556
556
*
557
557
* In case the goal is to read the whole contents into a single byte array, the easiest way is
558
558
* to do that through {@link ByteSequence}:
559
- *
559
+ *
560
560
* <pre>
561
561
* byte[] byteArray = val.as(ByteSequence.class).toByteArray();
562
562
* </pre>
563
- *
563
+ *
564
564
* @param byteOffset offset in the buffer to start reading from.
565
565
* @param destination byte array to write the read bytes into.
566
566
* @param destinationOffset offset in the destination array to start writing from.
@@ -1195,6 +1195,34 @@ public String asString() {
1195
1195
}
1196
1196
}
1197
1197
1198
+ /**
1199
+ * Returns the bytes of a given string value without converting it to a Java {@link String}.
1200
+ * <p>
1201
+ * This method retrieves the raw bytes of the string in the specified {@link StringEncoding},
1202
+ * avoiding intermediate conversions to a Java {@code String}. This is particularly useful for
1203
+ * performance-sensitive scenarios where the overhead of creating a Java {@code String} is
1204
+ * undesirable.
1205
+ * <p>
1206
+ * If the string is not already encoded in the specified encoding, it will be re-encoded before
1207
+ * the bytes are returned. Note that re-encoding may involve additional computational overhead
1208
+ * depending on the size of the string and the differences between its current encoding and the
1209
+ * target encoding.
1210
+ *
1211
+ * <b>Usage Note:</b> The returned byte array represents the raw data of the string in the
1212
+ * requested encoding. Modifications to the array will not affect the underlying string value.
1213
+ *
1214
+ * @param encoding the desired encoding for the string. Must not be <code>null</code>. Supported
1215
+ * encodings are defined in {@link StringEncoding}.
1216
+ * @return a byte array containing the string's raw bytes in the specified encoding
1217
+ * @throws NullPointerException if {@code encoding} is <code>null</code>
1218
+ * @throws IllegalStateException if the string value is no longer valid (e.g., the associated
1219
+ * context has been closed)
1220
+ * @since 24.2
1221
+ */
1222
+ public byte [] asStringBytes (StringEncoding encoding ) {
1223
+ return dispatch .asStringBytes (this .context , receiver , encoding .value );
1224
+ }
1225
+
1198
1226
/**
1199
1227
* Returns <code>true</code> if this value represents a {@link #isNumber() number} and the value
1200
1228
* fits in <code>int</code>, else <code>false</code>.
@@ -2576,6 +2604,182 @@ public void pin() {
2576
2604
dispatch .pin (this .context , receiver );
2577
2605
Reference .reachabilityFence (creatorContext );
2578
2606
}
2607
+
2608
+ /**
2609
+ * Creates a byte-based string value that can be passed to polyglot languages.
2610
+ * <p>
2611
+ * The returned value is guaranteed to return <code>true</code> for {@link Value#isString()}.
2612
+ * The string can later be retrieved as a byte array using
2613
+ * {@link Value#asStringBytes(StringEncoding)}. This method ensures immutability by
2614
+ * conservatively copying the byte array before passing it to the underlying implementation.
2615
+ * </p>
2616
+ *
2617
+ * <b>Performance Note:</b> Copying the byte array can have a performance impact. Use this
2618
+ * method when immutability is required, or use the more flexible overloaded method
2619
+ * {@link #fromByteBasedString(byte[], int, int, StringEncoding, boolean)} to control copying
2620
+ * behavior.
2621
+ *
2622
+ * @param bytes the byte array representing the string
2623
+ * @param encoding the encoding of the byte array
2624
+ * @return a polyglot string {@link Value}
2625
+ * @throws NullPointerException if either {@code bytes} or {@code encoding} is null
2626
+ * @since 24.2
2627
+ */
2628
+ public static Value fromByteBasedString (byte [] bytes , StringEncoding encoding ) {
2629
+ Objects .requireNonNull (bytes );
2630
+ Objects .requireNonNull (encoding );
2631
+ return Engine .getImpl ().fromByteBasedString (bytes , 0 , bytes .length , encoding .value , true );
2632
+ }
2633
+
2634
+ /**
2635
+ * Creates a byte-based string value with more granular control over the byte array's usage.
2636
+ * <p>
2637
+ * This method provides additional flexibility by allowing a subset of the byte array to be
2638
+ * passed and controlling whether the byte array should be copied to ensure immutability.
2639
+ *
2640
+ * @param bytes the byte array representing the string
2641
+ * @param offset the starting offset in the byte array
2642
+ * @param length the number of bytes to include starting from {@code offset}
2643
+ * @param encoding the encoding of the byte array
2644
+ * @param copy whether to copy the byte array to ensure immutability
2645
+ * @return a polyglot string {@link Value}
2646
+ * @since 24.2
2647
+ */
2648
+ public static Value fromByteBasedString (byte [] bytes , int offset , int length , StringEncoding encoding , boolean copy ) {
2649
+ Objects .requireNonNull (bytes );
2650
+ Objects .requireNonNull (encoding );
2651
+ if (offset < 0 ) {
2652
+ throw new IndexOutOfBoundsException ("byteLength must not be negative" );
2653
+ }
2654
+ if (length < 0 ) {
2655
+ throw new IndexOutOfBoundsException ("byteOffset must not be negative" );
2656
+ }
2657
+ if (offset + length > bytes .length ) {
2658
+ throw new IndexOutOfBoundsException ("byte index is out of bounds" );
2659
+ }
2660
+ return Engine .getImpl ().fromByteBasedString (bytes , offset , length , encoding .value , copy );
2661
+ }
2662
+
2663
+ /**
2664
+ * Creates a native string object that can be passed to polyglot languages.
2665
+ * <p>
2666
+ * Native strings avoid copying, offering better performance for certain use cases. However,
2667
+ * clients must guarantee the lifetime of the native string as long as the {@link Value} is
2668
+ * alive. The returned value is guaranteed to return <code>true</code> for
2669
+ * {@link Value#isString()}.
2670
+ * <p>
2671
+ * <b>Usage Warning:</b> The polyglot context or engine does not manage the lifetime of the
2672
+ * native pointer. Clients must ensure that the pointer remains valid and that the memory is not
2673
+ * deallocated while the string is in use. Passing a deallocated or invalid pointer can result
2674
+ * in crashes or undefined behavior.
2675
+ * <p>
2676
+ * <b>Note:</b> Whenever possible, use {@link #fromByteBasedString(byte[], StringEncoding)} to
2677
+ * avoid the risks associated with native memory management.
2678
+ *
2679
+ * <ul>
2680
+ * <li>The native string's memory must remain valid for the lifetime of the context it is passed
2681
+ * to.
2682
+ * <li>The native bytes must not be mutated after being passed to this method.
2683
+ * <li>The bytes must already be encoded with the specified encoding.
2684
+ * </ul>
2685
+ *
2686
+ * @param basePointer the raw base pointer to the native string in memory
2687
+ * @param byteLength the length of the string in bytes
2688
+ * @param encoding the encoding of the native string
2689
+ * @param copy whether to copy the native string bytes for additional safety
2690
+ * @return a polyglot string {@link Value}
2691
+ * @since 24.2
2692
+ */
2693
+ public static Value fromNativeString (long basePointer , int byteOffset , int byteLength , StringEncoding encoding , boolean copy ) {
2694
+ Objects .requireNonNull (encoding );
2695
+ if (basePointer == 0L ) {
2696
+ throw new NullPointerException ("Null base pointer provided." );
2697
+ }
2698
+ if (byteLength < 0 ) {
2699
+ throw new IndexOutOfBoundsException ("byteLength must not be negative" );
2700
+ }
2701
+ if (byteOffset < 0 ) {
2702
+ throw new IndexOutOfBoundsException ("byteOffset must not be negative" );
2703
+ }
2704
+ return Engine .getImpl ().fromNativeString (basePointer , byteOffset , byteLength , encoding .value , copy );
2705
+ }
2706
+
2707
+ /**
2708
+ * Creates a native string object with default safety settings.
2709
+ * <p>
2710
+ * This method is equivalent to calling
2711
+ * {@link #fromNativeString(long, int, int, StringEncoding, boolean)} with {@code copy} set to
2712
+ * {@code true}.
2713
+ * </p>
2714
+ *
2715
+ * @param basePointer the raw base pointer to the native string in memory
2716
+ * @param byteLength the length of the string in bytes
2717
+ * @param encoding the encoding of the native string
2718
+ * @return a polyglot string {@link Value}
2719
+ * @since 24.2
2720
+ */
2721
+ public static Value fromNativeString (long basePointer , int byteLength , StringEncoding encoding ) {
2722
+ return fromNativeString (basePointer , 0 , byteLength , encoding , true );
2723
+ }
2724
+
2725
+ /**
2726
+ * Enum like class representing the supported string encodings. The encodings determine how byte
2727
+ * arrays or native strings are interpreted when creating or retrieving string values. This
2728
+ * class is not directly a enum to support compatible evolution.
2729
+ *
2730
+ * @since 24.2
2731
+ */
2732
+ public static final class StringEncoding {
2733
+
2734
+ /**
2735
+ * @since 24.2
2736
+ */
2737
+ public static final StringEncoding UTF_8 = new StringEncoding (0 );
2738
+
2739
+ /**
2740
+ * @since 24.2
2741
+ */
2742
+ public static final StringEncoding UTF_16_LITTLE_ENDIAN = new StringEncoding (1 );
2743
+ /**
2744
+ * @since 24.2
2745
+ */
2746
+ public static final StringEncoding UTF_16_BIG_ENDIAN = new StringEncoding (2 );
2747
+ /**
2748
+ * @since 24.2
2749
+ */
2750
+ public static final StringEncoding UTF_32_LITTLE_ENDIAN = new StringEncoding (3 );
2751
+ /**
2752
+ * @since 24.2
2753
+ */
2754
+ public static final StringEncoding UTF_32_BIG_ENDIAN = new StringEncoding (4 );
2755
+
2756
+ /**
2757
+ * The native UTF 16 encoding for the current platform.
2758
+ *
2759
+ * @see ByteOrder#nativeOrder()
2760
+ * @since 24.2
2761
+ */
2762
+ public static final StringEncoding UTF_16 = ByteOrder .nativeOrder () == ByteOrder .LITTLE_ENDIAN ? UTF_16_LITTLE_ENDIAN : UTF_16_BIG_ENDIAN ;
2763
+
2764
+ /**
2765
+ * The native UTF 32 encoding for the current platform.
2766
+ *
2767
+ * @see ByteOrder#nativeOrder()
2768
+ * @since 24.2
2769
+ */
2770
+ public static final StringEncoding UTF_32 = ByteOrder .nativeOrder () == ByteOrder .LITTLE_ENDIAN ? UTF_32_LITTLE_ENDIAN : UTF_32_BIG_ENDIAN ;
2771
+
2772
+ /*
2773
+ * Mapping table to PolyglotImpl.LazyEncodings.TABLE. Keep in sync.
2774
+ */
2775
+ final int value ;
2776
+
2777
+ private StringEncoding (int value ) {
2778
+ this .value = value ;
2779
+ }
2780
+
2781
+ }
2782
+
2579
2783
}
2580
2784
2581
2785
abstract class AbstractValue {
0 commit comments