Skip to content

Commit a41f94e

Browse files
committed
TruffleString: add MaterializeSubstringNode.
1 parent fc3dc54 commit a41f94e

File tree

5 files changed

+217
-1
lines changed

5 files changed

+217
-1
lines changed

truffle/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ This changelog summarizes major changes between Truffle versions relevant to lan
1717
* GR-67146: Bytecode DSL: Added support for user-defined yield operations using `@Yield`. These operations behave like the built-in yield but allow you to customize the yield result or perform custom logic on yield.
1818
* GR-69495: Bytecode DSL: Added a new `storeBytecodeIndex` attribute to all operation annotations to configure whether the bytecode index needs to be stored. When `@GenerateBytecode(storeBytecodeIndexInFrame = true)` is set and the attribute is left at its default, the DSL will emit a warning recommending explicit configuration. Additionally, introduced the `@StoreBytecodeIndex` annotation, which lets you specify bytecode index updates for individual specializations or fallbacks.
1919

20+
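* GR-68916: Added `AbstractTruffleString.materializeUncached(Encoding)` and `AbstractTruffleString.materializeSubstringUncached(Encoding)`. Deprecated `AbstractTruffleString.materializeUncached(AbstractTruffleString, Encoding)`, which materializes its argument instead of the receiver.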
21+
* GR-68916: Added `TruffleString.MaterializeSubstringNode`. Use this node to free any unnecessary memory held by lazy substrings.
2022

2123
## Version 25.0
2224
* GR-31495 Added ability to specify language and instrument specific options using `Source.Builder.option(String, String)`. Languages may describe available source options by implementing `TruffleLanguage.getSourceOptionDescriptors()` and `TruffleInstrument.getSourceOptionDescriptors()` respectively.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
42+
package com.oracle.truffle.api.strings.test.ops;
43+
44+
import static org.junit.runners.Parameterized.Parameter;
45+
import static org.junit.runners.Parameterized.Parameters;
46+
47+
import java.util.Arrays;
48+
49+
import org.junit.Assert;
50+
import org.junit.Test;
51+
import org.junit.runner.RunWith;
52+
import org.junit.runners.Parameterized;
53+
54+
import com.oracle.truffle.api.strings.InternalByteArray;
55+
import com.oracle.truffle.api.strings.TruffleString;
56+
import com.oracle.truffle.api.strings.test.TStringTestBase;
57+
58+
@RunWith(Parameterized.class)
59+
public class TStringMaterializeLazySubstringTest extends TStringTestBase {
60+
61+
@Parameter public TruffleString.MaterializeSubstringNode node;
62+
63+
@Parameters(name = "{0}")
64+
public static Iterable<TruffleString.MaterializeSubstringNode> data() {
65+
return Arrays.asList(TruffleString.MaterializeSubstringNode.create(), TruffleString.MaterializeSubstringNode.getUncached());
66+
}
67+
68+
@Test
69+
public void testAll() throws Exception {
70+
forAllStrings(true, (a, array, codeRange, isValid, encoding, codepoints, byteIndices) -> {
71+
TruffleString b = node.execute(a, encoding);
72+
assertBytesEqual(b, encoding, array);
73+
assertCodePointsEqual(b, encoding, codepoints);
74+
InternalByteArray internalByteArray = b.getInternalByteArrayUncached(encoding);
75+
Assert.assertEquals(0, internalByteArray.getOffset());
76+
Assert.assertEquals(array.length, internalByteArray.getLength());
77+
});
78+
}
79+
80+
@Test
81+
public void testNull() throws Exception {
82+
checkNullSE((s, e) -> node.execute(s, e));
83+
}
84+
}

truffle/src/com.oracle.truffle.api.strings/snapshot.sigtest

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ hfds GIL_LOCK,PARENT_LIMIT,SAME_LANGUAGE_CHECK_VISITOR,parent
8181
CLSS public abstract interface com.oracle.truffle.api.nodes.NodeInterface
8282

8383
CLSS public abstract com.oracle.truffle.api.strings.AbstractTruffleString
84+
meth public com.oracle.truffle.api.strings.TruffleString materializeSubstringUncached(com.oracle.truffle.api.strings.TruffleString$Encoding)
8485
meth public com.oracle.truffle.api.strings.TruffleString toValidStringUncached(com.oracle.truffle.api.strings.TruffleString$Encoding)
8586
meth public final boolean codeRangeEqualsUncached(com.oracle.truffle.api.strings.TruffleString$CodeRange)
8687
meth public final boolean equals(java.lang.Object)
@@ -163,6 +164,8 @@ meth public final void copyToNativeMemoryNodeUncached(int,java.lang.Object,int,i
163164
anno 0 java.lang.Deprecated(boolean forRemoval=false, java.lang.String since="22.3")
164165
meth public final void copyToNativeMemoryUncached(int,java.lang.Object,int,int,com.oracle.truffle.api.strings.TruffleString$Encoding)
165166
meth public void materializeUncached(com.oracle.truffle.api.strings.AbstractTruffleString,com.oracle.truffle.api.strings.TruffleString$Encoding)
167+
anno 0 java.lang.Deprecated(boolean forRemoval=false, java.lang.String since="26.0")
168+
meth public void materializeUncached(com.oracle.truffle.api.strings.TruffleString$Encoding)
166169
supr java.lang.Object
167170
hfds DEBUG_ALWAYS_CREATE_JAVA_STRING,DEBUG_NON_ZERO_OFFSET,DEBUG_STRICT_ENCODING_CHECKS,MASKED_ZERO_HASH_CODE,codePointLength,codeRange,data,encoding,flags,hashCode,length,offset,stride
168171
hcls LazyConcat,LazyLong,NativePointer
@@ -343,6 +346,7 @@ innr public abstract static LastByteIndexOfStringNode
343346
innr public abstract static LastIndexOfCodePointNode
344347
innr public abstract static LastIndexOfStringNode
345348
innr public abstract static MaterializeNode
349+
innr public abstract static MaterializeSubstringNode
346350
innr public abstract static ParseDoubleNode
347351
innr public abstract static ParseIntNode
348352
innr public abstract static ParseLongNode
@@ -895,6 +899,13 @@ meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeNode
895899
meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeNode getUncached()
896900
supr com.oracle.truffle.api.nodes.Node
897901

902+
CLSS public abstract static com.oracle.truffle.api.strings.TruffleString$MaterializeSubstringNode
903+
outer com.oracle.truffle.api.strings.TruffleString
904+
meth public abstract com.oracle.truffle.api.strings.TruffleString execute(com.oracle.truffle.api.strings.AbstractTruffleString,com.oracle.truffle.api.strings.TruffleString$Encoding)
905+
meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeSubstringNode create()
906+
meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeSubstringNode getUncached()
907+
supr com.oracle.truffle.api.nodes.Node
908+
898909
CLSS public final static com.oracle.truffle.api.strings.TruffleString$NumberFormatException
899910
outer com.oracle.truffle.api.strings.TruffleString
900911
meth public java.lang.String getMessage()
@@ -1150,7 +1161,7 @@ CLSS public final com.oracle.truffle.api.strings.TruffleStringFactory
11501161
cons public init()
11511162
innr public final static WithMaskFactory
11521163
supr java.lang.Object
1153-
hcls AsManagedNodeGen,AsNativeNodeGen,AsTruffleStringNodeGen,ByteIndexOfAnyByteNodeGen,ByteIndexOfCodePointNodeGen,ByteIndexOfCodePointSetNodeGen,ByteIndexOfStringNodeGen,ByteIndexToCodePointIndexNodeGen,ByteLengthOfCodePointNodeGen,CharIndexOfAnyCharUTF16NodeGen,CodePointAtByteIndexNodeGen,CodePointAtIndexNodeGen,CodePointIndexToByteIndexNodeGen,CodePointLengthNodeGen,CodeRangeEqualsNodeGen,CompareBytesNodeGen,CompareCharsUTF16NodeGen,CompareIntsUTF32NodeGen,ConcatNodeGen,CopyToByteArrayNodeGen,CopyToNativeMemoryNodeGen,CreateBackwardCodePointIteratorNodeGen,CreateCodePointIteratorNodeGen,EqualNodeGen,ForceEncodingNodeGen,FromByteArrayNodeGen,FromCharArrayUTF16NodeGen,FromCodePointNodeGen,FromIntArrayUTF32NodeGen,FromJavaStringNodeGen,FromLongNodeGen,FromNativePointerNodeGen,GetByteCodeRangeNodeGen,GetCodeRangeImpreciseNodeGen,GetCodeRangeNodeGen,GetInternalByteArrayNodeGen,GetInternalNativePointerNodeGen,GetStringCompactionLevelNodeGen,HashCodeNodeGen,IndexOfCodePointNodeGen,IndexOfStringNodeGen,IntIndexOfAnyIntUTF32NodeGen,InternalAsTruffleStringNodeGen,InternalCopyToByteArrayNodeGen,InternalSwitchEncodingNodeGen,IsValidNodeGen,LastByteIndexOfCodePointNodeGen,LastByteIndexOfStringNodeGen,LastIndexOfCodePointNodeGen,LastIndexOfStringNodeGen,MaterializeNodeGen,ParseDoubleNodeGen,ParseIntNodeGen,ParseLongNodeGen,ReadByteNodeGen,ReadCharUTF16NodeGen,RegionEqualByteIndexNodeGen,RegionEqualNodeGen,RepeatNodeGen,SubstringByteIndexNodeGen,SubstringNodeGen,SwitchEncodingNodeGen,ToJavaStringNodeGen,ToValidStringNodeGen
1164+
hcls AsManagedNodeGen,AsNativeNodeGen,AsTruffleStringNodeGen,ByteIndexOfAnyByteNodeGen,ByteIndexOfCodePointNodeGen,ByteIndexOfCodePointSetNodeGen,ByteIndexOfStringNodeGen,ByteIndexToCodePointIndexNodeGen,ByteLengthOfCodePointNodeGen,CharIndexOfAnyCharUTF16NodeGen,CodePointAtByteIndexNodeGen,CodePointAtIndexNodeGen,CodePointIndexToByteIndexNodeGen,CodePointLengthNodeGen,CodeRangeEqualsNodeGen,CompareBytesNodeGen,CompareCharsUTF16NodeGen,CompareIntsUTF32NodeGen,ConcatNodeGen,CopyToByteArrayNodeGen,CopyToNativeMemoryNodeGen,CreateBackwardCodePointIteratorNodeGen,CreateCodePointIteratorNodeGen,EqualNodeGen,ForceEncodingNodeGen,FromByteArrayNodeGen,FromCharArrayUTF16NodeGen,FromCodePointNodeGen,FromIntArrayUTF32NodeGen,FromJavaStringNodeGen,FromLongNodeGen,FromNativePointerNodeGen,GetByteCodeRangeNodeGen,GetCodeRangeImpreciseNodeGen,GetCodeRangeNodeGen,GetInternalByteArrayNodeGen,GetInternalNativePointerNodeGen,GetStringCompactionLevelNodeGen,HashCodeNodeGen,IndexOfCodePointNodeGen,IndexOfStringNodeGen,IntIndexOfAnyIntUTF32NodeGen,InternalAsTruffleStringNodeGen,InternalCopyToByteArrayNodeGen,InternalSwitchEncodingNodeGen,IsValidNodeGen,LastByteIndexOfCodePointNodeGen,LastByteIndexOfStringNodeGen,LastIndexOfCodePointNodeGen,LastIndexOfStringNodeGen,MaterializeNodeGen,MaterializeSubstringNodeGen,ParseDoubleNodeGen,ParseIntNodeGen,ParseLongNodeGen,ReadByteNodeGen,ReadCharUTF16NodeGen,RegionEqualByteIndexNodeGen,RegionEqualNodeGen,RepeatNodeGen,SubstringByteIndexNodeGen,SubstringNodeGen,SwitchEncodingNodeGen,ToJavaStringNodeGen,ToValidStringNodeGen
11541165

11551166
CLSS public final static com.oracle.truffle.api.strings.TruffleStringFactory$WithMaskFactory
11561167
outer com.oracle.truffle.api.strings.TruffleStringFactory

truffle/src/com.oracle.truffle.api.strings/src/com/oracle/truffle/api/strings/AbstractTruffleString.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,13 +628,35 @@ public final MutableTruffleString asManagedMutableTruffleStringUncached(TruffleS
628628
/**
629629
* Shorthand for calling the uncached version of {@link TruffleString.MaterializeNode}.
630630
*
631+
* @deprecated since 26.0, use {@link #materializeUncached(Encoding)} instead. This overload materializes the argument {@code a}, not the receiver.
631632
* @since 23.1
632633
*/
634+
@Deprecated(since = "26.0")
633635
@TruffleBoundary
634636
public void materializeUncached(AbstractTruffleString a, Encoding expectedEncoding) {
635637
TruffleString.MaterializeNode.getUncached().execute(a, expectedEncoding);
636638
}
637639

640+
/**
641+
* Shorthand for calling the uncached version of {@link TruffleString.MaterializeNode} on this string.
642+
*
643+
* @since 26.0
644+
*/
645+
@TruffleBoundary
646+
public void materializeUncached(Encoding expectedEncoding) {
647+
TruffleString.MaterializeNode.getUncached().execute(this, expectedEncoding);
648+
}
649+
650+
/**
651+
* Shorthand for calling the uncached version of {@link TruffleString.MaterializeSubstringNode} on this string. The returned string is potentially a new object.
652+
*
653+
* @since 26.0
654+
*/
655+
@TruffleBoundary
656+
public TruffleString materializeSubstringUncached(Encoding expectedEncoding) {
657+
return TruffleString.MaterializeSubstringNode.getUncached().execute(this, expectedEncoding);
658+
}
659+
638660
/**
639661
* Shorthand for calling the uncached version of {@link TruffleString.GetCodeRangeNode}.
640662
*

truffle/src/com.oracle.truffle.api.strings/src/com/oracle/truffle/api/strings/TruffleString.java

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,6 +2591,103 @@ public static MaterializeNode getUncached() {
25912591
}
25922592
}
25932593

2594+
/**
2595+
* Extended version of {@link MaterializeNode} that also materializes substrings/string views
2596+
* created via e.g. {@link SubstringNode} with the {@code lazy} parameter set to {@code true} or
2597+
* via {@link FromByteArrayNode} with the {@code copy} parameter set to {@code false}. Note that
2598+
* this node returns a potentially new {@link TruffleString}, because these substrings cannot be
2599+
* materialized internally. {@link #isNative() Native-backed} strings are kept as-is, since this
2600+
* node cannot decide whether a native-backed string is a substring/string view.
2601+
* <p>
2602+
* Lazy substrings keep a reference to the potentially larger byte array they were created from,
2603+
* which may keep more memory alive than necessary (see the caveats mentioned at
2604+
* {@link SubstringNode#execute}). With this node, unnecessarily held memory is reclaimed by
2605+
* copying the referenced subregions. This is especially useful if a string is added to a data
2606+
* structure known to be long-lived, e.g. during string interning.
2607+
*
2608+
* @see SubstringNode#execute
2609+
* @since 26.0
2610+
*/
2611+
public abstract static class MaterializeSubstringNode extends AbstractPublicNode {
2612+
2613+
MaterializeSubstringNode() {
2614+
}
2615+
2616+
/**
2617+
* Extended version of {@link MaterializeNode} that also materializes substrings/string
2618+
* views created via e.g. {@link SubstringNode} with the {@code lazy} parameter set to
2619+
* {@code true} or via {@link FromByteArrayNode} with the {@code copy} parameter set to
2620+
* {@code false}. Note that this node returns a potentially new {@link TruffleString},
2621+
* because these substrings cannot be materialized internally. {@link #isNative()
2622+
* Native-backed} strings are kept as-is, since this node cannot decide whether a
2623+
* native-backed string is a substring/string view.
2624+
* <p>
2625+
* Lazy substrings keep a reference to the potentially larger byte array they were created
2626+
* from, which may keep more memory alive than necessary (see the caveats mentioned at
2627+
* {@link SubstringNode#execute}). With this node, unnecessarily held memory is reclaimed by
2628+
* copying the referenced subregions. This is especially useful if a string is added to a
2629+
* data structure known to be long-lived, e.g. during string interning.
2630+
*
2631+
* @see SubstringNode#execute
2632+
* @since 26.0
2633+
*/
2634+
public abstract TruffleString execute(AbstractTruffleString a, Encoding expectedEncoding);
2635+
2636+
@Specialization
2637+
final TruffleString doTruffleString(TruffleString a, Encoding expectedEncoding,
2638+
@Cached InlinedConditionProfile managedProfileA,
2639+
@Cached InlinedConditionProfile nativeProfileA,
2640+
@Cached InlinedConditionProfile stringViewProfile) {
2641+
a.checkEncoding(expectedEncoding);
2642+
Object dataA = a.data();
2643+
try {
2644+
if (managedProfileA.profile(this, dataA instanceof byte[])) {
2645+
final byte[] arrayA = (byte[]) dataA;
2646+
int byteLength = a.length() << a.stride();
2647+
if (stringViewProfile.profile(this, byteLength < arrayA.length)) {
2648+
byte[] copy = new byte[byteLength];
2649+
System.arraycopy(arrayA, a.offset(), copy, 0, byteLength);
2650+
return TruffleString.createFromByteArray(copy, 0, a.length(), a.stride(), expectedEncoding, a.codePointLength(), a.codeRange(), a.hashCode, true);
2651+
} else {
2652+
return a;
2653+
}
2654+
} else if (nativeProfileA.profile(this, dataA instanceof NativePointer)) {
2655+
return a;
2656+
} else {
2657+
a.materializeLazy(this, dataA);
2658+
return a;
2659+
}
2660+
} finally {
2661+
Reference.reachabilityFence(dataA);
2662+
}
2663+
}
2664+
2665+
@Specialization
2666+
final TruffleString doMutableTruffleString(MutableTruffleString a, Encoding expectedEncoding,
2667+
@Cached TStringInternalNodes.FromBufferWithStringCompactionKnownAttributesNode fromBufferWithStringCompactionNode) {
2668+
return fromBufferWithStringCompactionNode.execute(this, a, expectedEncoding);
2669+
}
2670+
2671+
/**
2672+
* Create a new {@link MaterializeSubstringNode}.
2673+
*
2674+
* @since 26.0
2675+
*/
2676+
@NeverDefault
2677+
public static MaterializeSubstringNode create() {
2678+
return TruffleStringFactory.MaterializeSubstringNodeGen.create();
2679+
}
2680+
2681+
/**
2682+
* Get the uncached version of {@link MaterializeSubstringNode}.
2683+
*
2684+
* @since 26.0
2685+
*/
2686+
public static MaterializeSubstringNode getUncached() {
2687+
return TruffleStringFactory.MaterializeSubstringNodeGen.getUncached();
2688+
}
2689+
}
2690+
25942691
/**
25952692
* Node to get a string's precise {@link CodeRange}.
25962693
*

0 commit comments

Comments
 (0)