Skip to content

Commit 87e570b

Browse files
committed
[GR-68916] TruffleString: add MaterializeSubstringNode.
PullRequest: graal/22135
2 parents 57ee59f + 5fa7d8b commit 87e570b

File tree

6 files changed

+237
-3
lines changed

6 files changed

+237
-3
lines changed

truffle/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ This changelog summarizes major changes between Truffle versions relevant to lan
1717
* GR-67146: Bytecode DSL: Added support for user-defined yield operations using `@Yield`. These operations behave like the built-in yield but allow you to customize the yield result or perform custom logic on yield.
1818
* GR-69495: Bytecode DSL: Added a new `storeBytecodeIndex` attribute to all operation annotations to configure whether the bytecode index needs to be stored. When `@GenerateBytecode(storeBytecodeIndexInFrame = true)` is set and the attribute is left at its default, the DSL will emit a warning recommending explicit configuration. Additionally, introduced the `@StoreBytecodeIndex` annotation, which lets you specify bytecode index updates for individual specializations or fallbacks.
1919

20+
* GR-68916: Added `TruffleString.MaterializeSubstringNode` (uncached shorthand: `AbstractTruffleString.materializeSubstringUncached(Encoding)`). Use this node to free any unnecessary memory held by lazy substrings.
21+
* GR-68916: Added `TruffleString.MaterializeSubstringNode`. Use this node to free any unnecessary memory held by lazy substrings.
2022

2123
## Version 25.0
2224
* GR-31495 Added ability to specify language and instrument specific options using `Source.Builder.option(String, String)`. Languages may describe available source options by implementing `TruffleLanguage.getSourceOptionDescriptors()` and `TruffleInstrument.getSourceOptionDescriptors()` respectively.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
42+
package com.oracle.truffle.api.strings.test.ops;
43+
44+
import static org.junit.runners.Parameterized.Parameter;
45+
import static org.junit.runners.Parameterized.Parameters;
46+
47+
import java.util.Arrays;
48+
49+
import org.junit.Assert;
50+
import org.junit.Test;
51+
import org.junit.runner.RunWith;
52+
import org.junit.runners.Parameterized;
53+
54+
import com.oracle.truffle.api.strings.InternalByteArray;
55+
import com.oracle.truffle.api.strings.TruffleString;
56+
import com.oracle.truffle.api.strings.test.TStringTestBase;
57+
58+
/**
 * Parameterized test for {@link TruffleString.MaterializeSubstringNode}. Every test method is run
 * once with a node obtained from {@code create()} and once with the node obtained from
 * {@code getUncached()} (see {@link #data()}).
 *
 * NOTE(review): the class name still says "Lazy" while the node under test is
 * {@code MaterializeSubstringNode} - confirm whether a rename of the test class is intended.
 */
@RunWith(Parameterized.class)
59+
public class TStringMaterializeLazySubstringTest extends TStringTestBase {
60+
61+
// Node instance under test; injected by the Parameterized runner from data().
@Parameter public TruffleString.MaterializeSubstringNode node;
62+
63+
// Supplies the two node variants exercised by every test: create() and getUncached().
@Parameters(name = "{0}")
64+
public static Iterable<TruffleString.MaterializeSubstringNode> data() {
65+
return Arrays.asList(TruffleString.MaterializeSubstringNode.create(), TruffleString.MaterializeSubstringNode.getUncached());
66+
}
67+
68+
// For every string handed to the callback by forAllStrings, the node's result must have the same
// bytes and code points as the input, and its internal byte array must start at offset 0 and
// span exactly array.length bytes.
@Test
69+
public void testAll() throws Exception {
70+
forAllStrings(true, (a, array, codeRange, isValid, encoding, codepoints, byteIndices) -> {
71+
TruffleString b = node.execute(a, encoding);
72+
// Content must be unchanged by the operation.
assertBytesEqual(b, encoding, array);
73+
assertCodePointsEqual(b, encoding, codepoints);
74+
InternalByteArray internalByteArray = b.getInternalByteArrayUncached(encoding);
75+
// The backing storage must be addressed from offset 0 ...
Assert.assertEquals(0, internalByteArray.getOffset());
76+
// ... and must cover exactly as many bytes as the expected content.
Assert.assertEquals(array.length, internalByteArray.getLength());
77+
});
78+
}
79+
80+
// Runs the node through the shared checkNullSE helper; presumably this verifies the handling of
// null string / encoding arguments - confirm against TStringTestBase.
@Test
81+
public void testNull() throws Exception {
82+
checkNullSE((s, e) -> node.execute(s, e));
83+
}
84+
}

truffle/src/com.oracle.truffle.api.strings/snapshot.sigtest

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ hfds GIL_LOCK,PARENT_LIMIT,SAME_LANGUAGE_CHECK_VISITOR,parent
8181
CLSS public abstract interface com.oracle.truffle.api.nodes.NodeInterface
8282

8383
CLSS public abstract com.oracle.truffle.api.strings.AbstractTruffleString
84+
meth public com.oracle.truffle.api.strings.TruffleString materializeSubstringUncached(com.oracle.truffle.api.strings.TruffleString$Encoding)
8485
meth public com.oracle.truffle.api.strings.TruffleString toValidStringUncached(com.oracle.truffle.api.strings.TruffleString$Encoding)
8586
meth public final boolean codeRangeEqualsUncached(com.oracle.truffle.api.strings.TruffleString$CodeRange)
8687
meth public final boolean equals(java.lang.Object)
@@ -163,6 +164,8 @@ meth public final void copyToNativeMemoryNodeUncached(int,java.lang.Object,int,i
163164
anno 0 java.lang.Deprecated(boolean forRemoval=false, java.lang.String since="22.3")
164165
meth public final void copyToNativeMemoryUncached(int,java.lang.Object,int,int,com.oracle.truffle.api.strings.TruffleString$Encoding)
165166
meth public void materializeUncached(com.oracle.truffle.api.strings.AbstractTruffleString,com.oracle.truffle.api.strings.TruffleString$Encoding)
167+
anno 0 java.lang.Deprecated(boolean forRemoval=false, java.lang.String since="26.0")
168+
meth public void materializeUncached(com.oracle.truffle.api.strings.TruffleString$Encoding)
166169
supr java.lang.Object
167170
hfds DEBUG_ALWAYS_CREATE_JAVA_STRING,DEBUG_NON_ZERO_OFFSET,DEBUG_STRICT_ENCODING_CHECKS,MASKED_ZERO_HASH_CODE,codePointLength,codeRange,data,encoding,flags,hashCode,length,offset,stride
168171
hcls LazyConcat,LazyLong,NativePointer
@@ -343,6 +346,7 @@ innr public abstract static LastByteIndexOfStringNode
343346
innr public abstract static LastIndexOfCodePointNode
344347
innr public abstract static LastIndexOfStringNode
345348
innr public abstract static MaterializeNode
349+
innr public abstract static MaterializeSubstringNode
346350
innr public abstract static ParseDoubleNode
347351
innr public abstract static ParseIntNode
348352
innr public abstract static ParseLongNode
@@ -895,6 +899,13 @@ meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeNode
895899
meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeNode getUncached()
896900
supr com.oracle.truffle.api.nodes.Node
897901

902+
CLSS public abstract static com.oracle.truffle.api.strings.TruffleString$MaterializeSubstringNode
903+
outer com.oracle.truffle.api.strings.TruffleString
904+
meth public abstract com.oracle.truffle.api.strings.TruffleString execute(com.oracle.truffle.api.strings.AbstractTruffleString,com.oracle.truffle.api.strings.TruffleString$Encoding)
905+
meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeSubstringNode create()
906+
meth public static com.oracle.truffle.api.strings.TruffleString$MaterializeSubstringNode getUncached()
907+
supr com.oracle.truffle.api.nodes.Node
908+
898909
CLSS public final static com.oracle.truffle.api.strings.TruffleString$NumberFormatException
899910
outer com.oracle.truffle.api.strings.TruffleString
900911
meth public java.lang.String getMessage()
@@ -1150,7 +1161,7 @@ CLSS public final com.oracle.truffle.api.strings.TruffleStringFactory
11501161
cons public init()
11511162
innr public final static WithMaskFactory
11521163
supr java.lang.Object
1153-
hcls AsManagedNodeGen,AsNativeNodeGen,AsTruffleStringNodeGen,ByteIndexOfAnyByteNodeGen,ByteIndexOfCodePointNodeGen,ByteIndexOfCodePointSetNodeGen,ByteIndexOfStringNodeGen,ByteIndexToCodePointIndexNodeGen,ByteLengthOfCodePointNodeGen,CharIndexOfAnyCharUTF16NodeGen,CodePointAtByteIndexNodeGen,CodePointAtIndexNodeGen,CodePointIndexToByteIndexNodeGen,CodePointLengthNodeGen,CodeRangeEqualsNodeGen,CompareBytesNodeGen,CompareCharsUTF16NodeGen,CompareIntsUTF32NodeGen,ConcatNodeGen,CopyToByteArrayNodeGen,CopyToNativeMemoryNodeGen,CreateBackwardCodePointIteratorNodeGen,CreateCodePointIteratorNodeGen,EqualNodeGen,ForceEncodingNodeGen,FromByteArrayNodeGen,FromCharArrayUTF16NodeGen,FromCodePointNodeGen,FromIntArrayUTF32NodeGen,FromJavaStringNodeGen,FromLongNodeGen,FromNativePointerNodeGen,GetByteCodeRangeNodeGen,GetCodeRangeImpreciseNodeGen,GetCodeRangeNodeGen,GetInternalByteArrayNodeGen,GetInternalNativePointerNodeGen,GetStringCompactionLevelNodeGen,HashCodeNodeGen,IndexOfCodePointNodeGen,IndexOfStringNodeGen,IntIndexOfAnyIntUTF32NodeGen,InternalAsTruffleStringNodeGen,InternalCopyToByteArrayNodeGen,InternalSwitchEncodingNodeGen,IsValidNodeGen,LastByteIndexOfCodePointNodeGen,LastByteIndexOfStringNodeGen,LastIndexOfCodePointNodeGen,LastIndexOfStringNodeGen,MaterializeNodeGen,ParseDoubleNodeGen,ParseIntNodeGen,ParseLongNodeGen,ReadByteNodeGen,ReadCharUTF16NodeGen,RegionEqualByteIndexNodeGen,RegionEqualNodeGen,RepeatNodeGen,SubstringByteIndexNodeGen,SubstringNodeGen,SwitchEncodingNodeGen,ToJavaStringNodeGen,ToValidStringNodeGen
1164+
hcls AsManagedNodeGen,AsNativeNodeGen,AsTruffleStringNodeGen,ByteIndexOfAnyByteNodeGen,ByteIndexOfCodePointNodeGen,ByteIndexOfCodePointSetNodeGen,ByteIndexOfStringNodeGen,ByteIndexToCodePointIndexNodeGen,ByteLengthOfCodePointNodeGen,CharIndexOfAnyCharUTF16NodeGen,CodePointAtByteIndexNodeGen,CodePointAtIndexNodeGen,CodePointIndexToByteIndexNodeGen,CodePointLengthNodeGen,CodeRangeEqualsNodeGen,CompareBytesNodeGen,CompareCharsUTF16NodeGen,CompareIntsUTF32NodeGen,ConcatNodeGen,CopyToByteArrayNodeGen,CopyToNativeMemoryNodeGen,CreateBackwardCodePointIteratorNodeGen,CreateCodePointIteratorNodeGen,EqualNodeGen,ForceEncodingNodeGen,FromByteArrayNodeGen,FromCharArrayUTF16NodeGen,FromCodePointNodeGen,FromIntArrayUTF32NodeGen,FromJavaStringNodeGen,FromLongNodeGen,FromNativePointerNodeGen,GetByteCodeRangeNodeGen,GetCodeRangeImpreciseNodeGen,GetCodeRangeNodeGen,GetInternalByteArrayNodeGen,GetInternalNativePointerNodeGen,GetStringCompactionLevelNodeGen,HashCodeNodeGen,IndexOfCodePointNodeGen,IndexOfStringNodeGen,IntIndexOfAnyIntUTF32NodeGen,InternalAsTruffleStringNodeGen,InternalCopyToByteArrayNodeGen,InternalSwitchEncodingNodeGen,IsValidNodeGen,LastByteIndexOfCodePointNodeGen,LastByteIndexOfStringNodeGen,LastIndexOfCodePointNodeGen,LastIndexOfStringNodeGen,MaterializeNodeGen,MaterializeSubstringNodeGen,ParseDoubleNodeGen,ParseIntNodeGen,ParseLongNodeGen,ReadByteNodeGen,ReadCharUTF16NodeGen,RegionEqualByteIndexNodeGen,RegionEqualNodeGen,RepeatNodeGen,SubstringByteIndexNodeGen,SubstringNodeGen,SwitchEncodingNodeGen,ToJavaStringNodeGen,ToValidStringNodeGen
11541165

11551166
CLSS public final static com.oracle.truffle.api.strings.TruffleStringFactory$WithMaskFactory
11561167
outer com.oracle.truffle.api.strings.TruffleStringFactory

truffle/src/com.oracle.truffle.api.strings/src/com/oracle/truffle/api/strings/AbstractTruffleString.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,13 +628,35 @@ public final MutableTruffleString asManagedMutableTruffleStringUncached(TruffleS
628628
/**
628629
* Shorthand for calling the uncached version of {@link TruffleString.MaterializeNode}.
* <p>
* Note that this overload passes the argument {@code a} to the node; the receiver of the call
* is not used.
629630
*
* @param a the string handed to {@link TruffleString.MaterializeNode}.
* @param expectedEncoding the encoding handed to {@link TruffleString.MaterializeNode}.
631+
* @deprecated since 26.0, use {@link #materializeUncached(Encoding)} instead.
631632
* @since 23.1
632633
*/
634+
@Deprecated(since = "26.0")
633635
@TruffleBoundary
634636
public void materializeUncached(AbstractTruffleString a, Encoding expectedEncoding) {
635637
// Delegates with the explicit argument 'a', not with 'this'.
TruffleString.MaterializeNode.getUncached().execute(a, expectedEncoding);
636638
}
637639

640+
/**
641+
* Shorthand for calling the uncached version of {@link TruffleString.MaterializeNode}.
* The node is executed on this string.
642+
*
* @param expectedEncoding the encoding handed to {@link TruffleString.MaterializeNode}.
643+
* @since 26.0
644+
*/
645+
@TruffleBoundary
646+
public void materializeUncached(Encoding expectedEncoding) {
647+
// Delegates with the receiver itself as the string to materialize.
TruffleString.MaterializeNode.getUncached().execute(this, expectedEncoding);
648+
}
649+
650+
/**
651+
* Shorthand for calling the uncached version of {@link TruffleString.MaterializeSubstringNode}.
* The node is executed on this string.
652+
*
* @param expectedEncoding the encoding handed to {@link TruffleString.MaterializeSubstringNode}.
* @return the {@link TruffleString} returned by {@link TruffleString.MaterializeSubstringNode}.
653+
* @since 26.0
654+
*/
655+
@TruffleBoundary
656+
public TruffleString materializeSubstringUncached(Encoding expectedEncoding) {
657+
// Delegates with the receiver itself and hands the node's result back to the caller.
return TruffleString.MaterializeSubstringNode.getUncached().execute(this, expectedEncoding);
658+
}
659+
638660
/**
639661
* Shorthand for calling the uncached version of {@link TruffleString.GetCodeRangeNode}.
640662
*

truffle/src/com.oracle.truffle.api.strings/src/com/oracle/truffle/api/strings/TStringInternalNodes.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ static TruffleString fromBufferWithStringCompaction(Node node, Object dataA, int
317317
@Cached InlinedConditionProfile utf32Profile,
318318
@Cached InlinedConditionProfile utf32Compact0Profile,
319319
@Cached InlinedConditionProfile utf32Compact1Profile,
320+
@Cached InlinedConditionProfile singleByteProfile,
320321
@Cached InlinedByteValueProfile unlikelyEncodingProfile) {
321322
assert dataA instanceof byte[] || dataA instanceof NativePointer;
322323
try {
@@ -343,6 +344,9 @@ static TruffleString fromBufferWithStringCompaction(Node node, Object dataA, int
343344
codeRange = StringAttributes.getCodeRange(attrs);
344345
stride = Stride.fromCodeRangeUTF16(codeRange);
345346
if (copy || stride == 0) {
347+
if (singleByteProfile.profile(node, isCacheHead && length == 1 && stride == 0)) {
348+
return TStringConstants.getSingleByte(Encoding.UTF_16, TStringOps.readS1(arrayA, offsetA, 1, 0));
349+
}
346350
offset = 0;
347351
array = new byte[length << stride];
348352
if (utf16CompactProfile.profile(node, stride == 0)) {
@@ -361,6 +365,9 @@ static TruffleString fromBufferWithStringCompaction(Node node, Object dataA, int
361365
codePointLength = length;
362366
stride = Stride.fromCodeRangeUTF32(codeRange);
363367
if (copy || stride < 2) {
368+
if (singleByteProfile.profile(node, isCacheHead && length == 1 && stride == 0)) {
369+
return TStringConstants.getSingleByte(Encoding.UTF_32, TStringOps.readS2(arrayA, offsetA, 1, 0));
370+
}
364371
offset = 0;
365372
array = new byte[length << stride];
366373
if (utf32Compact0Profile.profile(node, stride == 0)) {
@@ -379,6 +386,9 @@ static TruffleString fromBufferWithStringCompaction(Node node, Object dataA, int
379386
stride = 0;
380387
final long attrs;
381388
if (utf8Profile.profile(node, isUTF8(encoding))) {
389+
if (singleByteProfile.profile(node, isCacheHead && length == 1)) {
390+
return TStringConstants.getSingleByte(Encoding.UTF_8, TStringOps.readS0(arrayA, offsetA, 1, 0));
391+
}
382392
attrs = TStringOps.calcStringAttributesUTF8(node, arrayA, offsetA, length, false, false, utf8BrokenProfile);
383393
} else {
384394
attrs = unlikelyCases(node, arrayA, offsetA, byteLength, encoding, unlikelyEncodingProfile.profile(node, encoding.id));
@@ -1410,7 +1420,8 @@ static TruffleString materializeSubstring(Node node, AbstractTruffleString a, by
14101420
@SuppressWarnings("unused") boolean lazy,
14111421
@Shared("attributes") @Cached CalcStringAttributesNode calcAttributesNode,
14121422
@Exclusive @Cached InlinedConditionProfile utf16Profile,
1413-
@Exclusive @Cached InlinedConditionProfile utf32Profile) {
1423+
@Exclusive @Cached InlinedConditionProfile utf32Profile,
1424+
@Exclusive @Cached InlinedConditionProfile singleByteProfile) {
14141425
final long attrs;
14151426
final int codeRange;
14161427
final int stride;
@@ -1431,6 +1442,9 @@ static TruffleString materializeSubstring(Node node, AbstractTruffleString a, by
14311442
codeRange = StringAttributes.getCodeRange(attrs);
14321443
newStride = 0;
14331444
}
1445+
if (singleByteProfile.profile(node, length == 1 && newStride == 0 && encoding.isSupported())) {
1446+
return TStringConstants.getSingleByte(encoding, TStringOps.readValue(arrayA, offsetA, a.length(), stride, fromIndex));
1447+
}
14341448
byte[] newBytes = TStringOps.arraycopyOfWithStride(node, arrayA, offsetA + (fromIndex << stride), length, stride, length, newStride);
14351449
return TruffleString.createFromByteArray(newBytes, length, newStride, encoding, StringAttributes.getCodePointLength(attrs), codeRange);
14361450
}
@@ -1440,11 +1454,15 @@ static TruffleString createLazySubstring(Node node, TruffleString a, byte[] arra
14401454
@SuppressWarnings("unused") boolean lazy,
14411455
@Shared("attributes") @Cached CalcStringAttributesNode calcAttributesNode,
14421456
@Exclusive @Cached InlinedConditionProfile stride1MustMaterializeProfile,
1443-
@Exclusive @Cached InlinedConditionProfile stride2MustMaterializeProfile) {
1457+
@Exclusive @Cached InlinedConditionProfile stride2MustMaterializeProfile,
1458+
@Exclusive @Cached InlinedConditionProfile singleByteProfile) {
14441459
long lazyOffset = offsetA + (fromIndex << a.stride());
14451460
long attrs = calcAttributesNode.execute(node, a, arrayA, offsetA, length, a.stride(), encoding, fromIndex, codeRangeA);
14461461
int codeRange = StringAttributes.getCodeRange(attrs);
14471462
int codePointLength = StringAttributes.getCodePointLength(attrs);
1463+
if (singleByteProfile.profile(node, length == 1 && Stride.fromCodeRange(codeRange, encoding) == 0 && encoding.isSupported())) {
1464+
return TStringConstants.getSingleByte(encoding, TStringOps.readValue(arrayA, offsetA, a.length(), a.stride(), fromIndex));
1465+
}
14481466
final Object data;
14491467
final int offset;
14501468
final int stride;

0 commit comments

Comments
 (0)