Skip to content

Commit 08546e2

Browse files
committed
Add ByteBasedCharSequence to deal with YARP giving us byte offsets but Truffle wanting CharSequence
1 parent 40d6a7e commit 08546e2

File tree

6 files changed

+97
-24
lines changed

6 files changed

+97
-24
lines changed

src/main/java/org/truffleruby/debug/TruffleDebugNodes.java

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import com.oracle.truffle.api.object.DynamicObjectLibrary;
3434
import com.oracle.truffle.api.source.Source;
3535
import com.oracle.truffle.api.source.SourceSection;
36-
import com.oracle.truffle.api.strings.AbstractTruffleString;
3736
import com.oracle.truffle.api.strings.TruffleString;
3837
import org.graalvm.collections.Pair;
3938
import org.truffleruby.Layouts;
@@ -302,20 +301,15 @@ public abstract static class YARPExecuteNode extends CoreMethodArrayArgumentsNod
302301
@Specialization(guards = "strings.isRubyString(code)", limit = "1")
303302
protected Object yarpExecute(VirtualFrame frame, Object code,
304303
@Cached RubyStringLibrary strings) {
305-
var tstring = strings.getTString(code);
306-
var encoding = strings.getEncoding(code);
307-
308-
return doExecute(tstring, encoding, RubyArguments.getMethod(frame));
304+
return doExecute(code, RubyArguments.getMethod(frame));
309305
}
310306

311307
@TruffleBoundary
312-
private Object doExecute(AbstractTruffleString tstring, RubyEncoding encoding, InternalMethod method) {
313-
String sourceString = TStringUtils.toJavaStringOrThrow(tstring, encoding);
314-
Source source = Source.newBuilder("ruby", sourceString, "<parse_ast>").build();
308+
private Object doExecute(Object code, InternalMethod method) {
315309
TranslatorEnvironment.resetTemporaryVariablesIndex();
316310

317311
final RootCallTarget callTarget = RubyLanguage.getCurrentContext().getCodeLoader().parseWithYARP(
318-
new RubySource(source, source.getName()),
312+
code,
319313
ParserContext.TOP_LEVEL,
320314
null,
321315
RubyLanguage.getCurrentContext().getRootLexicalScope(),
@@ -1465,23 +1459,22 @@ private RubyRootNode parse(String sourceCode) {
14651459
@CoreMethod(names = "parse_with_yarp_and_dump_truffle_ast", onSingleton = true, required = 3, lowerFixnum = 3)
14661460
public abstract static class ParseWithYARPAndDumpTruffleASTNode extends CoreMethodArrayArgumentsNode {
14671461

1468-
@Specialization
14691462
@TruffleBoundary
1470-
protected Object parseAndDump(Object sourceCode, Object focusedNodeClassName, int index,
1463+
@Specialization(guards = "strings.isRubyString(code)", limit = "1")
1464+
protected Object parseAndDump(Object code, Object focusedNodeClassName, int index,
1465+
@Cached RubyStringLibrary strings,
14711466
@Cached TruffleString.FromJavaStringNode fromJavaStringNode) {
1472-
String sourceCodeString = RubyGuards.getJavaString(sourceCode);
14731467
String nodeClassNameString = RubyGuards.getJavaString(focusedNodeClassName);
1474-
RubyRootNode rootNode = parse(sourceCodeString);
1468+
RubyRootNode rootNode = parse(code);
14751469
String output = TruffleASTPrinter.dump(rootNode, nodeClassNameString, index);
14761470
return createString(fromJavaStringNode, output, Encodings.UTF_8);
14771471
}
14781472

1479-
private RubyRootNode parse(String sourceCode) {
1480-
Source source = Source.newBuilder("ruby", sourceCode, "<parse_ast>").build();
1473+
private RubyRootNode parse(Object code) {
14811474
TranslatorEnvironment.resetTemporaryVariablesIndex();
14821475

14831476
final RootCallTarget callTarget = RubyLanguage.getCurrentContext().getCodeLoader().parseWithYARP(
1484-
new RubySource(source, source.getName()),
1477+
code,
14851478
ParserContext.TOP_LEVEL,
14861479
null,
14871480
RubyLanguage.getCurrentContext().getRootLexicalScope(),
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright (c) 2023 Oracle and/or its affiliates. All rights reserved. This
3+
* code is released under a tri EPL/GPL/LGPL license. You can use it,
4+
* redistribute it and/or modify it under the terms of the:
5+
*
6+
* Eclipse Public License version 2.0, or
7+
* GNU General Public License version 2, or
8+
* GNU Lesser General Public License version 2.1.
9+
*/
10+
package org.truffleruby.language.loader;
11+
12+
import com.oracle.truffle.api.strings.TruffleString;
13+
import org.truffleruby.core.encoding.Encodings;
14+
import org.truffleruby.core.encoding.RubyEncoding;
15+
import org.truffleruby.core.string.TStringWithEncoding;
16+
17+
/** YARP gives use byte offsets but Truffle wants a CharSequence, this tries to integrate both as much as possible.
18+
* Using a java.lang.String instead would mean computing char offsets, which is prohibitively expensive. */
19+
public final class ByteBasedCharSequence implements CharSequence {
20+
21+
private final byte[] bytes;
22+
private final int offset;
23+
private final int length;
24+
private final RubyEncoding encoding;
25+
26+
public ByteBasedCharSequence(TStringWithEncoding tstringWithEnc) {
27+
this(tstringWithEnc.getBytesOrCopy(), 0, tstringWithEnc.byteLength(), tstringWithEnc.encoding);
28+
29+
// Ensure it can be converted to a Java String early
30+
if (tstringWithEnc.encoding == Encodings.BINARY) {
31+
tstringWithEnc.toJavaStringOrThrow();
32+
}
33+
}
34+
35+
private ByteBasedCharSequence(byte[] bytes, int offset, int length, RubyEncoding encoding) {
36+
this.bytes = bytes;
37+
this.offset = offset;
38+
this.length = length;
39+
this.encoding = encoding;
40+
}
41+
42+
@Override
43+
public int length() {
44+
return bytes.length;
45+
}
46+
47+
@Override
48+
public char charAt(int index) {
49+
assert index >= offset && index < offset + length;
50+
return (char) bytes[offset + index];
51+
}
52+
53+
@Override
54+
public CharSequence subSequence(int start, int end) {
55+
return new ByteBasedCharSequence(bytes, start, end - start, encoding);
56+
}
57+
58+
@Override
59+
public String toString() {
60+
return TruffleString.fromByteArrayUncached(bytes, offset, length, encoding.tencoding, false)
61+
.toJavaStringUncached();
62+
}
63+
}

src/main/java/org/truffleruby/language/loader/CodeLoader.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
import org.truffleruby.core.string.TStringWithEncoding;
1818
import org.truffleruby.language.LexicalScope;
1919
import org.truffleruby.language.Nil;
20+
import org.truffleruby.language.RubyGuards;
2021
import org.truffleruby.language.RubyNode;
2122
import org.truffleruby.language.RubyRootNode;
2223
import org.truffleruby.annotations.Visibility;
2324
import org.truffleruby.language.arguments.EmptyArgumentsDescriptor;
2425
import org.truffleruby.language.arguments.RubyArguments;
26+
import org.truffleruby.language.library.RubyStringLibrary;
2527
import org.truffleruby.language.methods.DeclarationContext;
2628
import org.truffleruby.language.methods.InternalMethod;
2729
import org.truffleruby.language.methods.SharedMethodInfo;
@@ -88,14 +90,18 @@ public RootCallTarget parse(RubySource source,
8890
}
8991

9092
@TruffleBoundary
91-
public RootCallTarget parseWithYARP(RubySource source,
93+
public RootCallTarget parseWithYARP(Object code,
9294
ParserContext parserContext,
9395
MaterializedFrame parentFrame,
9496
LexicalScope lexicalScope,
9597
Node currentNode) {
96-
final YARPTranslatorDriver translator = new YARPTranslatorDriver(context, source);
97-
return translator
98-
.parse(source, parserContext, null, parentFrame, lexicalScope, currentNode);
98+
var tstringWithEnc = new TStringWithEncoding(RubyGuards.asTruffleStringUncached(code),
99+
RubyStringLibrary.getUncached().getEncoding(code));
100+
var charSequence = new ByteBasedCharSequence(tstringWithEnc);
101+
Source source = Source.newBuilder("ruby", charSequence, "<parse_ast>").build();
102+
var rubySource = new RubySource(source, source.getName(), tstringWithEnc);
103+
final YARPTranslatorDriver translator = new YARPTranslatorDriver(context, rubySource);
104+
return translator.parse(rubySource, parserContext, null, parentFrame, lexicalScope, currentNode);
99105
}
100106

101107
@TruffleBoundary

src/main/java/org/truffleruby/parser/RubySource.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import com.oracle.truffle.api.source.Source;
2121
import org.truffleruby.core.encoding.RubyEncoding;
22+
import org.truffleruby.core.encoding.TStringUtils;
2223
import org.truffleruby.core.string.TStringWithEncoding;
2324

2425
public final class RubySource {
@@ -29,6 +30,7 @@ public final class RubySource {
2930
* Sources in the future (but then we'll need to still use this path in Ruby backtraces). */
3031
private final String sourcePath;
3132
private final TruffleString code;
33+
private byte[] bytes;
3234
private final RubyEncoding encoding;
3335
private final boolean isEval;
3436
private final int lineOffset;
@@ -69,6 +71,7 @@ public boolean hasTruffleString() {
6971
}
7072

7173
public TruffleString getTruffleString() {
74+
assert hasTruffleString();
7275
return code;
7376
}
7477

@@ -77,6 +80,15 @@ public TStringWithEncoding getTStringWithEncoding() {
7780
return new TStringWithEncoding(code, encoding);
7881
}
7982

83+
public byte[] getBytes() {
84+
assert hasTruffleString();
85+
if (bytes != null) {
86+
return bytes;
87+
} else {
88+
return bytes = TStringUtils.getBytesOrCopy(code, encoding);
89+
}
90+
}
91+
8092
public RubyEncoding getEncoding() {
8193
assert hasTruffleString();
8294
return encoding;

src/main/java/org/truffleruby/parser/YARPTranslatorDriver.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ public RootCallTarget parse(RubySource rubySource, ParserContext parserContext,
184184
// Needs the magic comment to be parsed
185185
parseEnvironment.allowTruffleRubyPrimitives = parserConfiguration.allowTruffleRubyPrimitives;
186186

187-
final SourceSection sourceSection = source.createSection(0, source.getCharacters().length());
187+
final SourceSection sourceSection = source.createSection(0, rubySource.getBytes().length);
188188
final SourceIndexLength sourceIndexLength = SourceIndexLength.fromSourceSection(sourceSection);
189189

190190
final String modulePath = staticLexicalScope == null || staticLexicalScope == context.getRootLexicalScope()
@@ -226,7 +226,7 @@ public RootCallTarget parse(RubySource rubySource, ParserContext parserContext,
226226

227227
// Translate to Ruby Truffle nodes
228228

229-
byte[] sourceBytes = rubySource.getSource().getCharacters().toString().getBytes();
229+
byte[] sourceBytes = rubySource.getBytes();
230230
final YARPTranslator translator = new YARPTranslator(
231231
language,
232232
null,
@@ -395,7 +395,7 @@ public static org.yarp.Nodes.Node parseToYARPAST(RubyContext context, RubyLangua
395395
TruffleSafepoint.poll(DummyNode.INSTANCE);
396396

397397
// YARP begin
398-
byte[] sourceBytes = rubySource.getSource().getCharacters().toString().getBytes();
398+
byte[] sourceBytes = rubySource.getBytes();
399399
org.yarp.Parser.loadLibrary(language.getRubyHome() + "/lib/libyarp" + Platform.LIB_SUFFIX);
400400
byte[] serializedBytes = Parser.parseAndSerialize(sourceBytes);
401401

src/main/java/org/truffleruby/parser/lexer/LexerSource.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ public LexerSource(RubySource rubySource) {
7474
rubyEncoding = Encodings.UTF_8;
7575
// TODO CS 5-Sep-17 can we get the bytes directly rather than using getCharacters -> toString -> getBytes?
7676
var sourceString = source.getCharacters().toString();
77-
// this.sourceBytes = TStringUtils.fromByteArray(sourceString.getBytes(StandardCharsets.UTF_8), TruffleString.Encoding.UTF_8);
7877
this.sourceBytes = TStringUtils.utf8TString(sourceString);
7978
}
8079
this.sourceByteLength = sourceBytes.byteLength(rubyEncoding.tencoding);

0 commit comments

Comments
 (0)