Skip to content

Commit 233d5f6

Browse files
committed
Ensure the RubySource#encoding is always the same as the magic encoding or default encoding
1 parent ef58462 commit 233d5f6

File tree

8 files changed

+73
-64
lines changed

8 files changed

+73
-64
lines changed

src/main/java/org/truffleruby/RubyFileTypeDetector.java

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import java.util.regex.Pattern;
1818

1919
import org.jcodings.Encoding;
20-
import org.truffleruby.core.encoding.EncodingManager;
2120
import org.truffleruby.core.encoding.Encodings;
2221
import org.truffleruby.core.encoding.TStringUtils;
2322
import org.truffleruby.core.string.TStringWithEncoding;
@@ -72,7 +71,7 @@ public Charset findEncoding(TruffleFile file) {
7271
return findEncoding(fileContent).getCharset();
7372
} catch (IOException | SecurityException e) {
7473
// Reading random files could cause all sorts of errors
75-
return Encodings.UTF_8.jcoding.getCharset();
74+
return StandardCharsets.UTF_8;
7675
}
7776
}
7877

@@ -90,17 +89,9 @@ public static Encoding findEncoding(BufferedReader reader) {
9089
if (encodingCommentLine != null) {
9190
var encodingComment = new TStringWithEncoding(
9291
TStringUtils.fromJavaString(encodingCommentLine, Encodings.BINARY), Encodings.BINARY);
93-
Encoding[] encodingHolder = new Encoding[1];
94-
RubyLexer.parseMagicComment(encodingComment, (name, value) -> {
95-
if (RubyLexer.isMagicEncodingComment(name)) {
96-
Encoding encoding = EncodingManager.getEncoding(value);
97-
if (encoding != null) {
98-
encodingHolder[0] = encoding;
99-
}
100-
}
101-
});
102-
if (encodingHolder[0] != null) {
103-
return encodingHolder[0];
92+
var encoding = RubyLexer.parseMagicEncodingComment(encodingComment);
93+
if (encoding != null) {
94+
return encoding.jcoding;
10495
}
10596
}
10697
}

src/main/java/org/truffleruby/language/loader/EvalLoader.java

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
import com.oracle.truffle.api.nodes.Node;
1313
import com.oracle.truffle.api.strings.AbstractTruffleString;
1414
import org.truffleruby.RubyContext;
15-
import org.truffleruby.core.encoding.EncodingManager;
16-
import org.truffleruby.core.encoding.Encodings;
1715
import org.truffleruby.core.encoding.RubyEncoding;
1816
import org.truffleruby.core.string.CannotConvertBinaryRubyStringToJavaString;
1917
import org.truffleruby.core.string.TStringWithEncoding;
@@ -32,7 +30,7 @@ public static RubySource createEvalSource(RubyContext context, AbstractTruffleSt
3230
RubyEncoding encoding, String method, String file, int line, Node currentNode) {
3331
var code = new TStringWithEncoding(codeTString.asTruffleStringUncached(encoding.tencoding), encoding);
3432

35-
var sourceTString = createEvalTString(code);
33+
var sourceTString = RubyLexer.createSourceTStringBasedOnMagicEncodingComment(code, code.encoding);
3634
var sourceEncoding = sourceTString.encoding;
3735

3836
if (!sourceEncoding.isAsciiCompatible) {
@@ -65,20 +63,4 @@ public static RubySource createEvalSource(RubyContext context, AbstractTruffleSt
6563
return rubySource;
6664
}
6765

68-
private static TStringWithEncoding createEvalTString(TStringWithEncoding source) {
69-
final RubyEncoding[] encoding = { source.getEncoding() };
70-
71-
RubyLexer.parseMagicComment(source, (name, value) -> {
72-
if (RubyLexer.isMagicEncodingComment(name)) {
73-
encoding[0] = Encodings.getBuiltInEncoding(EncodingManager.getEncoding(value));
74-
}
75-
});
76-
77-
if (source.getEncoding() != encoding[0]) {
78-
source = source.forceEncoding(encoding[0]);
79-
}
80-
81-
return source;
82-
}
83-
8466
}

src/main/java/org/truffleruby/language/loader/FileLoader.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
import org.truffleruby.RubyContext;
1818
import org.truffleruby.RubyLanguage;
1919
import org.truffleruby.core.encoding.Encodings;
20-
import org.truffleruby.core.encoding.TStringUtils;
2120
import org.truffleruby.core.string.TStringWithEncoding;
2221
import org.truffleruby.language.control.RaiseException;
22+
import org.truffleruby.parser.lexer.RubyLexer;
2323
import org.truffleruby.shared.TruffleRuby;
2424

2525
import com.oracle.truffle.api.TruffleFile;
@@ -69,10 +69,11 @@ public Pair<Source, TStringWithEncoding> loadFile(String path) throws IOExceptio
6969
* and pass them down to the lexer and to the Source. */
7070

7171
final byte[] sourceBytes = file.readAllBytes();
72-
var tstringWithEnc = new TStringWithEncoding(TStringUtils.fromByteArray(sourceBytes, Encodings.UTF_8),
73-
Encodings.UTF_8);
74-
final Source source = buildSource(file, path, tstringWithEnc, isInternal(path), false);
75-
return Pair.create(source, tstringWithEnc);
72+
73+
var sourceTString = RubyLexer.createSourceTStringBasedOnMagicEncodingComment(sourceBytes, Encodings.UTF_8);
74+
75+
final Source source = buildSource(file, path, sourceTString, isInternal(path), false);
76+
return Pair.create(source, sourceTString);
7677
}
7778

7879
public static TruffleFile getSafeTruffleFile(RubyLanguage language, RubyContext context, String path) {

src/main/java/org/truffleruby/language/loader/MainLoader.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
import org.truffleruby.RubyContext;
1616
import org.truffleruby.RubyLanguage;
1717
import org.truffleruby.core.encoding.Encodings;
18-
import org.truffleruby.core.encoding.TStringUtils;
1918
import org.truffleruby.core.string.TStringWithEncoding;
2019
import org.truffleruby.parser.RubySource;
20+
import org.truffleruby.parser.lexer.RubyLexer;
2121
import org.truffleruby.shared.TruffleRuby;
2222

2323
import com.oracle.truffle.api.TruffleFile;
@@ -64,7 +64,7 @@ private TStringWithEncoding transformScript(Node currentNode, String path, byte[
6464
sourceBytes = embeddedScript.transformForExecution(currentNode, sourceBytes, path);
6565
}
6666

67-
return new TStringWithEncoding(TStringUtils.fromByteArray(sourceBytes, Encodings.UTF_8), Encodings.UTF_8);
67+
return RubyLexer.createSourceTStringBasedOnMagicEncodingComment(sourceBytes, Encodings.UTF_8);
6868
}
6969

7070
private byte[] readAllOfStandardIn() throws IOException {

src/main/java/org/truffleruby/parser/RubySource.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.truffleruby.core.encoding.RubyEncoding;
2626
import org.truffleruby.core.encoding.TStringUtils;
2727
import org.truffleruby.core.string.TStringWithEncoding;
28+
import org.truffleruby.parser.lexer.RubyLexer;
2829

2930
public final class RubySource {
3031

@@ -35,7 +36,6 @@ public final class RubySource {
3536
private final String sourcePath;
3637
private final TruffleString code;
3738
private byte[] bytes;
38-
// FIXME: not always the source encoding, e.g. when loading from a file or eval, need to check the magic encoding comment
3939
private final RubyEncoding encoding;
4040
private final boolean isEval;
4141
private final int lineOffset;
@@ -67,13 +67,20 @@ public RubySource(Source source, String sourcePath, TStringWithEncoding code, bo
6767
var encoding = Encodings.getBuiltInEncoding(jcoding);
6868
code = new TStringWithEncoding(TStringUtils.fromJavaString(sourceString, encoding), encoding);
6969
}
70+
assert checkMagicEncoding(code);
7071

7172
this.code = code.tstring;
7273
this.encoding = code.encoding;
7374
this.isEval = isEval;
7475
this.lineOffset = lineOffset;
7576
}
7677

78+
private static boolean checkMagicEncoding(TStringWithEncoding code) {
79+
var magicEncoding = RubyLexer.parseMagicEncodingComment(code);
80+
assert magicEncoding == null || magicEncoding == code.encoding;
81+
return true;
82+
}
83+
7784
public Source getSource() {
7885
return source;
7986
}

src/main/java/org/truffleruby/parser/YARPTranslatorDriver.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,9 +503,11 @@ public static org.prism.Nodes.Node parseToYARPAST(RubyContext context, RubyLangu
503503
public static RubySource createRubySource(Object code) {
504504
var tstringWithEnc = new TStringWithEncoding(RubyGuards.asTruffleStringUncached(code),
505505
RubyStringLibrary.getUncached().getEncoding(code));
506-
var charSequence = new ByteBasedCharSequence(tstringWithEnc);
506+
var sourceTString = RubyLexer.createSourceTStringBasedOnMagicEncodingComment(tstringWithEnc,
507+
tstringWithEnc.encoding);
508+
var charSequence = new ByteBasedCharSequence(sourceTString);
507509
Source source = Source.newBuilder("ruby", charSequence, "<parse_ast>").build();
508-
return new RubySource(source, source.getName(), tstringWithEnc);
510+
return new RubySource(source, source.getName(), sourceTString);
509511
}
510512

511513
public static Nodes.Source createYARPSource(byte[] sourceBytes, RubySource rubySource) {

src/main/java/org/truffleruby/parser/lexer/LexerSource.java

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@
3838

3939
import com.oracle.truffle.api.strings.TruffleString;
4040
import org.jcodings.Encoding;
41-
import org.truffleruby.core.encoding.Encodings;
4241
import org.truffleruby.core.encoding.RubyEncoding;
43-
import org.truffleruby.core.encoding.TStringUtils;
4442
import org.truffleruby.core.string.TStringConstants;
4543
import org.truffleruby.parser.RubySource;
4644
import org.truffleruby.parser.parser.ParserRopeOperations;
@@ -49,13 +47,13 @@
4947

5048
public final class LexerSource {
5149

52-
public ParserRopeOperations parserRopeOperations;
50+
public final ParserRopeOperations parserRopeOperations;
5351
private final Source source;
5452
private final String sourcePath;
5553

56-
private TruffleString sourceBytes;
54+
private final TruffleString sourceBytes;
5755
private final int sourceByteLength;
58-
private RubyEncoding encoding;
56+
private final RubyEncoding encoding;
5957
private int byteOffset;
6058
private final int lineOffset;
6159

@@ -87,13 +85,6 @@ public RubyEncoding getRubyEncoding() {
8785
return encoding;
8886
}
8987

90-
public void setEncoding(Encoding jcoding) {
91-
var rubyEncoding = Encodings.getBuiltInEncoding(jcoding);
92-
this.sourceBytes = sourceBytes.forceEncodingUncached(this.encoding.tencoding, rubyEncoding.tencoding);
93-
this.encoding = rubyEncoding;
94-
this.parserRopeOperations = new ParserRopeOperations(this.encoding);
95-
}
96-
9788
public int getOffset() {
9889
return byteOffset;
9990
}

src/main/java/org/truffleruby/parser/lexer/RubyLexer.java

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@
5252
import java.util.Collections;
5353
import java.util.HashMap;
5454
import java.util.Map;
55+
import java.util.Objects;
5556
import java.util.function.BiConsumer;
5657

58+
import com.oracle.truffle.api.CompilerDirectives;
5759
import com.oracle.truffle.api.TruffleSafepoint;
5860
import com.oracle.truffle.api.strings.InternalByteArray;
5961
import com.oracle.truffle.api.strings.TruffleString;
@@ -64,6 +66,7 @@
6466
import org.truffleruby.RubyContext;
6567
import org.truffleruby.annotations.SuppressFBWarnings;
6668
import org.truffleruby.collections.ByteArrayBuilder;
69+
import org.truffleruby.collections.Memo;
6770
import org.truffleruby.core.DummyNode;
6871
import org.truffleruby.core.array.ArrayUtils;
6972
import org.truffleruby.core.encoding.EncodingManager;
@@ -91,7 +94,6 @@
9194
import org.truffleruby.parser.ast.ParseNode;
9295
import org.truffleruby.parser.ast.RationalParseNode;
9396
import org.truffleruby.parser.ast.StrParseNode;
94-
import org.truffleruby.parser.parser.ParserRopeOperations;
9597
import org.truffleruby.parser.parser.ParserSupport;
9698
import org.truffleruby.parser.parser.RubyParser;
9799

@@ -823,7 +825,7 @@ private int yylex() {
823825
// affect lexer performance.
824826
if (!tokenSeen) {
825827
if (!parser_magic_comment(new TStringWithEncoding(lexb, encoding), lex_p, lex_pend - lex_p,
826-
src.parserRopeOperations, this)) {
828+
this)) {
827829
if (comment_at_top()) {
828830
set_file_encoding(lex_p, lex_pend);
829831
}
@@ -1115,6 +1117,43 @@ private static int newLineIndex(InternalByteArray bytes, int start) {
11151117
}
11161118
}
11171119

1120+
public static TStringWithEncoding createSourceTStringBasedOnMagicEncodingComment(byte[] bytes,
1121+
RubyEncoding defaultEncoding) {
1122+
// We need a TStringWithEncoding, it could be in any encoding, it's just bytes at this stage.
1123+
// We use the defaultEncoding so then we do not need to scan bytes for CodeRange a second time
1124+
// if there is no magic encoding or if it's the same as defaultEncoding.
1125+
var tstring = new TStringWithEncoding(TStringUtils.fromByteArray(bytes, defaultEncoding), defaultEncoding);
1126+
return createSourceTStringBasedOnMagicEncodingComment(tstring, defaultEncoding);
1127+
}
1128+
1129+
public static TStringWithEncoding createSourceTStringBasedOnMagicEncodingComment(TStringWithEncoding source,
1130+
RubyEncoding defaultEncoding) {
1131+
Objects.requireNonNull(defaultEncoding);
1132+
var encoding = RubyLexer.parseMagicEncodingComment(source);
1133+
if (encoding == null) {
1134+
encoding = defaultEncoding;
1135+
}
1136+
if (source.getEncoding() != encoding) {
1137+
source = source.forceEncoding(encoding);
1138+
}
1139+
return source;
1140+
}
1141+
1142+
public static RubyEncoding parseMagicEncodingComment(TStringWithEncoding source) {
1143+
var encoding = new Memo<RubyEncoding>(null);
1144+
1145+
parseMagicComment(source, (name, value) -> {
1146+
if (RubyLexer.isMagicEncodingComment(name)) {
1147+
Encoding jcoding = EncodingManager.getEncoding(value);
1148+
if (jcoding != null) {
1149+
encoding.set(Encodings.getBuiltInEncoding(jcoding));
1150+
}
1151+
}
1152+
});
1153+
1154+
return encoding.get();
1155+
}
1156+
11181157
/** Peek in source to see if there is a magic comment. This is used by eval() & friends to know the actual encoding
11191158
* of the source code, and be able to convert to a Java String faithfully. */
11201159
public static void parseMagicComment(TStringWithEncoding source, BiConsumer<String, String> magicCommentHandler) {
@@ -1141,9 +1180,7 @@ public static void parseMagicComment(TStringWithEncoding source, BiConsumer<Stri
11411180
}
11421181
int magicLineLength = endOfMagicLine - magicLineStart;
11431182

1144-
RubyEncoding rubyEncoding = source.getEncoding();
11451183
parser_magic_comment(source, magicLineStart, magicLineLength,
1146-
new ParserRopeOperations(rubyEncoding),
11471184
(name, value) -> {
11481185
magicCommentHandler.accept(name, value.toJavaStringUncached());
11491186
return isKnownMagicComment(name);
@@ -1153,7 +1190,7 @@ public static void parseMagicComment(TStringWithEncoding source, BiConsumer<Stri
11531190

11541191
// MRI: parser_magic_comment
11551192
private static boolean parser_magic_comment(TStringWithEncoding magicLine, int magicLineOffset, int magicLineLength,
1156-
ParserRopeOperations parserRopeOperations, MagicCommentHandler magicCommentHandler) {
1193+
MagicCommentHandler magicCommentHandler) {
11571194
boolean emacsStyle = false;
11581195
int i = magicLineOffset;
11591196
int end = magicLineOffset + magicLineLength;
@@ -3301,11 +3338,9 @@ public void setCurrentArg(TruffleString current_arg) {
33013338
}
33023339

33033340
public void setEncoding(Encoding jcoding) {
3304-
src.setEncoding(jcoding);
3305-
var prevEncoding = this.encoding;
3306-
this.encoding = Encodings.getBuiltInEncoding(jcoding);
3307-
this.tencoding = this.encoding.tencoding;
3308-
lexb = lexb.forceEncodingUncached(prevEncoding.tencoding, tencoding);
3341+
if (jcoding != encoding.jcoding) {
3342+
throw CompilerDirectives.shouldNotReachHere("the encoding must already be set correctly in RubySource");
3343+
}
33093344
}
33103345

33113346
protected void set_file_encoding(int str, int send) {

0 commit comments

Comments
 (0)