Skip to content

Commit a454ad8

Browse files
committed
Always have a TruffleString in RubySource
* This simplifies callers significantly and avoids duplicating that logic.
* It is also more efficient as it creates the TruffleString immediately in the right encoding.
1 parent 670fdb4 commit a454ad8

File tree

3 files changed

+46
-21
lines changed

3 files changed

+46
-21
lines changed

spec/truffle/interop/polyglot/polyglot_spec.rb

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,8 @@
4242
Polyglot.eval("ruby", "# encoding: us-ascii\n__ENCODING__.name").should == "US-ASCII"
4343
end
4444

45-
it "will not allow code in Ruby to have a magic comment to change the encoding to something not a subset of UTF-8" do
46-
-> {
47-
Polyglot.eval("ruby", "# encoding: big5\n__ENCODING__.name")
48-
}.should raise_error(ArgumentError, /big5 cannot be used as an encoding for a Polyglot API Source/)
45+
it "will allow code in Ruby to have a magic comment to change the encoding to something not a subset of UTF-8" do
46+
Polyglot.eval("ruby", "# encoding: big5\n__ENCODING__.name").should == "Big5"
4947
end
5048
end
5149

@@ -87,10 +85,8 @@
8785
Polyglot.eval_file("ruby", fixture(__FILE__, "usascii_magic.rb")).should == "US-ASCII"
8886
end
8987

90-
it "will not allow code in Ruby to have a magic comment to change the encoding" do
91-
-> {
92-
Polyglot.eval_file("ruby", fixture(__FILE__, "big5_magic.rb"))
93-
}.should raise_error(ArgumentError, /big5 cannot be used as an encoding for a Polyglot API Source/)
88+
it "will allow code in Ruby to have a magic comment to change the encoding" do
89+
Polyglot.eval_file("ruby", fixture(__FILE__, "big5_magic.rb")).should == "Big5"
9490
end
9591
end
9692

src/main/java/org/truffleruby/RubyFileTypeDetector.java

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,34 +65,48 @@ public String findMimeType(TruffleFile file) throws IOException {
6565
}
6666

6767
@Override
68-
public Charset findEncoding(TruffleFile file) throws IOException {
68+
public Charset findEncoding(TruffleFile file) {
6969
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
70-
final String firstLine = fileContent.readLine();
70+
return findEncoding(fileContent).getCharset();
71+
} catch (IOException | SecurityException e) {
72+
// Reading random files as UTF-8 could cause all sorts of errors
73+
return Encodings.UTF_8.jcoding.getCharset();
74+
}
75+
}
76+
77+
public static Encoding findEncoding(BufferedReader reader) {
78+
try {
79+
final String firstLine = reader.readLine();
7180
if (firstLine != null) {
72-
String encodingCommentLine;
81+
final String encodingCommentLine;
7382
if (SHEBANG_REGEXP.matcher(firstLine).matches()) {
74-
encodingCommentLine = fileContent.readLine();
83+
encodingCommentLine = reader.readLine();
7584
} else {
7685
encodingCommentLine = firstLine;
7786
}
87+
7888
if (encodingCommentLine != null) {
7989
var encodingComment = new TStringWithEncoding(TStringUtils.utf8TString(encodingCommentLine),
8090
Encodings.UTF_8);
81-
Charset[] encodingHolder = new Charset[1];
91+
Encoding[] encodingHolder = new Encoding[1];
8292
RubyLexer.parseMagicComment(encodingComment, (name, value) -> {
8393
if (RubyLexer.isMagicEncodingComment(name)) {
8494
Encoding encoding = EncodingManager.getEncoding(value);
8595
if (encoding != null) {
86-
encodingHolder[0] = encoding.getCharset();
96+
encodingHolder[0] = encoding;
8797
}
8898
}
8999
});
90-
return encodingHolder[0];
100+
if (encodingHolder[0] != null) {
101+
return encodingHolder[0];
102+
}
91103
}
92104
}
93-
} catch (IOException | SecurityException e) {
94-
// Reading random files as UTF-8 could cause all sorts of errors
105+
} catch (IOException e) {
106+
// Use the default encoding if reading failed somehow
95107
}
96-
return null;
108+
109+
// The default encoding
110+
return Encodings.UTF_8.jcoding;
97111
}
98112
}

src/main/java/org/truffleruby/parser/RubySource.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@
99
*/
1010
package org.truffleruby.parser;
1111

12+
import java.io.BufferedReader;
13+
import java.io.StringReader;
1214
import java.util.Objects;
1315

1416
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
1517
import com.oracle.truffle.api.source.SourceSection;
1618
import com.oracle.truffle.api.strings.TruffleString;
1719
import org.truffleruby.RubyContext;
20+
import org.truffleruby.RubyFileTypeDetector;
1821
import org.truffleruby.RubyLanguage;
1922

2023
import com.oracle.truffle.api.source.Source;
24+
import org.truffleruby.core.encoding.Encodings;
2125
import org.truffleruby.core.encoding.RubyEncoding;
2226
import org.truffleruby.core.encoding.TStringUtils;
2327
import org.truffleruby.core.string.TStringWithEncoding;
@@ -43,7 +47,7 @@ public RubySource(Source source, String sourcePath, TStringWithEncoding code) {
4347
this(source, sourcePath, code, false);
4448
}
4549

46-
public RubySource(Source source, String sourcePath, TStringWithEncoding code, boolean isEval) {
50+
private RubySource(Source source, String sourcePath, TStringWithEncoding code, boolean isEval) {
4751
this(source, sourcePath, code, isEval, 0);
4852
}
4953

@@ -52,8 +56,19 @@ public RubySource(Source source, String sourcePath, TStringWithEncoding code, bo
5256
this.source = Objects.requireNonNull(source);
5357
//intern() to improve footprint
5458
this.sourcePath = Objects.requireNonNull(sourcePath).intern();
55-
this.code = code != null ? code.tstring : null;
56-
this.encoding = code != null ? code.encoding : null;
59+
60+
if (code == null) {
61+
// We only have the Source, which only contains a java.lang.String.
62+
// The sourcePath might not exist, so we cannot reread from the filesystem.
63+
// So we look for the magic encoding comment and if not found use UTF-8.
64+
var sourceString = source.getCharacters().toString();
65+
var jcoding = RubyFileTypeDetector.findEncoding(new BufferedReader(new StringReader(sourceString)));
66+
var encoding = Encodings.getBuiltInEncoding(jcoding);
67+
code = new TStringWithEncoding(TStringUtils.fromJavaString(sourceString, encoding), encoding);
68+
}
69+
70+
this.code = code.tstring;
71+
this.encoding = code.encoding;
5772
this.isEval = isEval;
5873
this.lineOffset = lineOffset;
5974
}

0 commit comments

Comments
 (0)