Skip to content

Commit ef58462

Browse files
committed
Use the correct encodings to scan for magic comments in RubyFileTypeDetector
1 parent a984ded commit ef58462

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

src/main/java/org/truffleruby/RubyFileTypeDetector.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,23 +53,25 @@ public String findMimeType(TruffleFile file) throws IOException {
5353
}
5454
}
5555

56-
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
56+
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.ISO_8859_1)) {
5757
final String firstLine = fileContent.readLine();
5858
if (firstLine != null && SHEBANG_REGEXP.matcher(firstLine).matches()) {
5959
return RubyLanguage.getMimeType(false);
6060
}
6161
} catch (IOException | SecurityException e) {
62-
// Reading random files as UTF-8 could cause all sorts of errors
62+
// Best effort: reading arbitrary files can fail in many ways; ignore the error and fall through to return null
6363
}
6464
return null;
6565
}
6666

6767
@Override
6868
public Charset findEncoding(TruffleFile file) {
69-
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.UTF_8)) {
69+
// We use ISO-8859-1 because every byte is valid in that encoding and
70+
// we only care about US-ASCII characters for magic encoding comments.
71+
try (BufferedReader fileContent = file.newBufferedReader(StandardCharsets.ISO_8859_1)) {
7072
return findEncoding(fileContent).getCharset();
7173
} catch (IOException | SecurityException e) {
72-
// Reading random files as UTF-8 could cause all sorts of errors
74+
// Best effort: reading arbitrary files can fail in many ways; fall back to UTF-8
7375
return Encodings.UTF_8.jcoding.getCharset();
7476
}
7577
}
@@ -86,8 +88,8 @@ public static Encoding findEncoding(BufferedReader reader) {
8688
}
8789

8890
if (encodingCommentLine != null) {
89-
var encodingComment = new TStringWithEncoding(TStringUtils.utf8TString(encodingCommentLine),
90-
Encodings.UTF_8);
91+
var encodingComment = new TStringWithEncoding(
92+
TStringUtils.fromJavaString(encodingCommentLine, Encodings.BINARY), Encodings.BINARY);
9193
Encoding[] encodingHolder = new Encoding[1];
9294
RubyLexer.parseMagicComment(encodingComment, (name, value) -> {
9395
if (RubyLexer.isMagicEncodingComment(name)) {

0 commit comments

Comments
 (0)