Skip to content

Commit abd6d2c

Browse files
committed
Also use the get_file_encoding() logic for parseMagicEncodingComment()
1 parent 953df20 commit abd6d2c

File tree

1 file changed

+50
-33
lines changed

1 file changed

+50
-33
lines changed

src/main/java/org/truffleruby/parser/lexer/RubyLexer.java

Lines changed: 50 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
import java.util.HashMap;
5454
import java.util.Map;
5555
import java.util.Objects;
56-
import java.util.function.BiConsumer;
5756

5857
import com.oracle.truffle.api.CompilerDirectives;
5958
import com.oracle.truffle.api.TruffleSafepoint;
@@ -451,7 +450,7 @@ protected void setTokenInfo(String name, TruffleString value) {
451450

452451
protected void setEncoding(TruffleString name) {
453452
final RubyContext context = parserSupport.getConfiguration().getContext();
454-
var nameString = name.toJavaStringUncached();
453+
String nameString = name.toJavaStringUncached();
455454
final Encoding newEncoding = EncodingManager.getEncoding(nameString);
456455

457456
if (newEncoding == null) {
@@ -1139,24 +1138,11 @@ public static TStringWithEncoding createSourceTStringBasedOnMagicEncodingComment
11391138
return source;
11401139
}
11411140

1141+
/** Peek in source to see if there is a magic encoding comment. This is used by eval() & friends to know the actual
1142+
* encoding of the source code, and be able to convert to a Java String faithfully. */
11421143
public static RubyEncoding parseMagicEncodingComment(TStringWithEncoding source) {
11431144
var encoding = new Memo<RubyEncoding>(null);
11441145

1145-
parseMagicComment(source, (name, value) -> {
1146-
if (RubyLexer.isMagicEncodingComment(name)) {
1147-
Encoding jcoding = EncodingManager.getEncoding(value);
1148-
if (jcoding != null) {
1149-
encoding.set(Encodings.getBuiltInEncoding(jcoding));
1150-
}
1151-
}
1152-
});
1153-
1154-
return encoding.get();
1155-
}
1156-
1157-
/** Peak in source to see if there is a magic comment. This is used by eval() & friends to know the actual encoding
1158-
* of the source code, and be able to convert to a Java String faithfully. */
1159-
public static void parseMagicComment(TStringWithEncoding source, BiConsumer<String, String> magicCommentHandler) {
11601146
var bytes = source.getInternalByteArray();
11611147
final int length = bytes.getLength();
11621148
int start = 0;
@@ -1180,12 +1166,32 @@ public static void parseMagicComment(TStringWithEncoding source, BiConsumer<Stri
11801166
}
11811167
int magicLineLength = endOfMagicLine - magicLineStart;
11821168

1183-
parser_magic_comment(source, magicLineStart, magicLineLength,
1169+
TStringWithEncoding magicLine = source.substring(magicLineStart, magicLineLength);
1170+
1171+
parser_magic_comment(magicLine, 0, magicLineLength,
11841172
(name, value) -> {
1185-
magicCommentHandler.accept(name, value.toJavaStringUncached());
1186-
return isKnownMagicComment(name);
1173+
if (RubyLexer.isMagicEncodingComment(name)) {
1174+
Encoding jcoding = EncodingManager.getEncoding(value.toJavaStringUncached());
1175+
if (jcoding != null) {
1176+
encoding.set(Encodings.getBuiltInEncoding(jcoding));
1177+
return true;
1178+
}
1179+
}
1180+
return false;
11871181
});
1182+
1183+
if (encoding.get() == null) {
1184+
TruffleString encodingName = get_file_encoding(magicLine);
1185+
if (encodingName != null) {
1186+
Encoding jcoding = EncodingManager.getEncoding(encodingName.toJavaStringUncached());
1187+
if (jcoding != null) {
1188+
encoding.set(Encodings.getBuiltInEncoding(jcoding));
1189+
}
1190+
}
1191+
}
11881192
}
1193+
1194+
return encoding.get();
11891195
}
11901196

11911197
// MRI: parser_magic_comment
@@ -3339,18 +3345,21 @@ public void setCurrentArg(TruffleString current_arg) {
33393345

33403346
public void setEncoding(Encoding jcoding) {
33413347
if (jcoding != encoding.jcoding) {
3342-
throw CompilerDirectives.shouldNotReachHere("the encoding must already be set correctly in RubySource");
3348+
throw CompilerDirectives
3349+
.shouldNotReachHere("the encoding must already be set correctly in RubySource for " + getFile());
33433350
}
33443351
}
33453352

3346-
protected void set_file_encoding(int str, int send) {
3353+
private static TruffleString get_file_encoding(TStringWithEncoding magicLine) {
3354+
int str = 0;
3355+
int send = magicLine.byteLength();
33473356
boolean sep = false;
33483357
for (;;) {
33493358
if (send - str <= 6) {
3350-
return;
3359+
return null;
33513360
}
33523361

3353-
switch (p(str + 6)) {
3362+
switch (magicLine.get(str + 6)) {
33543363
case 'C':
33553364
case 'c':
33563365
str += 6;
@@ -3382,13 +3391,12 @@ protected void set_file_encoding(int str, int send) {
33823391
break;
33833392
default:
33843393
str += 6;
3385-
if (Character.isSpaceChar(p(str))) {
3394+
if (Character.isSpaceChar(magicLine.get(str))) {
33863395
break;
33873396
}
33883397
continue;
33893398
}
3390-
if (src.parserRopeOperations.makeShared(lexb, str - 6, 6).toJavaStringUncached()
3391-
.equalsIgnoreCase("coding")) {
3399+
if (magicLine.substring(str - 6, 6).toJavaString().equalsIgnoreCase("coding")) {
33923400
break;
33933401
}
33943402
}
@@ -3397,24 +3405,33 @@ protected void set_file_encoding(int str, int send) {
33973405
do {
33983406
str++;
33993407
if (str >= send) {
3400-
return;
3408+
return null;
34013409
}
3402-
} while (Character.isSpaceChar(p(str)));
3410+
} while (Character.isSpaceChar(magicLine.get(str)));
34033411
if (sep) {
34043412
break;
34053413
}
34063414

3407-
if (p(str) != '=' && p(str) != ':') {
3408-
return;
3415+
if (magicLine.get(str) != '=' && magicLine.get(str) != ':') {
3416+
return null;
34093417
}
34103418
sep = true;
34113419
str++;
34123420
}
34133421

34143422
int beg = str;
3415-
while ((p(str) == '-' || p(str) == '_' || Character.isLetterOrDigit(p(str))) && ++str < send) {
3423+
while ((magicLine.get(str) == '-' || magicLine.get(str) == '_' ||
3424+
Character.isLetterOrDigit(magicLine.get(str))) && ++str < send) {
3425+
}
3426+
return magicLine.substring(beg, str - beg).tstring;
3427+
}
3428+
3429+
protected void set_file_encoding(int str, int send) {
3430+
TStringWithEncoding magicLine = new TStringWithEncoding(lexb, encoding).substring(str, send - str);
3431+
TruffleString encoding = get_file_encoding(magicLine);
3432+
if (encoding != null) {
3433+
setEncoding(encoding);
34163434
}
3417-
setEncoding(src.parserRopeOperations.makeShared(lexb, beg, str - beg));
34183435
}
34193436

34203437
public void setHeredocLineIndent(int heredoc_line_indent) {

0 commit comments

Comments
 (0)