Skip to content

Commit 914fea9

Browse files
committed
Parse encoding comment on second line only when first line was blank or comment
1 parent 0bb2a8a commit 914fea9

File tree

1 file changed

+16
-11
lines changed

1 file changed

+16
-11
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonFileDetector.java

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ public final class PythonFileDetector implements TruffleFile.FileTypeDetector {
58 58

59 59
private static final String UTF_8_BOM_IN_LATIN_1 = new String(new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}, StandardCharsets.ISO_8859_1);
60 60
private static final Pattern ENCODING_COMMENT = Pattern.compile("^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+).*");
61+
private static final Pattern BLANK_LINE = Pattern.compile("^[ \t\f]*(?:#.*)?");
61 62

62 63
@Override
63 64
public String findMimeType(TruffleFile file) throws IOException {
@@ -106,19 +107,23 @@ private static Charset tryGetCharsetFromLine(String line, boolean hasBOM) {
106 107

107 108
@TruffleBoundary
108 109
public static Charset findEncodingStrict(BufferedReader reader) throws IOException {
109-
Charset charset;
110 110
// Read first two lines like CPython
111 111
String firstLine = reader.readLine();
112-
boolean hasBOM = false;
113-
if (firstLine != null && firstLine.startsWith(UTF_8_BOM_IN_LATIN_1)) {
114-
hasBOM = true;
115-
firstLine = firstLine.substring(UTF_8_BOM_IN_LATIN_1.length());
116-
}
117-
if ((charset = tryGetCharsetFromLine(firstLine, hasBOM)) != null) {
118-
return charset;
119-
}
120-
if ((charset = tryGetCharsetFromLine(reader.readLine(), hasBOM)) != null) {
121-
return charset;
112+
if (firstLine != null) {
113+
boolean hasBOM = false;
114+
if (firstLine.startsWith(UTF_8_BOM_IN_LATIN_1)) {
115+
hasBOM = true;
116+
firstLine = firstLine.substring(UTF_8_BOM_IN_LATIN_1.length());
117+
}
118+
Charset charset;
119+
if ((charset = tryGetCharsetFromLine(firstLine, hasBOM)) != null) {
120+
return charset;
121+
}
122+
if (BLANK_LINE.matcher(firstLine).matches()) {
123+
if ((charset = tryGetCharsetFromLine(reader.readLine(), hasBOM)) != null) {
124+
return charset;
125+
}
126+
}
122 127
}
123 128
return StandardCharsets.UTF_8;
124 129
}

0 commit comments

Comments
 (0)