@@ -58,6 +58,7 @@ public final class PythonFileDetector implements TruffleFile.FileTypeDetector {
58
58
59
59
// The three UTF-8 byte-order-mark bytes (EF BB BF) decoded as ISO-8859-1, i.e. the
// three-character prefix a BOM produces when raw bytes are read as Latin-1 text.
private static final String UTF_8_BOM_IN_LATIN_1 = new String (new byte []{(byte ) 0xEF , (byte ) 0xBB , (byte ) 0xBF }, StandardCharsets .ISO_8859_1 );
60
60
// Matches a line that, after optional spaces/tabs/form feeds, is a '#' comment containing
// "coding:" or "coding="; group 1 captures the name that follows (letters, digits, '-', '_', '.').
// NOTE(review): '.' does not match line terminators such as U+0085 unless DOTALL is set; if
// lines are decoded as ISO-8859-1 (as the BOM constant suggests), a 0x85 byte inside the
// comment would stop the match -- confirm whether that matters here.
private static final Pattern ENCODING_COMMENT = Pattern .compile ("^[ \t \f ]*#.*?coding[:=][ \t ]*([-_.a-zA-Z0-9]+).*" );
61
// Matches a line made up only of optional spaces/tabs/form feeds followed by an optional
// '#' comment. findEncodingStrict applies it with matches() to the first line to decide
// whether the second line is examined at all.
+ private static final Pattern BLANK_LINE = Pattern .compile ("^[ \t \f ]*(?:#.*)?" );
61
62
62
63
@ Override
63
64
public String findMimeType (TruffleFile file ) throws IOException {
@@ -106,19 +107,23 @@ private static Charset tryGetCharsetFromLine(String line, boolean hasBOM) {
106
107
107
108
/**
 * Determines a source charset from the first one or two lines of {@code reader}.
 *
 * <p>This span is a diff: lines marked {@code -} are the previous implementation, which
 * passed the second line to {@code tryGetCharsetFromLine} regardless of what the first
 * line contained. Lines marked {@code +} are the replacement described below.
 *
 * <p>The first line is read; if it starts with {@code UTF_8_BOM_IN_LATIN_1} that prefix is
 * removed and remembered as {@code hasBOM}. The line is then handed to
 * {@code tryGetCharsetFromLine}, and a non-null result is returned immediately. The second
 * line is read and checked only when the first line fully matches {@code BLANK_LINE}
 * (whitespace and/or a comment). If nothing is found, UTF-8 is returned.
 *
 * <p>NOTE(review): the BOM constant suggests the reader decodes bytes as ISO-8859-1 --
 * confirm against callers.
 *
 * @param reader line source; at most two lines are consumed
 * @return the charset produced by {@code tryGetCharsetFromLine}, or
 *         {@link StandardCharsets#UTF_8} when neither inspected line yields one
 * @throws IOException if reading a line from {@code reader} fails
 */
@ TruffleBoundary
108
109
public static Charset findEncodingStrict (BufferedReader reader ) throws IOException {
109
- Charset charset ;
110
110
// Read first two lines like CPython
111
111
String firstLine = reader .readLine ();
112
- boolean hasBOM = false ;
113
- if (firstLine != null && firstLine .startsWith (UTF_8_BOM_IN_LATIN_1 )) {
114
- hasBOM = true ;
115
- firstLine = firstLine .substring (UTF_8_BOM_IN_LATIN_1 .length ());
116
- }
117
- if ((charset = tryGetCharsetFromLine (firstLine , hasBOM )) != null ) {
118
- return charset ;
119
- }
120
- if ((charset = tryGetCharsetFromLine (reader .readLine (), hasBOM )) != null ) {
121
- return charset ;
112
// A null first line means the input is empty; skip straight to the UTF-8 default.
+ if (firstLine != null ) {
113
+ boolean hasBOM = false ;
114
// Strip the BOM so it does not interfere with matching on the rest of the line.
+ if (firstLine .startsWith (UTF_8_BOM_IN_LATIN_1 )) {
115
+ hasBOM = true ;
116
+ firstLine = firstLine .substring (UTF_8_BOM_IN_LATIN_1 .length ());
117
+ }
118
+ Charset charset ;
119
+ if ((charset = tryGetCharsetFromLine (firstLine , hasBOM )) != null ) {
120
+ return charset ;
121
+ }
122
// The second line is consulted only when the first is blank or comment-only.
+ if (BLANK_LINE .matcher (firstLine ).matches ()) {
123
// readLine() may return null at end of input; tryGetCharsetFromLine presumably
// tolerates null (the removed code passed it a possibly-null first line) -- confirm.
+ if ((charset = tryGetCharsetFromLine (reader .readLine (), hasBOM )) != null ) {
124
+ return charset ;
125
+ }
126
+ }
122
127
}
123
128
return StandardCharsets .UTF_8 ;
124
129
}
0 commit comments