File tree Expand file tree Collapse file tree 2 files changed +6
-8
lines changed Expand file tree Collapse file tree 2 files changed +6
-8
lines changed Original file line number Diff line number Diff line change 4
4
5
5
Updated Clojure, Common Lisp and Zig parsers.
6
6
7
- File detection is now stricter with Windows-1252 (Latin 1) encoded
8
- text. Windows-1252 was added in 0.63 and some binary files
9
- (e.g. Brotli compressed files) were incorrectly treated as this
10
- encoding.
7
+ Text encoding detection is now stricter, fixing more cases where
8
+ binary files were treated as text.
11
9
12
10
Added the `--override-binary` option to force files to be treated as
13
11
binary rather than text.
Original file line number Diff line number Diff line change @@ -223,7 +223,7 @@ pub(crate) fn guess_content(
223
223
let utf8_string = String :: from_utf8_lossy ( bytes) . to_string ( ) ;
224
224
let num_utf8_invalid = utf8_string
225
225
. chars ( )
226
- . take ( 5000 )
226
+ . take ( 50000 )
227
227
. filter ( |c| * c == std:: char:: REPLACEMENT_CHARACTER || * c == '\0' )
228
228
. count ( ) ;
229
229
if num_utf8_invalid <= 2 {
@@ -239,7 +239,7 @@ pub(crate) fn guess_content(
239
239
let utf16_string = String :: from_utf16_lossy ( & u16_values) ;
240
240
let num_utf16_invalid = utf16_string
241
241
. chars ( )
242
- . take ( 5000 )
242
+ . take ( 50000 )
243
243
. filter ( |c| * c == std:: char:: REPLACEMENT_CHARACTER || * c == '\0' )
244
244
. count ( ) ;
245
245
if num_utf16_invalid <= 1 {
@@ -250,13 +250,13 @@ pub(crate) fn guess_content(
250
250
return ProbableFileKind :: Text ( utf16_string) ;
251
251
}
252
252
253
- // If the input bytes are valid Windows-1252 (an extension of
253
+ // If the input bytes are mostly valid Windows-1252 (an extension of
254
254
// ISO-8859-1 aka Latin 1), treat them as such.
255
255
let ( latin1_str, _encoding, saw_malformed) = encoding_rs:: WINDOWS_1252 . decode ( bytes) ;
256
256
if !saw_malformed {
257
257
let num_null = latin1_str
258
258
. chars ( )
259
- . take ( 5000 )
259
+ . take ( 50000 )
260
260
. filter ( |c| * c == std:: char:: REPLACEMENT_CHARACTER || * c == '\0' )
261
261
. count ( ) ;
262
262
if num_null <= 1 {
You can’t perform that action at this time.
0 commit comments