Skip to content

Commit b0fff7e

Browse files
Fix parser to validate UTF-8 on first input byte (#14750)
1 parent 1ac3280 commit b0fff7e

File tree

3 files changed

+17
-5
lines changed

3 files changed

+17
-5
lines changed

spec/compiler/crystal/tools/format_spec.cr

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ describe Crystal::Command::FormatCommand do
5757
format_command.run
5858
format_command.status_code.should eq(1)
5959
stdout.to_s.should be_empty
60-
stderr.to_s.should contain("file 'STDIN' is not a valid Crystal source file: Unexpected byte 0xff at position 1, malformed UTF-8")
60+
stderr.to_s.should contain("file 'STDIN' is not a valid Crystal source file: Unexpected byte 0xfe at position 0, malformed UTF-8")
6161
end
6262

6363
it "formats stdin (bug)" do
@@ -162,7 +162,7 @@ describe Crystal::Command::FormatCommand do
162162
format_command.status_code.should eq(1)
163163
stdout.to_s.should contain("Format #{Path[".", "format.cr"]}")
164164
stderr.to_s.should contain("syntax error in '#{Path[".", "syntax_error.cr"]}:1:3': unexpected token: EOF")
165-
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xff at position 1, malformed UTF-8")
165+
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xfe at position 0, malformed UTF-8")
166166

167167
File.read(File.join(path, "format.cr")).should eq("if true\n 1\nend\n")
168168
end
@@ -226,7 +226,7 @@ describe Crystal::Command::FormatCommand do
226226
stderr.to_s.should_not contain("not_format.cr")
227227
stderr.to_s.should contain("formatting '#{Path[".", "format.cr"]}' produced changes")
228228
stderr.to_s.should contain("syntax error in '#{Path[".", "syntax_error.cr"]}:1:3': unexpected token: EOF")
229-
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xff at position 1, malformed UTF-8")
229+
stderr.to_s.should contain("file '#{Path[".", "invalid_byte_sequence_error.cr"]}' is not a valid Crystal source file: Unexpected byte 0xfe at position 0, malformed UTF-8")
230230
end
231231
end
232232
end

spec/compiler/lexer/lexer_spec.cr

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,15 @@ describe "Lexer" do
657657
assert_syntax_error "'\\u{DFFF}'", "invalid unicode codepoint (surrogate half)"
658658
assert_syntax_error ":+1", "unexpected token"
659659

660+
it "invalid byte sequence" do
661+
expect_raises(InvalidByteSequenceError, "Unexpected byte 0xff at position 0, malformed UTF-8") do
662+
parse "\xFF"
663+
end
664+
expect_raises(InvalidByteSequenceError, "Unexpected byte 0xff at position 1, malformed UTF-8") do
665+
parse " \xFF"
666+
end
667+
end
668+
660669
assert_syntax_error "'\\1'", "invalid char escape sequence"
661670

662671
it_lexes_string %("\\1"), String.new(Bytes[1])

src/compiler/crystal/syntax/lexer.cr

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ module Crystal
5959
def initialize(string, string_pool : StringPool? = nil, warnings : WarningCollection? = nil)
6060
@warnings = warnings || WarningCollection.new
6161
@reader = Char::Reader.new(string)
62+
check_reader_error
6263
@token = Token.new
6364
@temp_token = Token.new
6465
@line_number = 1
@@ -2754,11 +2755,13 @@ module Crystal
27542755
end
27552756

27562757
def next_char_no_column_increment
2757-
char = @reader.next_char
2758+
@reader.next_char.tap { check_reader_error }
2759+
end
2760+
2761+
private def check_reader_error
27582762
if error = @reader.error
27592763
::raise InvalidByteSequenceError.new("Unexpected byte 0x#{error.to_s(16)} at position #{@reader.pos}, malformed UTF-8")
27602764
end
2761-
char
27622765
end
27632766

27642767
def next_char

0 commit comments

Comments
 (0)