Skip to content

Commit 88c71b8

Browse files
authored
Merge pull request #3304 from ruby/eileencodes-partially-fix-3230
Fix percent delimiter strings with crlfs
2 parents 0e0050d + e573cea commit 88c71b8

File tree

2 files changed

+86
-3
lines changed

2 files changed

+86
-3
lines changed

src/prism.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10508,6 +10508,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
1050810508
}
1050910509

1051010510
const uint8_t *end = parser->current.end - 1;
10511+
assert(end >= start);
1051110512
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
1051210513

1051310514
token_buffer->cursor = end;
@@ -10588,9 +10589,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
1058810589
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
1058910590
}
1059010591

10591-
const uint8_t delimiter = *parser->current.end;
10592-
parser->current.end += eol_length;
10592+
uint8_t delimiter = *parser->current.end;
10593+
10594+
// If our delimiter is \r\n, we want to treat it as if it's \n.
10595+
// For example, %\r\nfoo\r\n should be "foo"
10596+
if (eol_length == 2) {
10597+
delimiter = *(parser->current.end + 1);
10598+
}
1059310599

10600+
parser->current.end += eol_length;
1059410601
return delimiter;
1059510602
}
1059610603

@@ -12340,10 +12347,28 @@ parser_lex(pm_parser_t *parser) {
1234012347
continue;
1234112348
}
1234212349

12350+
bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);
12351+
12352+
// If the terminator is a newline, we _also_ need to treat \r\n as a newline.
12353+
// For example: `%\nfoo\r\n`
12354+
// The string should be "foo", not "foo\r"
12355+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12356+
if (lex_mode->as.string.terminator == '\n') {
12357+
is_terminator = true;
12358+
}
12359+
12360+
// If the terminator is a CR, but we see a CRLF, we need to
12361+
// treat the CRLF as a newline, meaning this is _not_ the
12362+
// terminator
12363+
if (lex_mode->as.string.terminator == '\r') {
12364+
is_terminator = false;
12365+
}
12366+
}
12367+
1234312368
// Note that we have to check the terminator here first because we could
1234412369
// potentially be parsing a % string that has a # character as the
1234512370
// terminator.
12346-
if (*breakpoint == lex_mode->as.string.terminator) {
12371+
if (is_terminator) {
1234712372
// If this terminator doesn't actually close the string, then we need
1234812373
// to continue on past it.
1234912374
if (lex_mode->as.string.nesting > 0) {
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# frozen_string_literal: true
2+
3+
require_relative "test_helper"
4+
5+
module Prism
6+
# Exercises `%` string literals whose delimiter is a line ending (LF, CR, or
# CRLF) instead of a printable character. Each test parses a small source
# string and checks the unescaped content of the resulting string node.
class PercentDelimiterStringTest < TestCase
7+
# LF opens the literal; a CRLF closes it. The CR of the closing CRLF is not
# part of the string content.
def test_newline_terminator_with_lf_crlf
8+
str = "%\n123456\r\n"
9+
assert_parse "123456", str
10+
end
11+
12+
# LF opens the literal; the source ends with CR CR LF. Only the final CRLF acts
# as the terminator, so the first CR is kept as content.
def test_newline_terminator_with_lf_crlf_with_extra_cr
13+
str = "%\n123456\r\r\n"
14+
assert_parse "123456\r", str
15+
end
16+
17+
# CRLF opens the literal and CRLF closes it; neither contributes any content.
def test_newline_terminator_with_crlf_pair
18+
str = "%\r\n123456\r\n"
19+
assert_parse "123456", str
20+
end
21+
22+
# CRLF opens the literal; the source ends with CR CR LF. As with the LF-opened
# variant, the extra leading CR stays in the content.
def test_newline_terminator_with_crlf_crlf_with_extra_cr
23+
str = "%\r\n123456\r\r\n"
24+
assert_parse "123456\r", str
25+
end
26+
27+
# A lone CR opens the literal and a lone CR (followed by ";", not LF) closes it.
def test_newline_terminator_with_cr_cr
28+
str = "%\r123456\r;\n"
29+
assert_parse "123456", str
30+
end
31+
32+
# CRLF opens the literal and a bare LF closes it.
def test_newline_terminator_with_crlf_lf
33+
str = "%\r\n123456\n;\n"
34+
assert_parse "123456", str
35+
end
36+
37+
# A lone CR opens the literal. The CRLF in the middle does not terminate it and
# shows up as a single LF in the unescaped value; the trailing lone CR closes
# the literal.
def test_cr_crlf
38+
str = "%\r1\r\n \r"
39+
assert_parse "1\n ", str
40+
end
41+
42+
# LF opens the literal and the first CRLF closes it, so the content is just "1";
# the remaining " " and LF are not part of the checked string.
def test_lf_crlf
43+
str = "%\n1\r\n \n"
44+
assert_parse "1", str
45+
end
46+
47+
# LF opens the literal and the first LF closes it, so the content is just "1".
def test_lf_lf
48+
str = "%\n1\n \n"
49+
assert_parse "1", str
50+
end
51+
52+
# Helper (not a test case): parses +str+ with Prism.parse, picks the
# Prism::StringNode returned by a breadth-first search of the parse result's
# value, and asserts that its unescaped content equals +expected+.
def assert_parse(expected, str)
53+
tree = Prism.parse str
54+
node = tree.value.breadth_first_search { |x| Prism::StringNode === x }
55+
assert_equal expected, node.unescaped
56+
end
57+
end
58+
end

0 commit comments

Comments
 (0)