Skip to content

Commit a2732af

Browse files
authored
Merge pull request #3305 from tenderlove/percent_r
Decode %r like % strings
2 parents fbad866 + 85bfd9c commit a2732af

File tree

2 files changed

+62
-18
lines changed

2 files changed

+62
-18
lines changed

src/prism.c

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12115,9 +12115,28 @@ parser_lex(pm_parser_t *parser) {
1211512115
pm_regexp_token_buffer_t token_buffer = { 0 };
1211612116

1211712117
while (breakpoint != NULL) {
12118+
uint8_t term = lex_mode->as.regexp.terminator;
12119+
bool is_terminator = (*breakpoint == term);
12120+
12121+
// If the terminator is newline, we need to consider \r\n _also_ a newline
12122+
// For example: `%r\nfoo\r\n`
12122+
// The regexp should be /foo/, not /foo\r/
12124+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12125+
if (term == '\n') {
12126+
is_terminator = true;
12127+
}
12128+
12129+
// If the terminator is a CR, but we see a CRLF, we need to
12130+
// treat the CRLF as a newline, meaning this is _not_ the
12131+
// terminator
12132+
if (term == '\r') {
12133+
is_terminator = false;
12134+
}
12135+
}
12136+
1211812137
// If we hit the terminator, we need to determine what kind of
1211912138
// token to return.
12120-
if (*breakpoint == lex_mode->as.regexp.terminator) {
12139+
if (is_terminator) {
1212112140
if (lex_mode->as.regexp.nesting > 0) {
1212212141
parser->current.end = breakpoint + 1;
1212312142
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12347,20 +12366,21 @@ parser_lex(pm_parser_t *parser) {
1234712366
continue;
1234812367
}
1234912368

12350-
bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);
12369+
uint8_t term = lex_mode->as.string.terminator;
12370+
bool is_terminator = (*breakpoint == term);
1235112371

1235212372
// If the terminator is newline, we need to consider \r\n _also_ a newline
12353-
// For example: `%\nfoo\r\n`
12354-
// The string should be "foo", not "foo\r"
12373+
// For example: `%r\nfoo\r\n`
12374+
// The string should be /foo/, not /foo\r/
1235512375
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12356-
if (lex_mode->as.string.terminator == '\n') {
12376+
if (term == '\n') {
1235712377
is_terminator = true;
1235812378
}
1235912379

1236012380
// If the terminator is a CR, but we see a CRLF, we need to
1236112381
// treat the CRLF as a newline, meaning this is _not_ the
1236212382
// terminator
12363-
if (lex_mode->as.string.terminator == '\r') {
12383+
if (term == '\r') {
1236412384
is_terminator = false;
1236512385
}
1236612386
}

test/prism/percent_delimiter_string_test.rb

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,56 +3,80 @@
33
require_relative "test_helper"
44

55
module Prism
6-
class PercentDelimiterStringTest < TestCase
6+
module PercentDelimiterTests
77
def test_newline_terminator_with_lf_crlf
8-
str = "%\n123456\r\n"
8+
str = l "\n123456\r\n"
99
assert_parse "123456", str
1010
end
1111

1212
def test_newline_terminator_with_lf_crlf_with_extra_cr
13-
str = "%\n123456\r\r\n"
13+
str = l "\n123456\r\r\n"
1414
assert_parse "123456\r", str
1515
end
1616

1717
def test_newline_terminator_with_crlf_pair
18-
str = "%\r\n123456\r\n"
18+
str = l "\r\n123456\r\n"
1919
assert_parse "123456", str
2020
end
2121

2222
def test_newline_terminator_with_crlf_crlf_with_extra_cr
23-
str = "%\r\n123456\r\r\n"
23+
str = l "\r\n123456\r\r\n"
2424
assert_parse "123456\r", str
2525
end
2626

2727
def test_newline_terminator_with_cr_cr
28-
str = "%\r123456\r;\n"
28+
str = l "\r123456\r;\n"
2929
assert_parse "123456", str
3030
end
3131

3232
def test_newline_terminator_with_crlf_lf
33-
str = "%\r\n123456\n;\n"
33+
str = l "\r\n123456\n;\n"
3434
assert_parse "123456", str
3535
end
3636

3737
def test_cr_crlf
38-
str = "%\r1\r\n \r"
38+
str = l "\r1\r\n \r"
3939
assert_parse "1\n ", str
4040
end
4141

4242
def test_lf_crlf
43-
str = "%\n1\r\n \n"
43+
str = l "\n1\r\n \n"
4444
assert_parse "1", str
4545
end
4646

4747
def test_lf_lf
48-
str = "%\n1\n \n"
48+
str = l "\n1\n \n"
4949
assert_parse "1", str
5050
end
5151

5252
def assert_parse(expected, str)
53+
assert_equal expected, find_node(str).unescaped
54+
end
55+
end
56+
57+
class PercentDelimiterStringTest < TestCase
58+
include PercentDelimiterTests
59+
60+
def find_node(str)
61+
tree = Prism.parse str
62+
tree.value.breadth_first_search { |x| Prism::StringNode === x }
63+
end
64+
65+
def l(str)
66+
"%" + str
67+
end
68+
end
69+
70+
class PercentDelimiterRegexpTest < TestCase
71+
include PercentDelimiterTests
72+
73+
def l(str)
74+
"%r" + str
75+
end
76+
77+
def find_node(str)
5378
tree = Prism.parse str
54-
node = tree.value.breadth_first_search { |x| Prism::StringNode === x }
55-
assert_equal expected, node.unescaped
79+
tree.value.breadth_first_search { |x| Prism::RegularExpressionNode === x }
5680
end
5781
end
5882
end

0 commit comments

Comments
 (0)