Skip to content

Commit a234fd5

Browse files
Earlopain authored and
kddnewton committed
[ruby/prism] Fix parser translator ast for regex with line continuation
Turns out, the vast majority of the work was already done when handling the same thing for heredocs. I'm confident this should also apply to actual string nodes (there's even a TODO for it), but no tests change if I apply it there too, so I can't say for sure whether the logic would be correct. The individual test files are a bit too large; maybe something else would break that currently passes. Leaving it for later to look more closely into that. ruby/prism@6bba1c54e1
1 parent d1a7001 commit a234fd5

File tree

3 files changed

+64
-56
lines changed

3 files changed

+64
-56
lines changed

lib/prism/translation/parser/compiler.rb

Lines changed: 56 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,13 +1511,9 @@ def visit_redo_node(node)
15111511
# /foo/
15121512
# ^^^^^
15131513
def visit_regular_expression_node(node)
1514-
content = node.content
15151514
parts =
1516-
if content.include?("\n")
1517-
offset = node.content_loc.start_offset
1518-
content.lines.map do |line|
1519-
builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
1520-
end
1515+
if node.content.include?("\n")
1516+
string_nodes_from_line_continuations(node, node.content_loc.start_offset, node.opening)
15211517
else
15221518
[builder.string_internal(token(node.content_loc))]
15231519
end
@@ -2074,55 +2070,7 @@ def visit_heredoc(node)
20742070
node.parts.each do |part|
20752071
pushing =
20762072
if part.is_a?(StringNode) && part.unescaped.include?("\n")
2077-
unescaped = part.unescaped.lines
2078-
escaped = part.content.lines
2079-
2080-
escaped_lengths = []
2081-
normalized_lengths = []
2082-
# Keeps track of where an unescaped line should start a new token. An unescaped
2083-
# \n would otherwise be indistinguishable from the actual newline at the end of
2084-
# the line. The parser gem only emits a new string node at "real" newlines,
2085-
# line continuations don't start a new node as well.
2086-
do_next_tokens = []
2087-
2088-
if node.opening.end_with?("'")
2089-
escaped.each do |line|
2090-
escaped_lengths << line.bytesize
2091-
normalized_lengths << chomped_bytesize(line)
2092-
do_next_tokens << true
2093-
end
2094-
else
2095-
escaped
2096-
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
2097-
.each do |lines|
2098-
escaped_lengths << lines.sum(&:bytesize)
2099-
normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
2100-
unescaped_lines_count = lines.sum do |line|
2101-
line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
2102-
end
2103-
do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
2104-
do_next_tokens[-1] = true
2105-
end
2106-
end
2107-
2108-
start_offset = part.location.start_offset
2109-
current_line = +""
2110-
current_normalized_length = 0
2111-
2112-
unescaped.filter_map.with_index do |unescaped_line, index|
2113-
current_line << unescaped_line
2114-
current_normalized_length += normalized_lengths.fetch(index, 0)
2115-
2116-
if do_next_tokens[index]
2117-
inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
2118-
start_offset += escaped_lengths.fetch(index, 0)
2119-
current_line = +""
2120-
current_normalized_length = 0
2121-
inner_part
2122-
else
2123-
nil
2124-
end
2125-
end
2073+
string_nodes_from_line_continuations(part, part.location.start_offset, node.opening)
21262074
else
21272075
[visit(part)]
21282076
end
@@ -2172,6 +2120,59 @@ def within_pattern
21722120
parser.pattern_variables.pop
21732121
end
21742122
end
2123+
2124+
# Create parser string nodes from a single prism node. The parser gem
2125+
# "glues" strings together when a line continuation is encountered.
2126+
def string_nodes_from_line_continuations(node, start_offset, opening)
2127+
unescaped = node.unescaped.lines
2128+
escaped = node.content.lines
2129+
2130+
escaped_lengths = []
2131+
normalized_lengths = []
2132+
# Keeps track of where an unescaped line should start a new token. An unescaped
2133+
# \n would otherwise be indistinguishable from the actual newline at the end of
2134+
# of the line. The parser gem only emits a new string node at "real" newlines,
2135+
# line continuations don't start a new node as well.
2136+
do_next_tokens = []
2137+
2138+
if opening.end_with?("'")
2139+
escaped.each do |line|
2140+
escaped_lengths << line.bytesize
2141+
normalized_lengths << chomped_bytesize(line)
2142+
do_next_tokens << true
2143+
end
2144+
else
2145+
escaped
2146+
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
2147+
.each do |lines|
2148+
escaped_lengths << lines.sum(&:bytesize)
2149+
normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
2150+
unescaped_lines_count = lines.sum do |line|
2151+
line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
2152+
end
2153+
do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
2154+
do_next_tokens[-1] = true
2155+
end
2156+
end
2157+
2158+
current_line = +""
2159+
current_normalized_length = 0
2160+
2161+
unescaped.filter_map.with_index do |unescaped_line, index|
2162+
current_line << unescaped_line
2163+
current_normalized_length += normalized_lengths.fetch(index, 0)
2164+
2165+
if do_next_tokens[index]
2166+
inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
2167+
start_offset += escaped_lengths.fetch(index, 0)
2168+
current_line = +""
2169+
current_normalized_length = 0
2170+
inner_part
2171+
else
2172+
nil
2173+
end
2174+
end
2175+
end
21752176
end
21762177
end
21772178
end

test/prism/fixtures/regex.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,11 @@ tap { /(?<a>)/ =~ to_s }
4646
def foo(nil:) = /(?<nil>)/ =~ ""
4747

4848
/(?-x:#)/x
49+
50+
/a
51+
b\
52+
c\
53+
d\\\
54+
e\\
55+
f\
56+
/

test/prism/ruby/parser_test.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ class ParserTest < TestCase
6262
# These files are either failing to parse or failing to translate, so we'll
6363
# skip them for now.
6464
skip_all = skip_incorrect | [
65-
"regex.txt",
6665
"unescaping.txt",
6766
"seattlerb/bug190.txt",
6867
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt",

0 commit comments

Comments
 (0)